GNU Linux-libre 6.9-gnu
releases.git: fs/f2fs/segment.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * fs/f2fs/segment.c
4  *
5  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6  *             http://www.samsung.com/
7  */
8 #include <linux/fs.h>
9 #include <linux/f2fs_fs.h>
10 #include <linux/bio.h>
11 #include <linux/blkdev.h>
12 #include <linux/sched/mm.h>
13 #include <linux/prefetch.h>
14 #include <linux/kthread.h>
15 #include <linux/swap.h>
16 #include <linux/timer.h>
17 #include <linux/freezer.h>
18 #include <linux/sched/signal.h>
19 #include <linux/random.h>
20
21 #include "f2fs.h"
22 #include "segment.h"
23 #include "node.h"
24 #include "gc.h"
25 #include "iostat.h"
26 #include <trace/events/f2fs.h>
27
28 #define __reverse_ffz(x) __reverse_ffs(~(x))
29
30 static struct kmem_cache *discard_entry_slab;
31 static struct kmem_cache *discard_cmd_slab;
32 static struct kmem_cache *sit_entry_set_slab;
33 static struct kmem_cache *revoke_entry_slab;
34
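/*
 * __reverse_ulong() assembles an unsigned long from @str with str[0] in the
 * most significant byte, i.e. a big-endian load regardless of host byte order.
 * For example, on a 64-bit host str = {0x80, 0, 0, 0, 0, 0, 0, 0x01} yields
 * 0x8000000000000001UL, so the f2fs "reversed" bit 0 (MSB of byte 0) ends up
 * as the MSB of the word, which is what the bit scans below rely on.
 */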
35 static unsigned long __reverse_ulong(unsigned char *str)
36 {
37         unsigned long tmp = 0;
38         int shift = 24, idx = 0;
39
40 #if BITS_PER_LONG == 64
41         shift = 56;
42 #endif
43         while (shift >= 0) {
44                 tmp |= (unsigned long)str[idx++] << shift;
45                 shift -= BITS_PER_BYTE;
46         }
47         return tmp;
48 }
49
50 /*
51  * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
52  * MSB and LSB are reversed in a byte by f2fs_set_bit.
53  */
54 static inline unsigned long __reverse_ffs(unsigned long word)
55 {
56         int num = 0;
57
58 #if BITS_PER_LONG == 64
59         if ((word & 0xffffffff00000000UL) == 0)
60                 num += 32;
61         else
62                 word >>= 32;
63 #endif
64         if ((word & 0xffff0000) == 0)
65                 num += 16;
66         else
67                 word >>= 16;
68
69         if ((word & 0xff00) == 0)
70                 num += 8;
71         else
72                 word >>= 8;
73
74         if ((word & 0xf0) == 0)
75                 num += 4;
76         else
77                 word >>= 4;
78
79         if ((word & 0xc) == 0)
80                 num += 2;
81         else
82                 word >>= 2;
83
84         if ((word & 0x2) == 0)
85                 num += 1;
86         return num;
87 }
88
89 /*
90  * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
91  * f2fs_set_bit makes MSB and LSB reversed in a byte.
92  * @size must be an integral multiple of the bit width of unsigned long.
93  * Example:
94  *                             MSB <--> LSB
95  *   f2fs_set_bit(0, bitmap) => 1000 0000
96  *   f2fs_set_bit(7, bitmap) => 0000 0001
97  */
98 static unsigned long __find_rev_next_bit(const unsigned long *addr,
99                         unsigned long size, unsigned long offset)
100 {
101         const unsigned long *p = addr + BIT_WORD(offset);
102         unsigned long result = size;
103         unsigned long tmp;
104
105         if (offset >= size)
106                 return size;
107
108         size -= (offset & ~(BITS_PER_LONG - 1));
109         offset %= BITS_PER_LONG;
110
111         while (1) {
112                 if (*p == 0)
113                         goto pass;
114
115                 tmp = __reverse_ulong((unsigned char *)p);
116
117                 tmp &= ~0UL >> offset;
118                 if (size < BITS_PER_LONG)
119                         tmp &= (~0UL << (BITS_PER_LONG - size));
120                 if (tmp)
121                         goto found;
122 pass:
123                 if (size <= BITS_PER_LONG)
124                         break;
125                 size -= BITS_PER_LONG;
126                 offset = 0;
127                 p++;
128         }
129         return result;
130 found:
131         return result - size + __reverse_ffs(tmp);
132 }
133
134 static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
135                         unsigned long size, unsigned long offset)
136 {
137         const unsigned long *p = addr + BIT_WORD(offset);
138         unsigned long result = size;
139         unsigned long tmp;
140
141         if (offset >= size)
142                 return size;
143
144         size -= (offset & ~(BITS_PER_LONG - 1));
145         offset %= BITS_PER_LONG;
146
147         while (1) {
148                 if (*p == ~0UL)
149                         goto pass;
150
151                 tmp = __reverse_ulong((unsigned char *)p);
152
153                 if (offset)
154                         tmp |= ~0UL << (BITS_PER_LONG - offset);
155                 if (size < BITS_PER_LONG)
156                         tmp |= ~0UL >> size;
157                 if (tmp != ~0UL)
158                         goto found;
159 pass:
160                 if (size <= BITS_PER_LONG)
161                         break;
162                 size -= BITS_PER_LONG;
163                 offset = 0;
164                 p++;
165         }
166         return result;
167 found:
168         return result - size + __reverse_ffz(tmp);
169 }
170
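/*
 * Decide whether allocation should fall back to SSR (slack space recycling).
 * SSR is never used in LFS mode, is forced while GC is in urgent-high mode or
 * checkpointing is disabled, and is otherwise enabled once the free sections
 * drop to the sections needed by dirty node, dentry (weighted twice) and
 * inode-meta pages plus the minimum SSR and reserved sections.
 */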
171 bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
172 {
173         int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
174         int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
175         int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
176
177         if (f2fs_lfs_mode(sbi))
178                 return false;
179         if (sbi->gc_mode == GC_URGENT_HIGH)
180                 return true;
181         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
182                 return true;
183
184         return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
185                         SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
186 }
187
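/*
 * Tear down the atomic-write state of @inode.  With @clean set, the page
 * cache is dropped and i_size is rolled back to the pre-atomic size; in
 * either case the FI_ATOMIC_* flags are cleared and the inode metadata is
 * synced so no stale dirty inode is left behind at eviction time.
 */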
188 void f2fs_abort_atomic_write(struct inode *inode, bool clean)
189 {
190         struct f2fs_inode_info *fi = F2FS_I(inode);
191
192         if (!f2fs_is_atomic_file(inode))
193                 return;
194
195         if (clean)
196                 truncate_inode_pages_final(inode->i_mapping);
197
198         release_atomic_write_cnt(inode);
199         clear_inode_flag(inode, FI_ATOMIC_COMMITTED);
200         clear_inode_flag(inode, FI_ATOMIC_REPLACE);
201         clear_inode_flag(inode, FI_ATOMIC_FILE);
202         stat_dec_atomic_inode(inode);
203
204         F2FS_I(inode)->atomic_write_task = NULL;
205
206         if (clean) {
207                 f2fs_i_size_write(inode, fi->original_i_size);
208                 fi->original_i_size = 0;
209         }
210         /* avoid stale dirty inode during eviction */
211         sync_inode_metadata(inode, 0);
212 }
213
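/*
 * Swap the block of @inode at page @index with @new_addr.  On the commit path
 * (@recover == false) the current address is saved through @old_addr so it
 * can be restored later; on the revoke path (@recover == true) the saved
 * address is written back.  -ENOMEM from the dnode lookup is retried after a
 * short IO timeout instead of failing the whole operation.
 */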
214 static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
215                         block_t new_addr, block_t *old_addr, bool recover)
216 {
217         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
218         struct dnode_of_data dn;
219         struct node_info ni;
220         int err;
221
222 retry:
223         set_new_dnode(&dn, inode, NULL, NULL, 0);
224         err = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
225         if (err) {
226                 if (err == -ENOMEM) {
227                         f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
228                         goto retry;
229                 }
230                 return err;
231         }
232
233         err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
234         if (err) {
235                 f2fs_put_dnode(&dn);
236                 return err;
237         }
238
239         if (recover) {
240                 /* dn.data_blkaddr is always valid */
241                 if (!__is_valid_data_blkaddr(new_addr)) {
242                         if (new_addr == NULL_ADDR)
243                                 dec_valid_block_count(sbi, inode, 1);
244                         f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
245                         f2fs_update_data_blkaddr(&dn, new_addr);
246                 } else {
247                         f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
248                                 new_addr, ni.version, true, true);
249                 }
250         } else {
251                 blkcnt_t count = 1;
252
253                 err = inc_valid_block_count(sbi, inode, &count, true);
254                 if (err) {
255                         f2fs_put_dnode(&dn);
256                         return err;
257                 }
258
259                 *old_addr = dn.data_blkaddr;
260                 f2fs_truncate_data_blocks_range(&dn, 1);
261                 dec_valid_block_count(sbi, F2FS_I(inode)->cow_inode, count);
262
263                 f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr,
264                                         ni.version, true, false);
265         }
266
267         f2fs_put_dnode(&dn);
268
269         trace_f2fs_replace_atomic_write_block(inode, F2FS_I(inode)->cow_inode,
270                         index, old_addr ? *old_addr : 0, new_addr, recover);
271         return 0;
272 }
273
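/*
 * Drain the revoke list built by __f2fs_commit_atomic_write().  With @revoke
 * set, each entry is replayed to restore the original block address;
 * otherwise, for FI_ATOMIC_REPLACE inodes, the ranges between the committed
 * entries are punched out and the tail of the file is truncated.
 */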
274 static void __complete_revoke_list(struct inode *inode, struct list_head *head,
275                                         bool revoke)
276 {
277         struct revoke_entry *cur, *tmp;
278         pgoff_t start_index = 0;
279         bool truncate = is_inode_flag_set(inode, FI_ATOMIC_REPLACE);
280
281         list_for_each_entry_safe(cur, tmp, head, list) {
282                 if (revoke) {
283                         __replace_atomic_write_block(inode, cur->index,
284                                                 cur->old_addr, NULL, true);
285                 } else if (truncate) {
286                         f2fs_truncate_hole(inode, start_index, cur->index);
287                         start_index = cur->index + 1;
288                 }
289
290                 list_del(&cur->list);
291                 kmem_cache_free(revoke_entry_slab, cur);
292         }
293
294         if (!revoke && truncate)
295                 f2fs_do_truncate_blocks(inode, start_index * PAGE_SIZE, false);
296 }
297
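/*
 * Walk the COW inode and, for every valid block it holds, move that block
 * into the original inode via __replace_atomic_write_block(), recording each
 * replaced address in a revoke list.  On failure the list is replayed to
 * undo the partial commit; on success the entries are freed (and, for
 * FI_ATOMIC_REPLACE, the uncovered ranges are truncated).
 */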
298 static int __f2fs_commit_atomic_write(struct inode *inode)
299 {
300         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
301         struct f2fs_inode_info *fi = F2FS_I(inode);
302         struct inode *cow_inode = fi->cow_inode;
303         struct revoke_entry *new;
304         struct list_head revoke_list;
305         block_t blkaddr;
306         struct dnode_of_data dn;
307         pgoff_t len = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
308         pgoff_t off = 0, blen, index;
309         int ret = 0, i;
310
311         INIT_LIST_HEAD(&revoke_list);
312
313         while (len) {
314                 blen = min_t(pgoff_t, ADDRS_PER_BLOCK(cow_inode), len);
315
316                 set_new_dnode(&dn, cow_inode, NULL, NULL, 0);
317                 ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
318                 if (ret && ret != -ENOENT) {
319                         goto out;
320                 } else if (ret == -ENOENT) {
321                         ret = 0;
322                         if (dn.max_level == 0)
323                                 goto out;
324                         goto next;
325                 }
326
327                 blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, cow_inode),
328                                 len);
329                 index = off;
330                 for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) {
331                         blkaddr = f2fs_data_blkaddr(&dn);
332
333                         if (!__is_valid_data_blkaddr(blkaddr)) {
334                                 continue;
335                         } else if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
336                                         DATA_GENERIC_ENHANCE)) {
337                                 f2fs_put_dnode(&dn);
338                                 ret = -EFSCORRUPTED;
339                                 goto out;
340                         }
341
342                         new = f2fs_kmem_cache_alloc(revoke_entry_slab, GFP_NOFS,
343                                                         true, NULL);
344
345                         ret = __replace_atomic_write_block(inode, index, blkaddr,
346                                                         &new->old_addr, false);
347                         if (ret) {
348                                 f2fs_put_dnode(&dn);
349                                 kmem_cache_free(revoke_entry_slab, new);
350                                 goto out;
351                         }
352
353                         f2fs_update_data_blkaddr(&dn, NULL_ADDR);
354                         new->index = index;
355                         list_add_tail(&new->list, &revoke_list);
356                 }
357                 f2fs_put_dnode(&dn);
358 next:
359                 off += blen;
360                 len -= blen;
361         }
362
363 out:
364         if (ret) {
365                 sbi->revoked_atomic_block += fi->atomic_write_cnt;
366         } else {
367                 sbi->committed_atomic_block += fi->atomic_write_cnt;
368                 set_inode_flag(inode, FI_ATOMIC_COMMITTED);
369         }
370
371         __complete_revoke_list(inode, &revoke_list, ret ? true : false);
372
373         return ret;
374 }
375
376 int f2fs_commit_atomic_write(struct inode *inode)
377 {
378         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
379         struct f2fs_inode_info *fi = F2FS_I(inode);
380         int err;
381
382         err = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
383         if (err)
384                 return err;
385
386         f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
387         f2fs_lock_op(sbi);
388
389         err = __f2fs_commit_atomic_write(inode);
390
391         f2fs_unlock_op(sbi);
392         f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
393
394         return err;
395 }
396
397 /*
398  * This function balances dirty node and dentry pages.
399  * In addition, it controls garbage collection.
400  */
401 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
402 {
403         if (f2fs_cp_error(sbi))
404                 return;
405
406         if (time_to_inject(sbi, FAULT_CHECKPOINT))
407                 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT);
408
409         /* balance_fs_bg is allowed to remain pending */
410         if (need && excess_cached_nats(sbi))
411                 f2fs_balance_fs_bg(sbi, false);
412
413         if (!f2fs_is_checkpoint_ready(sbi))
414                 return;
415
416         /*
417          * We should do GC or end up with checkpoint, if there are so many dirty
418          * dir/node pages without enough free segments.
419          */
420         if (has_enough_free_secs(sbi, 0, 0))
421                 return;
422
423         if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
424                                 sbi->gc_thread->f2fs_gc_task) {
425                 DEFINE_WAIT(wait);
426
427                 prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
428                                         TASK_UNINTERRUPTIBLE);
429                 wake_up(&sbi->gc_thread->gc_wait_queue_head);
430                 io_schedule();
431                 finish_wait(&sbi->gc_thread->fggc_wq, &wait);
432         } else {
433                 struct f2fs_gc_control gc_control = {
434                         .victim_segno = NULL_SEGNO,
435                         .init_gc_type = BG_GC,
436                         .no_bg_gc = true,
437                         .should_migrate_blocks = false,
438                         .err_gc_skipped = false,
439                         .nr_free_secs = 1 };
440                 f2fs_down_write(&sbi->gc_lock);
441                 stat_inc_gc_call_count(sbi, FOREGROUND);
442                 f2fs_gc(sbi, &gc_control);
443         }
444 }
445
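/*
 * Report whether dirty dentry/quota/node/meta/imeta pages have piled up past
 * the threshold used by f2fs_balance_fs_bg().  Any single type crossing
 * factor * DEFAULT_DIRTY_THRESHOLD segments worth of blocks trips it (factor
 * is 3 while cp_rwsem is held, otherwise 2), as does the combined total
 * crossing 1.5x that per-type threshold.
 */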
446 static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
447 {
448         int factor = f2fs_rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2;
449         unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS);
450         unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
451         unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
452         unsigned int meta = get_pages(sbi, F2FS_DIRTY_META);
453         unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
454         unsigned int threshold =
455                 SEGS_TO_BLKS(sbi, (factor * DEFAULT_DIRTY_THRESHOLD));
456         unsigned int global_threshold = threshold * 3 / 2;
457
458         if (dents >= threshold || qdata >= threshold ||
459                 nodes >= threshold || meta >= threshold ||
460                 imeta >= threshold)
461                 return true;
462         return dents + qdata + nodes + meta + imeta >  global_threshold;
463 }
464
465 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
466 {
467         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
468                 return;
469
470         /* try to shrink the read extent cache when there is not enough memory */
471         if (!f2fs_available_free_memory(sbi, READ_EXTENT_CACHE))
472                 f2fs_shrink_read_extent_tree(sbi,
473                                 READ_EXTENT_CACHE_SHRINK_NUMBER);
474
475         /* try to shrink the age extent cache when there is not enough memory */
476         if (!f2fs_available_free_memory(sbi, AGE_EXTENT_CACHE))
477                 f2fs_shrink_age_extent_tree(sbi,
478                                 AGE_EXTENT_CACHE_SHRINK_NUMBER);
479
480         /* check the # of cached NAT entries */
481         if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
482                 f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
483
484         if (!f2fs_available_free_memory(sbi, FREE_NIDS))
485                 f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
486         else
487                 f2fs_build_free_nids(sbi, false, false);
488
489         if (excess_dirty_nats(sbi) || excess_dirty_threshold(sbi) ||
490                 excess_prefree_segs(sbi) || !f2fs_space_for_roll_forward(sbi))
491                 goto do_sync;
492
493         /* there is in-flight background IO, or a foreground operation ran recently */
494         if (is_inflight_io(sbi, REQ_TIME) ||
495                 (!f2fs_time_over(sbi, REQ_TIME) && f2fs_rwsem_is_locked(&sbi->cp_rwsem)))
496                 return;
497
498         /* the periodic checkpoint timeout threshold has been exceeded */
499         if (f2fs_time_over(sbi, CP_TIME))
500                 goto do_sync;
501
502         /* checkpoint is the only way to shrink partial cached entries */
503         if (f2fs_available_free_memory(sbi, NAT_ENTRIES) &&
504                 f2fs_available_free_memory(sbi, INO_ENTRIES))
505                 return;
506
507 do_sync:
508         if (test_opt(sbi, DATA_FLUSH) && from_bg) {
509                 struct blk_plug plug;
510
511                 mutex_lock(&sbi->flush_lock);
512
513                 blk_start_plug(&plug);
514                 f2fs_sync_dirty_inodes(sbi, FILE_INODE, false);
515                 blk_finish_plug(&plug);
516
517                 mutex_unlock(&sbi->flush_lock);
518         }
519         stat_inc_cp_call_count(sbi, BACKGROUND);
520         f2fs_sync_fs(sbi->sb, 1);
521 }
522
523 static int __submit_flush_wait(struct f2fs_sb_info *sbi,
524                                 struct block_device *bdev)
525 {
526         int ret = blkdev_issue_flush(bdev);
527
528         trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
529                                 test_opt(sbi, FLUSH_MERGE), ret);
530         if (!ret)
531                 f2fs_update_iostat(sbi, NULL, FS_FLUSH_IO, 0);
532         return ret;
533 }
534
535 static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
536 {
537         int ret = 0;
538         int i;
539
540         if (!f2fs_is_multi_device(sbi))
541                 return __submit_flush_wait(sbi, sbi->sb->s_bdev);
542
543         for (i = 0; i < sbi->s_ndevs; i++) {
544                 if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
545                         continue;
546                 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
547                 if (ret)
548                         break;
549         }
550         return ret;
551 }
552
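/*
 * Flush-merge worker: grabs every flush command queued on fcc->issue_list,
 * issues a single cache flush for the whole batch, then completes each
 * waiter with that result.  Only one issued_flush is counted per batch.
 */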
553 static int issue_flush_thread(void *data)
554 {
555         struct f2fs_sb_info *sbi = data;
556         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
557         wait_queue_head_t *q = &fcc->flush_wait_queue;
558 repeat:
559         if (kthread_should_stop())
560                 return 0;
561
562         if (!llist_empty(&fcc->issue_list)) {
563                 struct flush_cmd *cmd, *next;
564                 int ret;
565
566                 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
567                 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
568
569                 cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
570
571                 ret = submit_flush_wait(sbi, cmd->ino);
572                 atomic_inc(&fcc->issued_flush);
573
574                 llist_for_each_entry_safe(cmd, next,
575                                           fcc->dispatch_list, llnode) {
576                         cmd->ret = ret;
577                         complete(&cmd->wait);
578                 }
579                 fcc->dispatch_list = NULL;
580         }
581
582         wait_event_interruptible(*q,
583                 kthread_should_stop() || !llist_empty(&fcc->issue_list));
584         goto repeat;
585 }
586
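/*
 * Issue a cache flush on behalf of @ino.  NOBARRIER mounts skip it entirely.
 * Without FLUSH_MERGE, or when this is the only queued request, or on
 * multi-device setups, the flush is submitted synchronously here; otherwise
 * the request is queued for the issue_flush thread and, if that thread is
 * gone, the caller drains the list and issues the flush itself.
 */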
587 int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
588 {
589         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
590         struct flush_cmd cmd;
591         int ret;
592
593         if (test_opt(sbi, NOBARRIER))
594                 return 0;
595
596         if (!test_opt(sbi, FLUSH_MERGE)) {
597                 atomic_inc(&fcc->queued_flush);
598                 ret = submit_flush_wait(sbi, ino);
599                 atomic_dec(&fcc->queued_flush);
600                 atomic_inc(&fcc->issued_flush);
601                 return ret;
602         }
603
604         if (atomic_inc_return(&fcc->queued_flush) == 1 ||
605             f2fs_is_multi_device(sbi)) {
606                 ret = submit_flush_wait(sbi, ino);
607                 atomic_dec(&fcc->queued_flush);
608
609                 atomic_inc(&fcc->issued_flush);
610                 return ret;
611         }
612
613         cmd.ino = ino;
614         init_completion(&cmd.wait);
615
616         llist_add(&cmd.llnode, &fcc->issue_list);
617
618         /*
619          * update issue_list before we wake up the issue_flush thread; this
620          * smp_mb() pairs with the barrier in ___wait_event(), see the
621          * comments of waitqueue_active() for more details.
622          */
623         smp_mb();
624
625         if (waitqueue_active(&fcc->flush_wait_queue))
626                 wake_up(&fcc->flush_wait_queue);
627
628         if (fcc->f2fs_issue_flush) {
629                 wait_for_completion(&cmd.wait);
630                 atomic_dec(&fcc->queued_flush);
631         } else {
632                 struct llist_node *list;
633
634                 list = llist_del_all(&fcc->issue_list);
635                 if (!list) {
636                         wait_for_completion(&cmd.wait);
637                         atomic_dec(&fcc->queued_flush);
638                 } else {
639                         struct flush_cmd *tmp, *next;
640
641                         ret = submit_flush_wait(sbi, ino);
642
643                         llist_for_each_entry_safe(tmp, next, list, llnode) {
644                                 if (tmp == &cmd) {
645                                         cmd.ret = ret;
646                                         atomic_dec(&fcc->queued_flush);
647                                         continue;
648                                 }
649                                 tmp->ret = ret;
650                                 complete(&tmp->wait);
651                         }
652                 }
653         }
654
655         return cmd.ret;
656 }
657
658 int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
659 {
660         dev_t dev = sbi->sb->s_bdev->bd_dev;
661         struct flush_cmd_control *fcc;
662
663         if (SM_I(sbi)->fcc_info) {
664                 fcc = SM_I(sbi)->fcc_info;
665                 if (fcc->f2fs_issue_flush)
666                         return 0;
667                 goto init_thread;
668         }
669
670         fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
671         if (!fcc)
672                 return -ENOMEM;
673         atomic_set(&fcc->issued_flush, 0);
674         atomic_set(&fcc->queued_flush, 0);
675         init_waitqueue_head(&fcc->flush_wait_queue);
676         init_llist_head(&fcc->issue_list);
677         SM_I(sbi)->fcc_info = fcc;
678         if (!test_opt(sbi, FLUSH_MERGE))
679                 return 0;
680
681 init_thread:
682         fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
683                                 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
684         if (IS_ERR(fcc->f2fs_issue_flush)) {
685                 int err = PTR_ERR(fcc->f2fs_issue_flush);
686
687                 fcc->f2fs_issue_flush = NULL;
688                 return err;
689         }
690
691         return 0;
692 }
693
694 void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
695 {
696         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
697
698         if (fcc && fcc->f2fs_issue_flush) {
699                 struct task_struct *flush_thread = fcc->f2fs_issue_flush;
700
701                 fcc->f2fs_issue_flush = NULL;
702                 kthread_stop(flush_thread);
703         }
704         if (free) {
705                 kfree(fcc);
706                 SM_I(sbi)->fcc_info = NULL;
707         }
708 }
709
710 int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
711 {
712         int ret = 0, i;
713
714         if (!f2fs_is_multi_device(sbi))
715                 return 0;
716
717         if (test_opt(sbi, NOBARRIER))
718                 return 0;
719
720         for (i = 1; i < sbi->s_ndevs; i++) {
721                 int count = DEFAULT_RETRY_IO_COUNT;
722
723                 if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
724                         continue;
725
726                 do {
727                         ret = __submit_flush_wait(sbi, FDEV(i).bdev);
728                         if (ret)
729                                 f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
730                 } while (ret && --count);
731
732                 if (ret) {
733                         f2fs_stop_checkpoint(sbi, false,
734                                         STOP_CP_REASON_FLUSH_FAIL);
735                         break;
736                 }
737
738                 spin_lock(&sbi->dev_lock);
739                 f2fs_clear_bit(i, (char *)&sbi->dirty_device);
740                 spin_unlock(&sbi->dev_lock);
741         }
742
743         return ret;
744 }
745
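/*
 * Mark @segno in the @dirty_type seglist and, for DIRTY, also in the per-type
 * list matching the segment's allocation type.  In large-section layouts the
 * owning section is flagged in dirty_secmap as well, unless it is a current
 * section.
 */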
746 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
747                 enum dirty_type dirty_type)
748 {
749         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
750
751         /* need not be added */
752         if (IS_CURSEG(sbi, segno))
753                 return;
754
755         if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
756                 dirty_i->nr_dirty[dirty_type]++;
757
758         if (dirty_type == DIRTY) {
759                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
760                 enum dirty_type t = sentry->type;
761
762                 if (unlikely(t >= DIRTY)) {
763                         f2fs_bug_on(sbi, 1);
764                         return;
765                 }
766                 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
767                         dirty_i->nr_dirty[t]++;
768
769                 if (__is_large_section(sbi)) {
770                         unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
771                         block_t valid_blocks =
772                                 get_valid_blocks(sbi, segno, true);
773
774                         f2fs_bug_on(sbi, unlikely(!valid_blocks ||
775                                         valid_blocks == CAP_BLKS_PER_SEC(sbi)));
776
777                         if (!IS_CURSEC(sbi, secno))
778                                 set_bit(secno, dirty_i->dirty_secmap);
779                 }
780         }
781 }
782
783 static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
784                 enum dirty_type dirty_type)
785 {
786         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
787         block_t valid_blocks;
788
789         if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
790                 dirty_i->nr_dirty[dirty_type]--;
791
792         if (dirty_type == DIRTY) {
793                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
794                 enum dirty_type t = sentry->type;
795
796                 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
797                         dirty_i->nr_dirty[t]--;
798
799                 valid_blocks = get_valid_blocks(sbi, segno, true);
800                 if (valid_blocks == 0) {
801                         clear_bit(GET_SEC_FROM_SEG(sbi, segno),
802                                                 dirty_i->victim_secmap);
803 #ifdef CONFIG_F2FS_CHECK_FS
804                         clear_bit(segno, SIT_I(sbi)->invalid_segmap);
805 #endif
806                 }
807                 if (__is_large_section(sbi)) {
808                         unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
809
810                         if (!valid_blocks ||
811                                         valid_blocks == CAP_BLKS_PER_SEC(sbi)) {
812                                 clear_bit(secno, dirty_i->dirty_secmap);
813                                 return;
814                         }
815
816                         if (!IS_CURSEC(sbi, secno))
817                                 set_bit(secno, dirty_i->dirty_secmap);
818                 }
819         }
820 }
821
822 /*
823  * Errors such as -ENOMEM should not occur here.
824  * Adding a dirty entry to the seglist is not a critical operation.
825  * If the given segment is one of the current working segments, it won't be added.
826  */
827 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
828 {
829         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
830         unsigned short valid_blocks, ckpt_valid_blocks;
831         unsigned int usable_blocks;
832
833         if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
834                 return;
835
836         usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
837         mutex_lock(&dirty_i->seglist_lock);
838
839         valid_blocks = get_valid_blocks(sbi, segno, false);
840         ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);
841
842         if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
843                 ckpt_valid_blocks == usable_blocks)) {
844                 __locate_dirty_segment(sbi, segno, PRE);
845                 __remove_dirty_segment(sbi, segno, DIRTY);
846         } else if (valid_blocks < usable_blocks) {
847                 __locate_dirty_segment(sbi, segno, DIRTY);
848         } else {
849                 /* Recovery routine with SSR needs this */
850                 __remove_dirty_segment(sbi, segno, DIRTY);
851         }
852
853         mutex_unlock(&dirty_i->seglist_lock);
854 }
855
856 /* Move dirty segments that no longer hold valid blocks to prefree; takes seglist_lock internally. */
857 void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
858 {
859         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
860         unsigned int segno;
861
862         mutex_lock(&dirty_i->seglist_lock);
863         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
864                 if (get_valid_blocks(sbi, segno, false))
865                         continue;
866                 if (IS_CURSEG(sbi, segno))
867                         continue;
868                 __locate_dirty_segment(sbi, segno, PRE);
869                 __remove_dirty_segment(sbi, segno, DIRTY);
870         }
871         mutex_unlock(&dirty_i->seglist_lock);
872 }
873
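/*
 * With checkpointing disabled, count the blocks that cannot be reused: the
 * larger of the DATA and NODE "holes" (free space sitting inside dirty
 * segments), minus what the overprovision area can absorb.
 */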
874 block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
875 {
876         int ovp_hole_segs =
877                 (overprovision_segments(sbi) - reserved_segments(sbi));
878         block_t ovp_holes = SEGS_TO_BLKS(sbi, ovp_hole_segs);
879         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
880         block_t holes[2] = {0, 0};      /* DATA and NODE */
881         block_t unusable;
882         struct seg_entry *se;
883         unsigned int segno;
884
885         mutex_lock(&dirty_i->seglist_lock);
886         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
887                 se = get_seg_entry(sbi, segno);
888                 if (IS_NODESEG(se->type))
889                         holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
890                                                         se->valid_blocks;
891                 else
892                         holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
893                                                         se->valid_blocks;
894         }
895         mutex_unlock(&dirty_i->seglist_lock);
896
897         unusable = max(holes[DATA], holes[NODE]);
898         if (unusable > ovp_holes)
899                 return unusable - ovp_holes;
900         return 0;
901 }
902
903 int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
904 {
905         int ovp_hole_segs =
906                 (overprovision_segments(sbi) - reserved_segments(sbi));
907
908         if (F2FS_OPTION(sbi).unusable_cap_perc == 100)
909                 return 0;
910         if (unusable > F2FS_OPTION(sbi).unusable_cap)
911                 return -EAGAIN;
912         if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
913                 dirty_segments(sbi) > ovp_hole_segs)
914                 return -EAGAIN;
915         if (has_not_enough_free_secs(sbi, 0, 0))
916                 return -EAGAIN;
917         return 0;
918 }
919
920 /* This is only used by SBI_CP_DISABLED */
921 static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
922 {
923         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
924         unsigned int segno = 0;
925
926         mutex_lock(&dirty_i->seglist_lock);
927         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
928                 if (get_valid_blocks(sbi, segno, false))
929                         continue;
930                 if (get_ckpt_valid_blocks(sbi, segno, false))
931                         continue;
932                 mutex_unlock(&dirty_i->seglist_lock);
933                 return segno;
934         }
935         mutex_unlock(&dirty_i->seglist_lock);
936         return NULL_SEGNO;
937 }
938
939 static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
940                 struct block_device *bdev, block_t lstart,
941                 block_t start, block_t len)
942 {
943         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
944         struct list_head *pend_list;
945         struct discard_cmd *dc;
946
947         f2fs_bug_on(sbi, !len);
948
949         pend_list = &dcc->pend_list[plist_idx(len)];
950
951         dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS, true, NULL);
952         INIT_LIST_HEAD(&dc->list);
953         dc->bdev = bdev;
954         dc->di.lstart = lstart;
955         dc->di.start = start;
956         dc->di.len = len;
957         dc->ref = 0;
958         dc->state = D_PREP;
959         dc->queued = 0;
960         dc->error = 0;
961         init_completion(&dc->wait);
962         list_add_tail(&dc->list, pend_list);
963         spin_lock_init(&dc->lock);
964         dc->bio_ref = 0;
965         atomic_inc(&dcc->discard_cmd_cnt);
966         dcc->undiscard_blks += len;
967
968         return dc;
969 }
970
971 static bool f2fs_check_discard_tree(struct f2fs_sb_info *sbi)
972 {
973 #ifdef CONFIG_F2FS_CHECK_FS
974         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
975         struct rb_node *cur = rb_first_cached(&dcc->root), *next;
976         struct discard_cmd *cur_dc, *next_dc;
977
978         while (cur) {
979                 next = rb_next(cur);
980                 if (!next)
981                         return true;
982
983                 cur_dc = rb_entry(cur, struct discard_cmd, rb_node);
984                 next_dc = rb_entry(next, struct discard_cmd, rb_node);
985
986                 if (cur_dc->di.lstart + cur_dc->di.len > next_dc->di.lstart) {
987                         f2fs_info(sbi, "broken discard_rbtree, "
988                                 "cur(%u, %u) next(%u, %u)",
989                                 cur_dc->di.lstart, cur_dc->di.len,
990                                 next_dc->di.lstart, next_dc->di.len);
991                         return false;
992                 }
993                 cur = next;
994         }
995 #endif
996         return true;
997 }
998
999 static struct discard_cmd *__lookup_discard_cmd(struct f2fs_sb_info *sbi,
1000                                                 block_t blkaddr)
1001 {
1002         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1003         struct rb_node *node = dcc->root.rb_root.rb_node;
1004         struct discard_cmd *dc;
1005
1006         while (node) {
1007                 dc = rb_entry(node, struct discard_cmd, rb_node);
1008
1009                 if (blkaddr < dc->di.lstart)
1010                         node = node->rb_left;
1011                 else if (blkaddr >= dc->di.lstart + dc->di.len)
1012                         node = node->rb_right;
1013                 else
1014                         return dc;
1015         }
1016         return NULL;
1017 }
1018
1019 static struct discard_cmd *__lookup_discard_cmd_ret(struct rb_root_cached *root,
1020                                 block_t blkaddr,
1021                                 struct discard_cmd **prev_entry,
1022                                 struct discard_cmd **next_entry,
1023                                 struct rb_node ***insert_p,
1024                                 struct rb_node **insert_parent)
1025 {
1026         struct rb_node **pnode = &root->rb_root.rb_node;
1027         struct rb_node *parent = NULL, *tmp_node;
1028         struct discard_cmd *dc;
1029
1030         *insert_p = NULL;
1031         *insert_parent = NULL;
1032         *prev_entry = NULL;
1033         *next_entry = NULL;
1034
1035         if (RB_EMPTY_ROOT(&root->rb_root))
1036                 return NULL;
1037
1038         while (*pnode) {
1039                 parent = *pnode;
1040                 dc = rb_entry(*pnode, struct discard_cmd, rb_node);
1041
1042                 if (blkaddr < dc->di.lstart)
1043                         pnode = &(*pnode)->rb_left;
1044                 else if (blkaddr >= dc->di.lstart + dc->di.len)
1045                         pnode = &(*pnode)->rb_right;
1046                 else
1047                         goto lookup_neighbors;
1048         }
1049
1050         *insert_p = pnode;
1051         *insert_parent = parent;
1052
1053         dc = rb_entry(parent, struct discard_cmd, rb_node);
1054         tmp_node = parent;
1055         if (parent && blkaddr > dc->di.lstart)
1056                 tmp_node = rb_next(parent);
1057         *next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1058
1059         tmp_node = parent;
1060         if (parent && blkaddr < dc->di.lstart)
1061                 tmp_node = rb_prev(parent);
1062         *prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1063         return NULL;
1064
1065 lookup_neighbors:
1066         /* lookup prev node for merging backward later */
1067         tmp_node = rb_prev(&dc->rb_node);
1068         *prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1069
1070         /* lookup next node for merging forward later */
1071         tmp_node = rb_next(&dc->rb_node);
1072         *next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1073         return dc;
1074 }
1075
1076 static void __detach_discard_cmd(struct discard_cmd_control *dcc,
1077                                                         struct discard_cmd *dc)
1078 {
1079         if (dc->state == D_DONE)
1080                 atomic_sub(dc->queued, &dcc->queued_discard);
1081
1082         list_del(&dc->list);
1083         rb_erase_cached(&dc->rb_node, &dcc->root);
1084         dcc->undiscard_blks -= dc->di.len;
1085
1086         kmem_cache_free(discard_cmd_slab, dc);
1087
1088         atomic_dec(&dcc->discard_cmd_cnt);
1089 }
1090
1091 static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
1092                                                         struct discard_cmd *dc)
1093 {
1094         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1095         unsigned long flags;
1096
1097         trace_f2fs_remove_discard(dc->bdev, dc->di.start, dc->di.len);
1098
1099         spin_lock_irqsave(&dc->lock, flags);
1100         if (dc->bio_ref) {
1101                 spin_unlock_irqrestore(&dc->lock, flags);
1102                 return;
1103         }
1104         spin_unlock_irqrestore(&dc->lock, flags);
1105
1106         f2fs_bug_on(sbi, dc->ref);
1107
1108         if (dc->error == -EOPNOTSUPP)
1109                 dc->error = 0;
1110
1111         if (dc->error)
1112                 printk_ratelimited(
1113                         "%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d",
1114                         KERN_INFO, sbi->sb->s_id,
1115                         dc->di.lstart, dc->di.start, dc->di.len, dc->error);
1116         __detach_discard_cmd(dcc, dc);
1117 }
1118
1119 static void f2fs_submit_discard_endio(struct bio *bio)
1120 {
1121         struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
1122         unsigned long flags;
1123
1124         spin_lock_irqsave(&dc->lock, flags);
1125         if (!dc->error)
1126                 dc->error = blk_status_to_errno(bio->bi_status);
1127         dc->bio_ref--;
1128         if (!dc->bio_ref && dc->state == D_SUBMIT) {
1129                 dc->state = D_DONE;
1130                 complete_all(&dc->wait);
1131         }
1132         spin_unlock_irqrestore(&dc->lock, flags);
1133         bio_put(bio);
1134 }
1135
1136 static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
1137                                 block_t start, block_t end)
1138 {
1139 #ifdef CONFIG_F2FS_CHECK_FS
1140         struct seg_entry *sentry;
1141         unsigned int segno;
1142         block_t blk = start;
1143         unsigned long offset, size, *map;
1144
1145         while (blk < end) {
1146                 segno = GET_SEGNO(sbi, blk);
1147                 sentry = get_seg_entry(sbi, segno);
1148                 offset = GET_BLKOFF_FROM_SEG0(sbi, blk);
1149
1150                 if (end < START_BLOCK(sbi, segno + 1))
1151                         size = GET_BLKOFF_FROM_SEG0(sbi, end);
1152                 else
1153                         size = BLKS_PER_SEG(sbi);
1154                 map = (unsigned long *)(sentry->cur_valid_map);
1155                 offset = __find_rev_next_bit(map, size, offset);
1156                 f2fs_bug_on(sbi, offset != size);
1157                 blk = START_BLOCK(sbi, segno + 1);
1158         }
1159 #endif
1160 }
1161
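/*
 * Seed a discard_policy for the given issuer.  Background (DPOLICY_BG)
 * discards run asynchronously, ordered and optionally IO-aware, dropping to
 * the minimum granularity once utilization exceeds discard_urgent_util;
 * DPOLICY_FORCE and DPOLICY_FSTRIM ignore IO-awareness; DPOLICY_UMOUNT also
 * uses the minimum granularity and a timeout so CP_TRIMMED_FLAG can be kept.
 */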
1162 static void __init_discard_policy(struct f2fs_sb_info *sbi,
1163                                 struct discard_policy *dpolicy,
1164                                 int discard_type, unsigned int granularity)
1165 {
1166         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1167
1168         /* common policy */
1169         dpolicy->type = discard_type;
1170         dpolicy->sync = true;
1171         dpolicy->ordered = false;
1172         dpolicy->granularity = granularity;
1173
1174         dpolicy->max_requests = dcc->max_discard_request;
1175         dpolicy->io_aware_gran = dcc->discard_io_aware_gran;
1176         dpolicy->timeout = false;
1177
1178         if (discard_type == DPOLICY_BG) {
1179                 dpolicy->min_interval = dcc->min_discard_issue_time;
1180                 dpolicy->mid_interval = dcc->mid_discard_issue_time;
1181                 dpolicy->max_interval = dcc->max_discard_issue_time;
1182                 if (dcc->discard_io_aware == DPOLICY_IO_AWARE_ENABLE)
1183                         dpolicy->io_aware = true;
1184                 else if (dcc->discard_io_aware == DPOLICY_IO_AWARE_DISABLE)
1185                         dpolicy->io_aware = false;
1186                 dpolicy->sync = false;
1187                 dpolicy->ordered = true;
1188                 if (utilization(sbi) > dcc->discard_urgent_util) {
1189                         dpolicy->granularity = MIN_DISCARD_GRANULARITY;
1190                         if (atomic_read(&dcc->discard_cmd_cnt))
1191                                 dpolicy->max_interval =
1192                                         dcc->min_discard_issue_time;
1193                 }
1194         } else if (discard_type == DPOLICY_FORCE) {
1195                 dpolicy->min_interval = dcc->min_discard_issue_time;
1196                 dpolicy->mid_interval = dcc->mid_discard_issue_time;
1197                 dpolicy->max_interval = dcc->max_discard_issue_time;
1198                 dpolicy->io_aware = false;
1199         } else if (discard_type == DPOLICY_FSTRIM) {
1200                 dpolicy->io_aware = false;
1201         } else if (discard_type == DPOLICY_UMOUNT) {
1202                 dpolicy->io_aware = false;
1203                 /* we need to issue all to keep CP_TRIMMED_FLAG */
1204                 dpolicy->granularity = MIN_DISCARD_GRANULARITY;
1205                 dpolicy->timeout = true;
1206         }
1207 }
1208
1209 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1210                                 struct block_device *bdev, block_t lstart,
1211                                 block_t start, block_t len);
1212
1213 #ifdef CONFIG_BLK_DEV_ZONED
1214 static void __submit_zone_reset_cmd(struct f2fs_sb_info *sbi,
1215                                    struct discard_cmd *dc, blk_opf_t flag,
1216                                    struct list_head *wait_list,
1217                                    unsigned int *issued)
1218 {
1219         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1220         struct block_device *bdev = dc->bdev;
1221         struct bio *bio = bio_alloc(bdev, 0, REQ_OP_ZONE_RESET | flag, GFP_NOFS);
1222         unsigned long flags;
1223
1224         trace_f2fs_issue_reset_zone(bdev, dc->di.start);
1225
1226         spin_lock_irqsave(&dc->lock, flags);
1227         dc->state = D_SUBMIT;
1228         dc->bio_ref++;
1229         spin_unlock_irqrestore(&dc->lock, flags);
1230
1231         if (issued)
1232                 (*issued)++;
1233
1234         atomic_inc(&dcc->queued_discard);
1235         dc->queued++;
1236         list_move_tail(&dc->list, wait_list);
1237
1238         /* sanity check on discard range */
1239         __check_sit_bitmap(sbi, dc->di.lstart, dc->di.lstart + dc->di.len);
1240
1241         bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(dc->di.start);
1242         bio->bi_private = dc;
1243         bio->bi_end_io = f2fs_submit_discard_endio;
1244         submit_bio(bio);
1245
1246         atomic_inc(&dcc->issued_discard);
1247         f2fs_update_iostat(sbi, NULL, FS_ZONE_RESET_IO, dc->di.len * F2FS_BLKSIZE);
1248 }
1249 #endif
1250
1251 /* this function is copied from blkdev_issue_discard() in block/blk-lib.c */
1252 static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
1253                                 struct discard_policy *dpolicy,
1254                                 struct discard_cmd *dc, int *issued)
1255 {
1256         struct block_device *bdev = dc->bdev;
1257         unsigned int max_discard_blocks =
1258                         SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
1259         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1260         struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1261                                         &(dcc->fstrim_list) : &(dcc->wait_list);
1262         blk_opf_t flag = dpolicy->sync ? REQ_SYNC : 0;
1263         block_t lstart, start, len, total_len;
1264         int err = 0;
1265
1266         if (dc->state != D_PREP)
1267                 return 0;
1268
1269         if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1270                 return 0;
1271
1272 #ifdef CONFIG_BLK_DEV_ZONED
1273         if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) {
1274                 int devi = f2fs_bdev_index(sbi, bdev);
1275
1276                 if (devi < 0)
1277                         return -EINVAL;
1278
1279                 if (f2fs_blkz_is_seq(sbi, devi, dc->di.start)) {
1280                         __submit_zone_reset_cmd(sbi, dc, flag,
1281                                                 wait_list, issued);
1282                         return 0;
1283                 }
1284         }
1285 #endif
1286
1287         trace_f2fs_issue_discard(bdev, dc->di.start, dc->di.len);
1288
1289         lstart = dc->di.lstart;
1290         start = dc->di.start;
1291         len = dc->di.len;
1292         total_len = len;
1293
1294         dc->di.len = 0;
1295
1296         while (total_len && *issued < dpolicy->max_requests && !err) {
1297                 struct bio *bio = NULL;
1298                 unsigned long flags;
1299                 bool last = true;
1300
1301                 if (len > max_discard_blocks) {
1302                         len = max_discard_blocks;
1303                         last = false;
1304                 }
1305
1306                 (*issued)++;
1307                 if (*issued == dpolicy->max_requests)
1308                         last = true;
1309
1310                 dc->di.len += len;
1311
1312                 if (time_to_inject(sbi, FAULT_DISCARD)) {
1313                         err = -EIO;
1314                 } else {
1315                         err = __blkdev_issue_discard(bdev,
1316                                         SECTOR_FROM_BLOCK(start),
1317                                         SECTOR_FROM_BLOCK(len),
1318                                         GFP_NOFS, &bio);
1319                 }
1320                 if (err) {
1321                         spin_lock_irqsave(&dc->lock, flags);
1322                         if (dc->state == D_PARTIAL)
1323                                 dc->state = D_SUBMIT;
1324                         spin_unlock_irqrestore(&dc->lock, flags);
1325
1326                         break;
1327                 }
1328
1329                 f2fs_bug_on(sbi, !bio);
1330
1331                 /*
1332                  * state and bio_ref must be updated before submission so
1333                  * the endio handler cannot mark the command D_DONE right away
1334                  */
1335                 spin_lock_irqsave(&dc->lock, flags);
1336                 if (last)
1337                         dc->state = D_SUBMIT;
1338                 else
1339                         dc->state = D_PARTIAL;
1340                 dc->bio_ref++;
1341                 spin_unlock_irqrestore(&dc->lock, flags);
1342
1343                 atomic_inc(&dcc->queued_discard);
1344                 dc->queued++;
1345                 list_move_tail(&dc->list, wait_list);
1346
1347                 /* sanity check on discard range */
1348                 __check_sit_bitmap(sbi, lstart, lstart + len);
1349
1350                 bio->bi_private = dc;
1351                 bio->bi_end_io = f2fs_submit_discard_endio;
1352                 bio->bi_opf |= flag;
1353                 submit_bio(bio);
1354
1355                 atomic_inc(&dcc->issued_discard);
1356
1357                 f2fs_update_iostat(sbi, NULL, FS_DISCARD_IO, len * F2FS_BLKSIZE);
1358
1359                 lstart += len;
1360                 start += len;
1361                 total_len -= len;
1362                 len = total_len;
1363         }
1364
1365         if (!err && len) {
1366                 dcc->undiscard_blks -= len;
1367                 __update_discard_tree_range(sbi, bdev, lstart, start, len);
1368         }
1369         return err;
1370 }
1371
1372 static void __insert_discard_cmd(struct f2fs_sb_info *sbi,
1373                                 struct block_device *bdev, block_t lstart,
1374                                 block_t start, block_t len)
1375 {
1376         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1377         struct rb_node **p = &dcc->root.rb_root.rb_node;
1378         struct rb_node *parent = NULL;
1379         struct discard_cmd *dc;
1380         bool leftmost = true;
1381
1382         /* look up rb tree to find parent node */
1383         while (*p) {
1384                 parent = *p;
1385                 dc = rb_entry(parent, struct discard_cmd, rb_node);
1386
1387                 if (lstart < dc->di.lstart) {
1388                         p = &(*p)->rb_left;
1389                 } else if (lstart >= dc->di.lstart + dc->di.len) {
1390                         p = &(*p)->rb_right;
1391                         leftmost = false;
1392                 } else {
1393                         /* skip the insert if an overlapping entry already exists */
1394                         return;
1395                 }
1396         }
1397
1398         dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
1399
1400         rb_link_node(&dc->rb_node, parent, p);
1401         rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
1402 }
1403
1404 static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
1405                                                 struct discard_cmd *dc)
1406 {
1407         list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->di.len)]);
1408 }
1409
1410 static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
1411                                 struct discard_cmd *dc, block_t blkaddr)
1412 {
1413         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1414         struct discard_info di = dc->di;
1415         bool modified = false;
1416
1417         if (dc->state == D_DONE || dc->di.len == 1) {
1418                 __remove_discard_cmd(sbi, dc);
1419                 return;
1420         }
1421
1422         dcc->undiscard_blks -= di.len;
1423
1424         if (blkaddr > di.lstart) {
1425                 dc->di.len = blkaddr - dc->di.lstart;
1426                 dcc->undiscard_blks += dc->di.len;
1427                 __relocate_discard_cmd(dcc, dc);
1428                 modified = true;
1429         }
1430
1431         if (blkaddr < di.lstart + di.len - 1) {
1432                 if (modified) {
1433                         __insert_discard_cmd(sbi, dc->bdev, blkaddr + 1,
1434                                         di.start + blkaddr + 1 - di.lstart,
1435                                         di.lstart + di.len - 1 - blkaddr);
1436                 } else {
1437                         dc->di.lstart++;
1438                         dc->di.len--;
1439                         dc->di.start++;
1440                         dcc->undiscard_blks += dc->di.len;
1441                         __relocate_discard_cmd(dcc, dc);
1442                 }
1443         }
1444 }
1445
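/*
 * Insert the discard range [lstart, lstart + len) into the rb-tree, merging
 * it into neighbouring D_PREP commands on the same bdev whenever the merged
 * command stays within the device's max discard size; any piece that cannot
 * be merged is inserted as a new command.
 */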
1446 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1447                                 struct block_device *bdev, block_t lstart,
1448                                 block_t start, block_t len)
1449 {
1450         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1451         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1452         struct discard_cmd *dc;
1453         struct discard_info di = {0};
1454         struct rb_node **insert_p = NULL, *insert_parent = NULL;
1455         unsigned int max_discard_blocks =
1456                         SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
1457         block_t end = lstart + len;
1458
1459         dc = __lookup_discard_cmd_ret(&dcc->root, lstart,
1460                                 &prev_dc, &next_dc, &insert_p, &insert_parent);
1461         if (dc)
1462                 prev_dc = dc;
1463
1464         if (!prev_dc) {
1465                 di.lstart = lstart;
1466                 di.len = next_dc ? next_dc->di.lstart - lstart : len;
1467                 di.len = min(di.len, len);
1468                 di.start = start;
1469         }
1470
1471         while (1) {
1472                 struct rb_node *node;
1473                 bool merged = false;
1474                 struct discard_cmd *tdc = NULL;
1475
1476                 if (prev_dc) {
1477                         di.lstart = prev_dc->di.lstart + prev_dc->di.len;
1478                         if (di.lstart < lstart)
1479                                 di.lstart = lstart;
1480                         if (di.lstart >= end)
1481                                 break;
1482
1483                         if (!next_dc || next_dc->di.lstart > end)
1484                                 di.len = end - di.lstart;
1485                         else
1486                                 di.len = next_dc->di.lstart - di.lstart;
1487                         di.start = start + di.lstart - lstart;
1488                 }
1489
1490                 if (!di.len)
1491                         goto next;
1492
1493                 if (prev_dc && prev_dc->state == D_PREP &&
1494                         prev_dc->bdev == bdev &&
1495                         __is_discard_back_mergeable(&di, &prev_dc->di,
1496                                                         max_discard_blocks)) {
1497                         prev_dc->di.len += di.len;
1498                         dcc->undiscard_blks += di.len;
1499                         __relocate_discard_cmd(dcc, prev_dc);
1500                         di = prev_dc->di;
1501                         tdc = prev_dc;
1502                         merged = true;
1503                 }
1504
1505                 if (next_dc && next_dc->state == D_PREP &&
1506                         next_dc->bdev == bdev &&
1507                         __is_discard_front_mergeable(&di, &next_dc->di,
1508                                                         max_discard_blocks)) {
1509                         next_dc->di.lstart = di.lstart;
1510                         next_dc->di.len += di.len;
1511                         next_dc->di.start = di.start;
1512                         dcc->undiscard_blks += di.len;
1513                         __relocate_discard_cmd(dcc, next_dc);
1514                         if (tdc)
1515                                 __remove_discard_cmd(sbi, tdc);
1516                         merged = true;
1517                 }
1518
1519                 if (!merged)
1520                         __insert_discard_cmd(sbi, bdev,
1521                                                 di.lstart, di.start, di.len);
1522  next:
1523                 prev_dc = next_dc;
1524                 if (!prev_dc)
1525                         break;
1526
1527                 node = rb_next(&prev_dc->rb_node);
1528                 next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1529         }
1530 }
1531
1532 #ifdef CONFIG_BLK_DEV_ZONED
1533 static void __queue_zone_reset_cmd(struct f2fs_sb_info *sbi,
1534                 struct block_device *bdev, block_t blkstart, block_t lblkstart,
1535                 block_t blklen)
1536 {
1537         trace_f2fs_queue_reset_zone(bdev, blkstart);
1538
1539         mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1540         __insert_discard_cmd(sbi, bdev, lblkstart, blkstart, blklen);
1541         mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1542 }
1543 #endif
1544
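     /*
      * Record a discard range in the command tree. For multi-device setups,
      * blkstart is translated to a device-relative block address first.
      */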
1545 static void __queue_discard_cmd(struct f2fs_sb_info *sbi,
1546                 struct block_device *bdev, block_t blkstart, block_t blklen)
1547 {
1548         block_t lblkstart = blkstart;
1549
1550         if (!f2fs_bdev_support_discard(bdev))
1551                 return;
1552
1553         trace_f2fs_queue_discard(bdev, blkstart, blklen);
1554
1555         if (f2fs_is_multi_device(sbi)) {
1556                 int devi = f2fs_target_device_index(sbi, blkstart);
1557
1558                 blkstart -= FDEV(devi).start_blk;
1559         }
1560         mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1561         __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
1562         mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1563 }
1564
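     /*
      * Issue prepared discard commands in LBA order, resuming from
      * dcc->next_pos, until the policy's request limit is reached or the
      * device stops being idle (when the policy is io_aware).
      */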
1565 static void __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
1566                 struct discard_policy *dpolicy, int *issued)
1567 {
1568         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1569         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1570         struct rb_node **insert_p = NULL, *insert_parent = NULL;
1571         struct discard_cmd *dc;
1572         struct blk_plug plug;
1573         bool io_interrupted = false;
1574
1575         mutex_lock(&dcc->cmd_lock);
1576         dc = __lookup_discard_cmd_ret(&dcc->root, dcc->next_pos,
1577                                 &prev_dc, &next_dc, &insert_p, &insert_parent);
1578         if (!dc)
1579                 dc = next_dc;
1580
1581         blk_start_plug(&plug);
1582
1583         while (dc) {
1584                 struct rb_node *node;
1585                 int err = 0;
1586
1587                 if (dc->state != D_PREP)
1588                         goto next;
1589
1590                 if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
1591                         io_interrupted = true;
1592                         break;
1593                 }
1594
1595                 dcc->next_pos = dc->di.lstart + dc->di.len;
1596                 err = __submit_discard_cmd(sbi, dpolicy, dc, issued);
1597
1598                 if (*issued >= dpolicy->max_requests)
1599                         break;
1600 next:
1601                 node = rb_next(&dc->rb_node);
1602                 if (err)
1603                         __remove_discard_cmd(sbi, dc);
1604                 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1605         }
1606
1607         blk_finish_plug(&plug);
1608
1609         if (!dc)
1610                 dcc->next_pos = 0;
1611
1612         mutex_unlock(&dcc->cmd_lock);
1613
1614         if (!(*issued) && io_interrupted)
1615                 *issued = -1;
1616 }
1617 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1618                                         struct discard_policy *dpolicy);
1619
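     /*
      * Walk the pending lists from the largest to the smallest granularity and
      * submit prepared commands according to dpolicy. Returns the number of
      * commands issued, or -1 if nothing was issued because I/O was busy.
      */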
1620 static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
1621                                         struct discard_policy *dpolicy)
1622 {
1623         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1624         struct list_head *pend_list;
1625         struct discard_cmd *dc, *tmp;
1626         struct blk_plug plug;
1627         int i, issued;
1628         bool io_interrupted = false;
1629
1630         if (dpolicy->timeout)
1631                 f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);
1632
1633 retry:
1634         issued = 0;
1635         for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1636                 if (dpolicy->timeout &&
1637                                 f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1638                         break;
1639
1640                 if (i + 1 < dpolicy->granularity)
1641                         break;
1642
1643                 if (i + 1 < dcc->max_ordered_discard && dpolicy->ordered) {
1644                         __issue_discard_cmd_orderly(sbi, dpolicy, &issued);
1645                         return issued;
1646                 }
1647
1648                 pend_list = &dcc->pend_list[i];
1649
1650                 mutex_lock(&dcc->cmd_lock);
1651                 if (list_empty(pend_list))
1652                         goto next;
1653                 if (unlikely(dcc->rbtree_check))
1654                         f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi));
1655                 blk_start_plug(&plug);
1656                 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1657                         f2fs_bug_on(sbi, dc->state != D_PREP);
1658
1659                         if (dpolicy->timeout &&
1660                                 f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1661                                 break;
1662
1663                         if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
1664                                                 !is_idle(sbi, DISCARD_TIME)) {
1665                                 io_interrupted = true;
1666                                 break;
1667                         }
1668
1669                         __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1670
1671                         if (issued >= dpolicy->max_requests)
1672                                 break;
1673                 }
1674                 blk_finish_plug(&plug);
1675 next:
1676                 mutex_unlock(&dcc->cmd_lock);
1677
1678                 if (issued >= dpolicy->max_requests || io_interrupted)
1679                         break;
1680         }
1681
1682         if (dpolicy->type == DPOLICY_UMOUNT && issued) {
1683                 __wait_all_discard_cmd(sbi, dpolicy);
1684                 goto retry;
1685         }
1686
1687         if (!issued && io_interrupted)
1688                 issued = -1;
1689
1690         return issued;
1691 }
1692
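     /* Drop every still-prepared discard command; returns true if any were dropped */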
1693 static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1694 {
1695         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1696         struct list_head *pend_list;
1697         struct discard_cmd *dc, *tmp;
1698         int i;
1699         bool dropped = false;
1700
1701         mutex_lock(&dcc->cmd_lock);
1702         for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1703                 pend_list = &dcc->pend_list[i];
1704                 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1705                         f2fs_bug_on(sbi, dc->state != D_PREP);
1706                         __remove_discard_cmd(sbi, dc);
1707                         dropped = true;
1708                 }
1709         }
1710         mutex_unlock(&dcc->cmd_lock);
1711
1712         return dropped;
1713 }
1714
1715 void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
1716 {
1717         __drop_discard_cmd(sbi);
1718 }
1719
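     /*
      * Wait for one issued discard command to complete. When the last
      * reference is dropped, the command is freed and the number of
      * successfully trimmed blocks is returned.
      */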
1720 static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
1721                                                         struct discard_cmd *dc)
1722 {
1723         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1724         unsigned int len = 0;
1725
1726         wait_for_completion_io(&dc->wait);
1727         mutex_lock(&dcc->cmd_lock);
1728         f2fs_bug_on(sbi, dc->state != D_DONE);
1729         dc->ref--;
1730         if (!dc->ref) {
1731                 if (!dc->error)
1732                         len = dc->di.len;
1733                 __remove_discard_cmd(sbi, dc);
1734         }
1735         mutex_unlock(&dcc->cmd_lock);
1736
1737         return len;
1738 }
1739
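     /*
      * Wait for issued discard commands that overlap [start, end) and satisfy
      * the policy's granularity, returning the total number of blocks trimmed.
      */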
1740 static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
1741                                                 struct discard_policy *dpolicy,
1742                                                 block_t start, block_t end)
1743 {
1744         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1745         struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1746                                         &(dcc->fstrim_list) : &(dcc->wait_list);
1747         struct discard_cmd *dc = NULL, *iter, *tmp;
1748         unsigned int trimmed = 0;
1749
1750 next:
1751         dc = NULL;
1752
1753         mutex_lock(&dcc->cmd_lock);
1754         list_for_each_entry_safe(iter, tmp, wait_list, list) {
1755                 if (iter->di.lstart + iter->di.len <= start ||
1756                                         end <= iter->di.lstart)
1757                         continue;
1758                 if (iter->di.len < dpolicy->granularity)
1759                         continue;
1760                 if (iter->state == D_DONE && !iter->ref) {
1761                         wait_for_completion_io(&iter->wait);
1762                         if (!iter->error)
1763                                 trimmed += iter->di.len;
1764                         __remove_discard_cmd(sbi, iter);
1765                 } else {
1766                         iter->ref++;
1767                         dc = iter;
1768                         break;
1769                 }
1770         }
1771         mutex_unlock(&dcc->cmd_lock);
1772
1773         if (dc) {
1774                 trimmed += __wait_one_discard_bio(sbi, dc);
1775                 goto next;
1776         }
1777
1778         return trimmed;
1779 }
1780
1781 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1782                                                 struct discard_policy *dpolicy)
1783 {
1784         struct discard_policy dp;
1785         unsigned int discard_blks;
1786
1787         if (dpolicy)
1788                 return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1789
1790         /* wait all */
1791         __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, MIN_DISCARD_GRANULARITY);
1792         discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1793         __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, MIN_DISCARD_GRANULARITY);
1794         discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1795
1796         return discard_blks;
1797 }
1798
1799 /* This should be called with the global lock &sit_i->sentry_lock held */
1800 static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1801 {
1802         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1803         struct discard_cmd *dc;
1804         bool need_wait = false;
1805
1806         mutex_lock(&dcc->cmd_lock);
1807         dc = __lookup_discard_cmd(sbi, blkaddr);
1808 #ifdef CONFIG_BLK_DEV_ZONED
1809         if (dc && f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(dc->bdev)) {
1810                 int devi = f2fs_bdev_index(sbi, dc->bdev);
1811
1812                 if (devi < 0) {
1813                         mutex_unlock(&dcc->cmd_lock);
1814                         return;
1815                 }
1816
1817                 if (f2fs_blkz_is_seq(sbi, devi, dc->di.start)) {
1818                         /* force submit zone reset */
1819                         if (dc->state == D_PREP)
1820                                 __submit_zone_reset_cmd(sbi, dc, REQ_SYNC,
1821                                                         &dcc->wait_list, NULL);
1822                         dc->ref++;
1823                         mutex_unlock(&dcc->cmd_lock);
1824                         /* wait zone reset */
1825                         __wait_one_discard_bio(sbi, dc);
1826                         return;
1827                 }
1828         }
1829 #endif
1830         if (dc) {
1831                 if (dc->state == D_PREP) {
1832                         __punch_discard_cmd(sbi, dc, blkaddr);
1833                 } else {
1834                         dc->ref++;
1835                         need_wait = true;
1836                 }
1837         }
1838         mutex_unlock(&dcc->cmd_lock);
1839
1840         if (need_wait)
1841                 __wait_one_discard_bio(sbi, dc);
1842 }
1843
1844 void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1845 {
1846         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1847
1848         if (dcc && dcc->f2fs_issue_discard) {
1849                 struct task_struct *discard_thread = dcc->f2fs_issue_discard;
1850
1851                 dcc->f2fs_issue_discard = NULL;
1852                 kthread_stop(discard_thread);
1853         }
1854 }
1855
1856 /**
1857  * f2fs_issue_discard_timeout() - Issue all pending discard commands within UMOUNT_DISCARD_TIMEOUT
1858  * @sbi: the f2fs_sb_info data for the discard commands to issue
1859  *
1860  * When UMOUNT_DISCARD_TIMEOUT is exceeded, all remaining discard commands are dropped.
1861  *
1862  * Return true if all discard commands were issued or none needed to be issued; otherwise return false.
1863  */
1864 bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
1865 {
1866         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1867         struct discard_policy dpolicy;
1868         bool dropped;
1869
1870         if (!atomic_read(&dcc->discard_cmd_cnt))
1871                 return true;
1872
1873         __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
1874                                         dcc->discard_granularity);
1875         __issue_discard_cmd(sbi, &dpolicy);
1876         dropped = __drop_discard_cmd(sbi);
1877
1878         /* just to make sure there are no pending discard commands */
1879         __wait_all_discard_cmd(sbi, NULL);
1880
1881         f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1882         return !dropped;
1883 }
1884
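     /*
      * Background thread that periodically issues pending discard commands and
      * waits for them, adjusting its sleep interval based on the policy and on
      * how busy the device is.
      */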
1885 static int issue_discard_thread(void *data)
1886 {
1887         struct f2fs_sb_info *sbi = data;
1888         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1889         wait_queue_head_t *q = &dcc->discard_wait_queue;
1890         struct discard_policy dpolicy;
1891         unsigned int wait_ms = dcc->min_discard_issue_time;
1892         int issued;
1893
1894         set_freezable();
1895
1896         do {
1897                 wait_event_freezable_timeout(*q,
1898                                 kthread_should_stop() || dcc->discard_wake,
1899                                 msecs_to_jiffies(wait_ms));
1900
1901                 if (sbi->gc_mode == GC_URGENT_HIGH ||
1902                         !f2fs_available_free_memory(sbi, DISCARD_CACHE))
1903                         __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE,
1904                                                 MIN_DISCARD_GRANULARITY);
1905                 else
1906                         __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
1907                                                 dcc->discard_granularity);
1908
1909                 if (dcc->discard_wake)
1910                         dcc->discard_wake = false;
1911
1912                 /* clean up pending candidates before going to sleep */
1913                 if (atomic_read(&dcc->queued_discard))
1914                         __wait_all_discard_cmd(sbi, NULL);
1915
1916                 if (f2fs_readonly(sbi->sb))
1917                         continue;
1918                 if (kthread_should_stop())
1919                         return 0;
1920                 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) ||
1921                         !atomic_read(&dcc->discard_cmd_cnt)) {
1922                         wait_ms = dpolicy.max_interval;
1923                         continue;
1924                 }
1925
1926                 sb_start_intwrite(sbi->sb);
1927
1928                 issued = __issue_discard_cmd(sbi, &dpolicy);
1929                 if (issued > 0) {
1930                         __wait_all_discard_cmd(sbi, &dpolicy);
1931                         wait_ms = dpolicy.min_interval;
1932                 } else if (issued == -1) {
1933                         wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
1934                         if (!wait_ms)
1935                                 wait_ms = dpolicy.mid_interval;
1936                 } else {
1937                         wait_ms = dpolicy.max_interval;
1938                 }
1939                 if (!atomic_read(&dcc->discard_cmd_cnt))
1940                         wait_ms = dpolicy.max_interval;
1941
1942                 sb_end_intwrite(sbi->sb);
1943
1944         } while (!kthread_should_stop());
1945         return 0;
1946 }
1947
1948 #ifdef CONFIG_BLK_DEV_ZONED
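     /*
      * On zoned block devices, sequential zones are handled with a zone reset
      * (issued synchronously during recovery, queued otherwise), while
      * conventional zones fall back to a regular queued discard.
      */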
1949 static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
1950                 struct block_device *bdev, block_t blkstart, block_t blklen)
1951 {
1952         sector_t sector, nr_sects;
1953         block_t lblkstart = blkstart;
1954         int devi = 0;
1955         u64 remainder = 0;
1956
1957         if (f2fs_is_multi_device(sbi)) {
1958                 devi = f2fs_target_device_index(sbi, blkstart);
1959                 if (blkstart < FDEV(devi).start_blk ||
1960                     blkstart > FDEV(devi).end_blk) {
1961                         f2fs_err(sbi, "Invalid block %x", blkstart);
1962                         return -EIO;
1963                 }
1964                 blkstart -= FDEV(devi).start_blk;
1965         }
1966
1967         /* For sequential zones, reset the zone write pointer */
1968         if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
1969                 sector = SECTOR_FROM_BLOCK(blkstart);
1970                 nr_sects = SECTOR_FROM_BLOCK(blklen);
1971                 div64_u64_rem(sector, bdev_zone_sectors(bdev), &remainder);
1972
1973                 if (remainder || nr_sects != bdev_zone_sectors(bdev)) {
1974                         f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
1975                                  devi, sbi->s_ndevs ? FDEV(devi).path : "",
1976                                  blkstart, blklen);
1977                         return -EIO;
1978                 }
1979
1980                 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) {
1981                         unsigned int nofs_flags;
1982                         int ret;
1983
1984                         trace_f2fs_issue_reset_zone(bdev, blkstart);
1985                         nofs_flags = memalloc_nofs_save();
1986                         ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
1987                                                 sector, nr_sects);
1988                         memalloc_nofs_restore(nofs_flags);
1989                         return ret;
1990                 }
1991
1992                 __queue_zone_reset_cmd(sbi, bdev, blkstart, lblkstart, blklen);
1993                 return 0;
1994         }
1995
1996         /* For conventional zones, use regular discard if supported */
1997         __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
1998         return 0;
1999 }
2000 #endif
2001
2002 static int __issue_discard_async(struct f2fs_sb_info *sbi,
2003                 struct block_device *bdev, block_t blkstart, block_t blklen)
2004 {
2005 #ifdef CONFIG_BLK_DEV_ZONED
2006         if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
2007                 return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
2008 #endif
2009         __queue_discard_cmd(sbi, bdev, blkstart, blklen);
2010         return 0;
2011 }
2012
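     /*
      * Split [blkstart, blkstart + blklen) at device boundaries, mark the
      * blocks in the per-segment discard bitmaps for block-unit discard, and
      * queue one asynchronous discard per contiguous per-device range.
      */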
2013 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
2014                                 block_t blkstart, block_t blklen)
2015 {
2016         sector_t start = blkstart, len = 0;
2017         struct block_device *bdev;
2018         struct seg_entry *se;
2019         unsigned int offset;
2020         block_t i;
2021         int err = 0;
2022
2023         bdev = f2fs_target_device(sbi, blkstart, NULL);
2024
2025         for (i = blkstart; i < blkstart + blklen; i++, len++) {
2026                 if (i != start) {
2027                         struct block_device *bdev2 =
2028                                 f2fs_target_device(sbi, i, NULL);
2029
2030                         if (bdev2 != bdev) {
2031                                 err = __issue_discard_async(sbi, bdev,
2032                                                 start, len);
2033                                 if (err)
2034                                         return err;
2035                                 bdev = bdev2;
2036                                 start = i;
2037                                 len = 0;
2038                         }
2039                 }
2040
2041                 se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
2042                 offset = GET_BLKOFF_FROM_SEG0(sbi, i);
2043
2044                 if (f2fs_block_unit_discard(sbi) &&
2045                                 !f2fs_test_and_set_bit(offset, se->discard_map))
2046                         sbi->discard_blks--;
2047         }
2048
2049         if (len)
2050                 err = __issue_discard_async(sbi, bdev, start, len);
2051         return err;
2052 }
2053
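     /*
      * Collect discardable block runs of the segment at cpc->trim_start from
      * its SIT bitmaps into small discard entries. In check_only mode, return
      * true as soon as one candidate run is found.
      */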
2054 static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
2055                                                         bool check_only)
2056 {
2057         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2058         struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
2059         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2060         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2061         unsigned long *discard_map = (unsigned long *)se->discard_map;
2062         unsigned long *dmap = SIT_I(sbi)->tmp_map;
2063         unsigned int start = 0, end = -1;
2064         bool force = (cpc->reason & CP_DISCARD);
2065         struct discard_entry *de = NULL;
2066         struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
2067         int i;
2068
2069         if (se->valid_blocks == BLKS_PER_SEG(sbi) ||
2070             !f2fs_hw_support_discard(sbi) ||
2071             !f2fs_block_unit_discard(sbi))
2072                 return false;
2073
2074         if (!force) {
2075                 if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
2076                         SM_I(sbi)->dcc_info->nr_discards >=
2077                                 SM_I(sbi)->dcc_info->max_discards)
2078                         return false;
2079         }
2080
2081         /* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
2082         for (i = 0; i < entries; i++)
2083                 dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
2084                                 (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
2085
2086         while (force || SM_I(sbi)->dcc_info->nr_discards <=
2087                                 SM_I(sbi)->dcc_info->max_discards) {
2088                 start = __find_rev_next_bit(dmap, BLKS_PER_SEG(sbi), end + 1);
2089                 if (start >= BLKS_PER_SEG(sbi))
2090                         break;
2091
2092                 end = __find_rev_next_zero_bit(dmap,
2093                                                 BLKS_PER_SEG(sbi), start + 1);
2094                 if (force && start && end != BLKS_PER_SEG(sbi) &&
2095                     (end - start) < cpc->trim_minlen)
2096                         continue;
2097
2098                 if (check_only)
2099                         return true;
2100
2101                 if (!de) {
2102                         de = f2fs_kmem_cache_alloc(discard_entry_slab,
2103                                                 GFP_F2FS_ZERO, true, NULL);
2104                         de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
2105                         list_add_tail(&de->list, head);
2106                 }
2107
2108                 for (i = start; i < end; i++)
2109                         __set_bit_le(i, (void *)de->discard_map);
2110
2111                 SM_I(sbi)->dcc_info->nr_discards += end - start;
2112         }
2113         return false;
2114 }
2115
2116 static void release_discard_addr(struct discard_entry *entry)
2117 {
2118         list_del(&entry->list);
2119         kmem_cache_free(discard_entry_slab, entry);
2120 }
2121
2122 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
2123 {
2124         struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
2125         struct discard_entry *entry, *this;
2126
2127         /* drop caches */
2128         list_for_each_entry_safe(entry, this, head, list)
2129                 release_discard_addr(entry);
2130 }
2131
2132 /*
2133  * Should call f2fs_clear_prefree_segments after checkpoint is done.
2134  */
2135 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
2136 {
2137         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2138         unsigned int segno;
2139
2140         mutex_lock(&dirty_i->seglist_lock);
2141         for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
2142                 __set_test_and_free(sbi, segno, false);
2143         mutex_unlock(&dirty_i->seglist_lock);
2144 }
2145
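     /*
      * Clear prefree segments from the dirty segmap and issue discards for the
      * freed ranges, then flush the small discard entries collected at
      * checkpoint time and wake the discard thread.
      */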
2146 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
2147                                                 struct cp_control *cpc)
2148 {
2149         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2150         struct list_head *head = &dcc->entry_list;
2151         struct discard_entry *entry, *this;
2152         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2153         unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
2154         unsigned int start = 0, end = -1;
2155         unsigned int secno, start_segno;
2156         bool force = (cpc->reason & CP_DISCARD);
2157         bool section_alignment = F2FS_OPTION(sbi).discard_unit ==
2158                                                 DISCARD_UNIT_SECTION;
2159
2160         if (f2fs_lfs_mode(sbi) && __is_large_section(sbi))
2161                 section_alignment = true;
2162
2163         mutex_lock(&dirty_i->seglist_lock);
2164
2165         while (1) {
2166                 int i;
2167
2168                 if (section_alignment && end != -1)
2169                         end--;
2170                 start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
2171                 if (start >= MAIN_SEGS(sbi))
2172                         break;
2173                 end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
2174                                                                 start + 1);
2175
2176                 if (section_alignment) {
2177                         start = rounddown(start, SEGS_PER_SEC(sbi));
2178                         end = roundup(end, SEGS_PER_SEC(sbi));
2179                 }
2180
2181                 for (i = start; i < end; i++) {
2182                         if (test_and_clear_bit(i, prefree_map))
2183                                 dirty_i->nr_dirty[PRE]--;
2184                 }
2185
2186                 if (!f2fs_realtime_discard_enable(sbi))
2187                         continue;
2188
2189                 if (force && start >= cpc->trim_start &&
2190                                         (end - 1) <= cpc->trim_end)
2191                         continue;
2192
2193                 /* Should cover 2MB zoned device for zone-based reset */
2194                 if (!f2fs_sb_has_blkzoned(sbi) &&
2195                     (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi))) {
2196                         f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
2197                                 SEGS_TO_BLKS(sbi, end - start));
2198                         continue;
2199                 }
2200 next:
2201                 secno = GET_SEC_FROM_SEG(sbi, start);
2202                 start_segno = GET_SEG_FROM_SEC(sbi, secno);
2203                 if (!IS_CURSEC(sbi, secno) &&
2204                         !get_valid_blocks(sbi, start, true))
2205                         f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
2206                                                 BLKS_PER_SEC(sbi));
2207
2208                 start = start_segno + SEGS_PER_SEC(sbi);
2209                 if (start < end)
2210                         goto next;
2211                 else
2212                         end = start - 1;
2213         }
2214         mutex_unlock(&dirty_i->seglist_lock);
2215
2216         if (!f2fs_block_unit_discard(sbi))
2217                 goto wakeup;
2218
2219         /* send small discards */
2220         list_for_each_entry_safe(entry, this, head, list) {
2221                 unsigned int cur_pos = 0, next_pos, len, total_len = 0;
2222                 bool is_valid = test_bit_le(0, entry->discard_map);
2223
2224 find_next:
2225                 if (is_valid) {
2226                         next_pos = find_next_zero_bit_le(entry->discard_map,
2227                                                 BLKS_PER_SEG(sbi), cur_pos);
2228                         len = next_pos - cur_pos;
2229
2230                         if (f2fs_sb_has_blkzoned(sbi) ||
2231                             (force && len < cpc->trim_minlen))
2232                                 goto skip;
2233
2234                         f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
2235                                                                         len);
2236                         total_len += len;
2237                 } else {
2238                         next_pos = find_next_bit_le(entry->discard_map,
2239                                                 BLKS_PER_SEG(sbi), cur_pos);
2240                 }
2241 skip:
2242                 cur_pos = next_pos;
2243                 is_valid = !is_valid;
2244
2245                 if (cur_pos < BLKS_PER_SEG(sbi))
2246                         goto find_next;
2247
2248                 release_discard_addr(entry);
2249                 dcc->nr_discards -= total_len;
2250         }
2251
2252 wakeup:
2253         wake_up_discard_thread(sbi, false);
2254 }
2255
2256 int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
2257 {
2258         dev_t dev = sbi->sb->s_bdev->bd_dev;
2259         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2260         int err = 0;
2261
2262         if (f2fs_sb_has_readonly(sbi)) {
2263                 f2fs_info(sbi,
2264                         "Skip to start discard thread for readonly image");
2265                 return 0;
2266         }
2267
2268         if (!f2fs_realtime_discard_enable(sbi))
2269                 return 0;
2270
2271         dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
2272                                 "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
2273         if (IS_ERR(dcc->f2fs_issue_discard)) {
2274                 err = PTR_ERR(dcc->f2fs_issue_discard);
2275                 dcc->f2fs_issue_discard = NULL;
2276         }
2277
2278         return err;
2279 }
2280
2281 static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
2282 {
2283         struct discard_cmd_control *dcc;
2284         int err = 0, i;
2285
2286         if (SM_I(sbi)->dcc_info) {
2287                 dcc = SM_I(sbi)->dcc_info;
2288                 goto init_thread;
2289         }
2290
2291         dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
2292         if (!dcc)
2293                 return -ENOMEM;
2294
2295         dcc->discard_io_aware_gran = MAX_PLIST_NUM;
2296         dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
2297         dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY;
2298         dcc->discard_io_aware = DPOLICY_IO_AWARE_ENABLE;
2299         if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
2300                 dcc->discard_granularity = BLKS_PER_SEG(sbi);
2301         else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
2302                 dcc->discard_granularity = BLKS_PER_SEC(sbi);
2303
2304         INIT_LIST_HEAD(&dcc->entry_list);
2305         for (i = 0; i < MAX_PLIST_NUM; i++)
2306                 INIT_LIST_HEAD(&dcc->pend_list[i]);
2307         INIT_LIST_HEAD(&dcc->wait_list);
2308         INIT_LIST_HEAD(&dcc->fstrim_list);
2309         mutex_init(&dcc->cmd_lock);
2310         atomic_set(&dcc->issued_discard, 0);
2311         atomic_set(&dcc->queued_discard, 0);
2312         atomic_set(&dcc->discard_cmd_cnt, 0);
2313         dcc->nr_discards = 0;
2314         dcc->max_discards = SEGS_TO_BLKS(sbi, MAIN_SEGS(sbi));
2315         dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST;
2316         dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME;
2317         dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME;
2318         dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME;
2319         dcc->discard_urgent_util = DEF_DISCARD_URGENT_UTIL;
2320         dcc->undiscard_blks = 0;
2321         dcc->next_pos = 0;
2322         dcc->root = RB_ROOT_CACHED;
2323         dcc->rbtree_check = false;
2324
2325         init_waitqueue_head(&dcc->discard_wait_queue);
2326         SM_I(sbi)->dcc_info = dcc;
2327 init_thread:
2328         err = f2fs_start_discard_thread(sbi);
2329         if (err) {
2330                 kfree(dcc);
2331                 SM_I(sbi)->dcc_info = NULL;
2332         }
2333
2334         return err;
2335 }
2336
2337 static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
2338 {
2339         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2340
2341         if (!dcc)
2342                 return;
2343
2344         f2fs_stop_discard_thread(sbi);
2345
2346         /*
2347          * Recovery can cache discard commands, so the error path of
2348          * fill_super() needs a chance to issue and handle them.
2349          */
2350         f2fs_issue_discard_timeout(sbi);
2351
2352         kfree(dcc);
2353         SM_I(sbi)->dcc_info = NULL;
2354 }
2355
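     /* Mark segno's SIT entry dirty; returns true if it was already dirty */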
2356 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
2357 {
2358         struct sit_info *sit_i = SIT_I(sbi);
2359
2360         if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
2361                 sit_i->dirty_sentries++;
2362                 return false;
2363         }
2364
2365         return true;
2366 }
2367
2368 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
2369                                         unsigned int segno, int modified)
2370 {
2371         struct seg_entry *se = get_seg_entry(sbi, segno);
2372
2373         se->type = type;
2374         if (modified)
2375                 __mark_sit_entry_dirty(sbi, segno);
2376 }
2377
2378 static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi,
2379                                                                 block_t blkaddr)
2380 {
2381         unsigned int segno = GET_SEGNO(sbi, blkaddr);
2382
2383         if (segno == NULL_SEGNO)
2384                 return 0;
2385         return get_seg_entry(sbi, segno)->mtime;
2386 }
2387
2388 static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr,
2389                                                 unsigned long long old_mtime)
2390 {
2391         struct seg_entry *se;
2392         unsigned int segno = GET_SEGNO(sbi, blkaddr);
2393         unsigned long long ctime = get_mtime(sbi, false);
2394         unsigned long long mtime = old_mtime ? old_mtime : ctime;
2395
2396         if (segno == NULL_SEGNO)
2397                 return;
2398
2399         se = get_seg_entry(sbi, segno);
2400
2401         if (!se->mtime)
2402                 se->mtime = mtime;
2403         else
2404                 se->mtime = div_u64(se->mtime * se->valid_blocks + mtime,
2405                                                 se->valid_blocks + 1);
2406
2407         if (ctime > SIT_I(sbi)->max_mtime)
2408                 SIT_I(sbi)->max_mtime = ctime;
2409 }
2410
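     /*
      * Apply a block allocation (del > 0) or invalidation (del < 0) at blkaddr
      * to the owning segment: adjust its valid block count, valid/discard/ckpt
      * bitmaps and section counters, and mark its SIT entry dirty.
      */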
2411 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
2412 {
2413         struct seg_entry *se;
2414         unsigned int segno, offset;
2415         long int new_vblocks;
2416         bool exist;
2417 #ifdef CONFIG_F2FS_CHECK_FS
2418         bool mir_exist;
2419 #endif
2420
2421         segno = GET_SEGNO(sbi, blkaddr);
2422         if (segno == NULL_SEGNO)
2423                 return;
2424
2425         se = get_seg_entry(sbi, segno);
2426         new_vblocks = se->valid_blocks + del;
2427         offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2428
2429         f2fs_bug_on(sbi, (new_vblocks < 0 ||
2430                         (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno))));
2431
2432         se->valid_blocks = new_vblocks;
2433
2434         /* Update valid block bitmap */
2435         if (del > 0) {
2436                 exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
2437 #ifdef CONFIG_F2FS_CHECK_FS
2438                 mir_exist = f2fs_test_and_set_bit(offset,
2439                                                 se->cur_valid_map_mir);
2440                 if (unlikely(exist != mir_exist)) {
2441                         f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
2442                                  blkaddr, exist);
2443                         f2fs_bug_on(sbi, 1);
2444                 }
2445 #endif
2446                 if (unlikely(exist)) {
2447                         f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
2448                                  blkaddr);
2449                         f2fs_bug_on(sbi, 1);
2450                         se->valid_blocks--;
2451                         del = 0;
2452                 }
2453
2454                 if (f2fs_block_unit_discard(sbi) &&
2455                                 !f2fs_test_and_set_bit(offset, se->discard_map))
2456                         sbi->discard_blks--;
2457
2458                 /*
2459                  * SSR should never reuse a block which is checkpointed
2460                  * or newly invalidated.
2461                  */
2462                 if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
2463                         if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
2464                                 se->ckpt_valid_blocks++;
2465                 }
2466         } else {
2467                 exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
2468 #ifdef CONFIG_F2FS_CHECK_FS
2469                 mir_exist = f2fs_test_and_clear_bit(offset,
2470                                                 se->cur_valid_map_mir);
2471                 if (unlikely(exist != mir_exist)) {
2472                         f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
2473                                  blkaddr, exist);
2474                         f2fs_bug_on(sbi, 1);
2475                 }
2476 #endif
2477                 if (unlikely(!exist)) {
2478                         f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
2479                                  blkaddr);
2480                         f2fs_bug_on(sbi, 1);
2481                         se->valid_blocks++;
2482                         del = 0;
2483                 } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2484                         /*
2485                          * If checkpoints are off, we must not reuse data that
2486                          * was used in the previous checkpoint. If it was used
2487                          * before, we must track that to know how much space we
2488                          * really have.
2489                          */
2490                         if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
2491                                 spin_lock(&sbi->stat_lock);
2492                                 sbi->unusable_block_count++;
2493                                 spin_unlock(&sbi->stat_lock);
2494                         }
2495                 }
2496
2497                 if (f2fs_block_unit_discard(sbi) &&
2498                         f2fs_test_and_clear_bit(offset, se->discard_map))
2499                         sbi->discard_blks++;
2500         }
2501         if (!f2fs_test_bit(offset, se->ckpt_valid_map))
2502                 se->ckpt_valid_blocks += del;
2503
2504         __mark_sit_entry_dirty(sbi, segno);
2505
2506         /* update total number of valid blocks to be written in ckpt area */
2507         SIT_I(sbi)->written_valid_blocks += del;
2508
2509         if (__is_large_section(sbi))
2510                 get_sec_entry(sbi, segno)->valid_blocks += del;
2511 }
2512
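     /*
      * Invalidate the block at addr: drop it from internal caches, update the
      * owning segment's mtime and valid-block accounting, and mark the segment
      * dirty.
      */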
2513 void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
2514 {
2515         unsigned int segno = GET_SEGNO(sbi, addr);
2516         struct sit_info *sit_i = SIT_I(sbi);
2517
2518         f2fs_bug_on(sbi, addr == NULL_ADDR);
2519         if (addr == NEW_ADDR || addr == COMPRESS_ADDR)
2520                 return;
2521
2522         f2fs_invalidate_internal_cache(sbi, addr);
2523
2524         /* add it into sit main buffer */
2525         down_write(&sit_i->sentry_lock);
2526
2527         update_segment_mtime(sbi, addr, 0);
2528         update_sit_entry(sbi, addr, -1);
2529
2530         /* add it into dirty seglist */
2531         locate_dirty_segment(sbi, segno);
2532
2533         up_write(&sit_i->sentry_lock);
2534 }
2535
2536 bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
2537 {
2538         struct sit_info *sit_i = SIT_I(sbi);
2539         unsigned int segno, offset;
2540         struct seg_entry *se;
2541         bool is_cp = false;
2542
2543         if (!__is_valid_data_blkaddr(blkaddr))
2544                 return true;
2545
2546         down_read(&sit_i->sentry_lock);
2547
2548         segno = GET_SEGNO(sbi, blkaddr);
2549         se = get_seg_entry(sbi, segno);
2550         offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2551
2552         if (f2fs_test_bit(offset, se->ckpt_valid_map))
2553                 is_cp = true;
2554
2555         up_read(&sit_i->sentry_lock);
2556
2557         return is_cp;
2558 }
2559
2560 static unsigned short f2fs_curseg_valid_blocks(struct f2fs_sb_info *sbi, int type)
2561 {
2562         struct curseg_info *curseg = CURSEG_I(sbi, type);
2563
2564         if (sbi->ckpt->alloc_type[type] == SSR)
2565                 return BLKS_PER_SEG(sbi);
2566         return curseg->next_blkoff;
2567 }
2568
2569 /*
2570  * Calculate the number of current summary pages for writing
2571  */
2572 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
2573 {
2574         int valid_sum_count = 0;
2575         int i, sum_in_page;
2576
2577         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2578                 if (sbi->ckpt->alloc_type[i] != SSR && for_ra)
2579                         valid_sum_count +=
2580                                 le16_to_cpu(F2FS_CKPT(sbi)->cur_data_blkoff[i]);
2581                 else
2582                         valid_sum_count += f2fs_curseg_valid_blocks(sbi, i);
2583         }
2584
2585         sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
2586                         SUM_FOOTER_SIZE) / SUMMARY_SIZE;
2587         if (valid_sum_count <= sum_in_page)
2588                 return 1;
2589         else if ((valid_sum_count - sum_in_page) <=
2590                 (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
2591                 return 2;
2592         return 3;
2593 }
2594
2595 /*
2596  * Caller should put this summary page
2597  */
2598 struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
2599 {
2600         if (unlikely(f2fs_cp_error(sbi)))
2601                 return ERR_PTR(-EIO);
2602         return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
2603 }
2604
2605 void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
2606                                         void *src, block_t blk_addr)
2607 {
2608         struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2609
2610         memcpy(page_address(page), src, PAGE_SIZE);
2611         set_page_dirty(page);
2612         f2fs_put_page(page, 1);
2613 }
2614
2615 static void write_sum_page(struct f2fs_sb_info *sbi,
2616                         struct f2fs_summary_block *sum_blk, block_t blk_addr)
2617 {
2618         f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
2619 }
2620
2621 static void write_current_sum_page(struct f2fs_sb_info *sbi,
2622                                                 int type, block_t blk_addr)
2623 {
2624         struct curseg_info *curseg = CURSEG_I(sbi, type);
2625         struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2626         struct f2fs_summary_block *src = curseg->sum_blk;
2627         struct f2fs_summary_block *dst;
2628
2629         dst = (struct f2fs_summary_block *)page_address(page);
2630         memset(dst, 0, PAGE_SIZE);
2631
2632         mutex_lock(&curseg->curseg_mutex);
2633
2634         down_read(&curseg->journal_rwsem);
2635         memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
2636         up_read(&curseg->journal_rwsem);
2637
2638         memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
2639         memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
2640
2641         mutex_unlock(&curseg->curseg_mutex);
2642
2643         set_page_dirty(page);
2644         f2fs_put_page(page, 1);
2645 }
2646
2647 static int is_next_segment_free(struct f2fs_sb_info *sbi,
2648                                 struct curseg_info *curseg, int type)
2649 {
2650         unsigned int segno = curseg->segno + 1;
2651         struct free_segmap_info *free_i = FREE_I(sbi);
2652
2653         if (segno < MAIN_SEGS(sbi) && segno % SEGS_PER_SEC(sbi))
2654                 return !test_bit(segno, free_i->free_segmap);
2655         return 0;
2656 }
2657
2658 /*
2659  * Find a new segment from the free segments bitmap in the right order.
2660  * This function should always succeed; otherwise it is a BUG.
2661  */
2662 static int get_new_segment(struct f2fs_sb_info *sbi,
2663                         unsigned int *newseg, bool new_sec, bool pinning)
2664 {
2665         struct free_segmap_info *free_i = FREE_I(sbi);
2666         unsigned int segno, secno, zoneno;
2667         unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
2668         unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
2669         unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
2670         bool init = true;
2671         int i;
2672         int ret = 0;
2673
2674         spin_lock(&free_i->segmap_lock);
2675
2676         if (time_to_inject(sbi, FAULT_NO_SEGMENT)) {
2677                 ret = -ENOSPC;
2678                 goto out_unlock;
2679         }
2680
2681         if (!new_sec && ((*newseg + 1) % SEGS_PER_SEC(sbi))) {
2682                 segno = find_next_zero_bit(free_i->free_segmap,
2683                         GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
2684                 if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
2685                         goto got_it;
2686         }
2687
2688         /*
2689          * If we format f2fs on zoned storage, let's try to get pinned sections
2690          * from the beginning of the storage, which should be a conventional zone.
2691          */
2692         if (f2fs_sb_has_blkzoned(sbi)) {
2693                 segno = pinning ? 0 : max(first_zoned_segno(sbi), *newseg);
2694                 hint = GET_SEC_FROM_SEG(sbi, segno);
2695         }
2696
2697 find_other_zone:
2698         secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
2699         if (secno >= MAIN_SECS(sbi)) {
2700                 secno = find_first_zero_bit(free_i->free_secmap,
2701                                                         MAIN_SECS(sbi));
2702                 if (secno >= MAIN_SECS(sbi)) {
2703                         ret = -ENOSPC;
2704                         goto out_unlock;
2705                 }
2706         }
2707         segno = GET_SEG_FROM_SEC(sbi, secno);
2708         zoneno = GET_ZONE_FROM_SEC(sbi, secno);
2709
2710         /* give up on finding another zone */
2711         if (!init)
2712                 goto got_it;
2713         if (sbi->secs_per_zone == 1)
2714                 goto got_it;
2715         if (zoneno == old_zoneno)
2716                 goto got_it;
2717         for (i = 0; i < NR_CURSEG_TYPE; i++)
2718                 if (CURSEG_I(sbi, i)->zone == zoneno)
2719                         break;
2720
2721         if (i < NR_CURSEG_TYPE) {
2722                 /* zone is in use, try another */
2723                 if (zoneno + 1 >= total_zones)
2724                         hint = 0;
2725                 else
2726                         hint = (zoneno + 1) * sbi->secs_per_zone;
2727                 init = false;
2728                 goto find_other_zone;
2729         }
2730 got_it:
2731         /* set it as dirty segment in free segmap */
2732         f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
2733
2734         /* no free section in conventional zone */
2735         if (new_sec && pinning &&
2736                 !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) {
2737                 ret = -EAGAIN;
2738                 goto out_unlock;
2739         }
2740         __set_inuse(sbi, segno);
2741         *newseg = segno;
2742 out_unlock:
2743         spin_unlock(&free_i->segmap_lock);
2744
2745         if (ret == -ENOSPC) {
2746                 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_NO_SEGMENT);
2747                 f2fs_bug_on(sbi, 1);
2748         }
2749         return ret;
2750 }
2751
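     /*
      * Switch the current segment to curseg->next_segno and reinitialize its
      * summary footer and SIT entry type.
      */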
2752 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
2753 {
2754         struct curseg_info *curseg = CURSEG_I(sbi, type);
2755         struct summary_footer *sum_footer;
2756         unsigned short seg_type = curseg->seg_type;
2757
2758         /* only happens when get_new_segment() fails */
2759         if (curseg->next_segno == NULL_SEGNO)
2760                 return;
2761
2762         curseg->inited = true;
2763         curseg->segno = curseg->next_segno;
2764         curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
2765         curseg->next_blkoff = 0;
2766         curseg->next_segno = NULL_SEGNO;
2767
2768         sum_footer = &(curseg->sum_blk->footer);
2769         memset(sum_footer, 0, sizeof(struct summary_footer));
2770
2771         sanity_check_seg_type(sbi, seg_type);
2772
2773         if (IS_DATASEG(seg_type))
2774                 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
2775         if (IS_NODESEG(seg_type))
2776                 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
2777         __set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
2778 }
2779
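     /*
      * Pick the segment-number hint handed to get_new_segment(), based on the
      * allocation mode, randomization settings and the state of the current log.
      */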
2780 static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
2781 {
2782         struct curseg_info *curseg = CURSEG_I(sbi, type);
2783         unsigned short seg_type = curseg->seg_type;
2784
2785         sanity_check_seg_type(sbi, seg_type);
2786         if (f2fs_need_rand_seg(sbi))
2787                 return get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi));
2788
2789         if (__is_large_section(sbi))
2790                 return curseg->segno;
2791
2792         /* the in-memory log may not be located on any segment after mount */
2793         if (!curseg->inited)
2794                 return 0;
2795
2796         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2797                 return 0;
2798
2799         if (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type))
2800                 return 0;
2801
2802         if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
2803                 return SIT_I(sbi)->last_victim[ALLOC_NEXT];
2804
2805         /* find segments from 0 to reuse freed segments */
2806         if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
2807                 return 0;
2808
2809         return curseg->segno;
2810 }
2811
2812 /*
2813  * Allocate a current working segment.
2814  * This function always allocates a free segment in LFS manner.
2815  */
2816 static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
2817 {
2818         struct curseg_info *curseg = CURSEG_I(sbi, type);
2819         unsigned int segno = curseg->segno;
2820         bool pinning = type == CURSEG_COLD_DATA_PINNED;
2821         int ret;
2822
2823         if (curseg->inited)
2824                 write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno));
2825
2826         segno = __get_next_segno(sbi, type);
2827         ret = get_new_segment(sbi, &segno, new_sec, pinning);
2828         if (ret) {
2829                 if (ret == -ENOSPC)
2830                         curseg->segno = NULL_SEGNO;
2831                 return ret;
2832         }
2833
2834         curseg->next_segno = segno;
2835         reset_curseg(sbi, type, 1);
2836         curseg->alloc_type = LFS;
2837         if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
2838                 curseg->fragment_remained_chunk =
2839                                 get_random_u32_inclusive(1, sbi->max_fragment_chunk);
2840         return 0;
2841 }
2842
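     /*
      * Return the first block offset in segno, at or after start, that is free
      * in both the current and the checkpointed valid-block bitmaps.
      */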
2843 static int __next_free_blkoff(struct f2fs_sb_info *sbi,
2844                                         int segno, block_t start)
2845 {
2846         struct seg_entry *se = get_seg_entry(sbi, segno);
2847         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2848         unsigned long *target_map = SIT_I(sbi)->tmp_map;
2849         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2850         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2851         int i;
2852
2853         for (i = 0; i < entries; i++)
2854                 target_map[i] = ckpt_map[i] | cur_map[i];
2855
2856         return __find_rev_next_zero_bit(target_map, BLKS_PER_SEG(sbi), start);
2857 }
2858
2859 static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi,
2860                 struct curseg_info *seg)
2861 {
2862         return __next_free_blkoff(sbi, seg->segno, seg->next_blkoff + 1);
2863 }
2864
2865 bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
2866 {
2867         return __next_free_blkoff(sbi, segno, 0) < BLKS_PER_SEG(sbi);
2868 }
2869
2870 /*
2871  * This function always allocates a used segment (from the dirty seglist) in an
2872  * SSR manner, so it should recover the existing segment information of valid blocks.
2873  */
2874 static int change_curseg(struct f2fs_sb_info *sbi, int type)
2875 {
2876         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2877         struct curseg_info *curseg = CURSEG_I(sbi, type);
2878         unsigned int new_segno = curseg->next_segno;
2879         struct f2fs_summary_block *sum_node;
2880         struct page *sum_page;
2881
2882         write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno));
2883
2884         __set_test_and_inuse(sbi, new_segno);
2885
2886         mutex_lock(&dirty_i->seglist_lock);
2887         __remove_dirty_segment(sbi, new_segno, PRE);
2888         __remove_dirty_segment(sbi, new_segno, DIRTY);
2889         mutex_unlock(&dirty_i->seglist_lock);
2890
2891         reset_curseg(sbi, type, 1);
2892         curseg->alloc_type = SSR;
2893         curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0);
2894
2895         sum_page = f2fs_get_sum_page(sbi, new_segno);
2896         if (IS_ERR(sum_page)) {
2897                 /* on cp_error, make sure GC cannot use stale summary pages */
2898                 memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
2899                 return PTR_ERR(sum_page);
2900         }
2901         sum_node = (struct f2fs_summary_block *)page_address(sum_page);
2902         memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
2903         f2fs_put_page(sum_page, 1);
2904         return 0;
2905 }
2906
2907 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
2908                                 int alloc_mode, unsigned long long age);
2909
2910 static int get_atssr_segment(struct f2fs_sb_info *sbi, int type,
2911                                         int target_type, int alloc_mode,
2912                                         unsigned long long age)
2913 {
2914         struct curseg_info *curseg = CURSEG_I(sbi, type);
2915         int ret = 0;
2916
2917         curseg->seg_type = target_type;
2918
2919         if (get_ssr_segment(sbi, type, alloc_mode, age)) {
2920                 struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
2921
2922                 curseg->seg_type = se->type;
2923                 ret = change_curseg(sbi, type);
2924         } else {
2925                 /* allocate cold segment by default */
2926                 curseg->seg_type = CURSEG_COLD_DATA;
2927                 ret = new_curseg(sbi, type, true);
2928         }
2929         stat_inc_seg_type(sbi, curseg);
2930         return ret;
2931 }
2932
2933 static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
2934 {
2935         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
2936         int ret = 0;
2937
2938         if (!sbi->am.atgc_enabled)
2939                 return 0;
2940
2941         f2fs_down_read(&SM_I(sbi)->curseg_lock);
2942
2943         mutex_lock(&curseg->curseg_mutex);
2944         down_write(&SIT_I(sbi)->sentry_lock);
2945
2946         ret = get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC,
2947                                         CURSEG_COLD_DATA, SSR, 0);
2948
2949         up_write(&SIT_I(sbi)->sentry_lock);
2950         mutex_unlock(&curseg->curseg_mutex);
2951
2952         f2fs_up_read(&SM_I(sbi)->curseg_lock);
2953         return ret;
2954 }
2955 int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
2956 {
2957         return __f2fs_init_atgc_curseg(sbi);
2958 }
2959
2960 static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
2961 {
2962         struct curseg_info *curseg = CURSEG_I(sbi, type);
2963
2964         mutex_lock(&curseg->curseg_mutex);
2965         if (!curseg->inited)
2966                 goto out;
2967
2968         if (get_valid_blocks(sbi, curseg->segno, false)) {
2969                 write_sum_page(sbi, curseg->sum_blk,
2970                                 GET_SUM_BLOCK(sbi, curseg->segno));
2971         } else {
2972                 mutex_lock(&DIRTY_I(sbi)->seglist_lock);
2973                 __set_test_and_free(sbi, curseg->segno, true);
2974                 mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
2975         }
2976 out:
2977         mutex_unlock(&curseg->curseg_mutex);
2978 }
2979
2980 void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi)
2981 {
2982         __f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
2983
2984         if (sbi->am.atgc_enabled)
2985                 __f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
2986 }
2987
2988 static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
2989 {
2990         struct curseg_info *curseg = CURSEG_I(sbi, type);
2991
2992         mutex_lock(&curseg->curseg_mutex);
2993         if (!curseg->inited)
2994                 goto out;
2995         if (get_valid_blocks(sbi, curseg->segno, false))
2996                 goto out;
2997
2998         mutex_lock(&DIRTY_I(sbi)->seglist_lock);
2999         __set_test_and_inuse(sbi, curseg->segno);
3000         mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
3001 out:
3002         mutex_unlock(&curseg->curseg_mutex);
3003 }
3004
3005 void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi)
3006 {
3007         __f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
3008
3009         if (sbi->am.atgc_enabled)
3010                 __f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
3011 }
3012
3013 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
3014                                 int alloc_mode, unsigned long long age)
3015 {
3016         struct curseg_info *curseg = CURSEG_I(sbi, type);
3017         unsigned int segno = NULL_SEGNO;
3018         unsigned short seg_type = curseg->seg_type;
3019         int i, cnt;
3020         bool reversed = false;
3021
3022         sanity_check_seg_type(sbi, seg_type);
3023
3024         /* f2fs_need_SSR() has already forced us to do this */
3025         if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) {
3026                 curseg->next_segno = segno;
3027                 return 1;
3028         }
3029
3030         /* For node segments, let's do SSR more intensively */
3031         if (IS_NODESEG(seg_type)) {
3032                 if (seg_type >= CURSEG_WARM_NODE) {
3033                         reversed = true;
3034                         i = CURSEG_COLD_NODE;
3035                 } else {
3036                         i = CURSEG_HOT_NODE;
3037                 }
3038                 cnt = NR_CURSEG_NODE_TYPE;
3039         } else {
3040                 if (seg_type >= CURSEG_WARM_DATA) {
3041                         reversed = true;
3042                         i = CURSEG_COLD_DATA;
3043                 } else {
3044                         i = CURSEG_HOT_DATA;
3045                 }
3046                 cnt = NR_CURSEG_DATA_TYPE;
3047         }
3048
3049         for (; cnt-- > 0; reversed ? i-- : i++) {
3050                 if (i == seg_type)
3051                         continue;
3052                 if (!f2fs_get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) {
3053                         curseg->next_segno = segno;
3054                         return 1;
3055                 }
3056         }
3057
3058         /* find a segment with valid_blocks == 0 in the dirty list */
3059         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
3060                 segno = get_free_segment(sbi);
3061                 if (segno != NULL_SEGNO) {
3062                         curseg->next_segno = segno;
3063                         return 1;
3064                 }
3065         }
3066         return 0;
3067 }
3068
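/*
 * Illustrative note (editorial addition): victim-search order used by
 * get_ssr_segment() above. The current temperature is tried first via the
 * initial f2fs_get_victim() call; only then are the sibling logs scanned.
 * For seg_type == CURSEG_WARM_DATA, 'reversed' is true and the scan starts
 * at CURSEG_COLD_DATA, so the overall fallback order is
 *
 *	WARM_DATA (direct try) -> COLD_DATA -> HOT_DATA
 *
 * (WARM_DATA is skipped inside the loop because i == seg_type). When
 * checkpointing is disabled, a fully invalidated segment from the dirty
 * list is accepted as a last resort.
 */
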
3069 static bool need_new_seg(struct f2fs_sb_info *sbi, int type)
3070 {
3071         struct curseg_info *curseg = CURSEG_I(sbi, type);
3072
3073         if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
3074             curseg->seg_type == CURSEG_WARM_NODE)
3075                 return true;
3076         if (curseg->alloc_type == LFS &&
3077             is_next_segment_free(sbi, curseg, type) &&
3078             likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
3079                 return true;
3080         if (!f2fs_need_SSR(sbi) || !get_ssr_segment(sbi, type, SSR, 0))
3081                 return true;
3082         return false;
3083 }
3084
3085 int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
3086                                         unsigned int start, unsigned int end)
3087 {
3088         struct curseg_info *curseg = CURSEG_I(sbi, type);
3089         unsigned int segno;
3090         int ret = 0;
3091
3092         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3093         mutex_lock(&curseg->curseg_mutex);
3094         down_write(&SIT_I(sbi)->sentry_lock);
3095
3096         segno = CURSEG_I(sbi, type)->segno;
3097         if (segno < start || segno > end)
3098                 goto unlock;
3099
3100         if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
3101                 ret = change_curseg(sbi, type);
3102         else
3103                 ret = new_curseg(sbi, type, true);
3104
3105         stat_inc_seg_type(sbi, curseg);
3106
3107         locate_dirty_segment(sbi, segno);
3108 unlock:
3109         up_write(&SIT_I(sbi)->sentry_lock);
3110
3111         if (segno != curseg->segno)
3112                 f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
3113                             type, segno, curseg->segno);
3114
3115         mutex_unlock(&curseg->curseg_mutex);
3116         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3117         return ret;
3118 }
3119
3120 static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
3121                                                 bool new_sec, bool force)
3122 {
3123         struct curseg_info *curseg = CURSEG_I(sbi, type);
3124         unsigned int old_segno;
3125         int err = 0;
3126
3127         if (type == CURSEG_COLD_DATA_PINNED && !curseg->inited)
3128                 goto allocate;
3129
3130         if (!force && curseg->inited &&
3131             !curseg->next_blkoff &&
3132             !get_valid_blocks(sbi, curseg->segno, new_sec) &&
3133             !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
3134                 return 0;
3135
3136 allocate:
3137         old_segno = curseg->segno;
3138         err = new_curseg(sbi, type, true);
3139         if (err)
3140                 return err;
3141         stat_inc_seg_type(sbi, curseg);
3142         locate_dirty_segment(sbi, old_segno);
3143         return 0;
3144 }
3145
3146 int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
3147 {
3148         int ret;
3149
3150         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3151         down_write(&SIT_I(sbi)->sentry_lock);
3152         ret = __allocate_new_segment(sbi, type, true, force);
3153         up_write(&SIT_I(sbi)->sentry_lock);
3154         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3155
3156         return ret;
3157 }
3158
3159 int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi)
3160 {
3161         int err;
3162         bool gc_required = true;
3163
3164 retry:
3165         f2fs_lock_op(sbi);
3166         err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
3167         f2fs_unlock_op(sbi);
3168
3169         if (f2fs_sb_has_blkzoned(sbi) && err == -EAGAIN && gc_required) {
3170                 f2fs_down_write(&sbi->gc_lock);
3171                 err = f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1);
3172                 f2fs_up_write(&sbi->gc_lock);
3173
3174                 gc_required = false;
3175                 if (!err)
3176                         goto retry;
3177         }
3178
3179         return err;
3180 }
3181
3182 int f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
3183 {
3184         int i;
3185         int err = 0;
3186
3187         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3188         down_write(&SIT_I(sbi)->sentry_lock);
3189         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
3190                 err += __allocate_new_segment(sbi, i, false, false);
3191         up_write(&SIT_I(sbi)->sentry_lock);
3192         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3193
3194         return err;
3195 }
3196
3197 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
3198                                                 struct cp_control *cpc)
3199 {
3200         __u64 trim_start = cpc->trim_start;
3201         bool has_candidate = false;
3202
3203         down_write(&SIT_I(sbi)->sentry_lock);
3204         for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
3205                 if (add_discard_addrs(sbi, cpc, true)) {
3206                         has_candidate = true;
3207                         break;
3208                 }
3209         }
3210         up_write(&SIT_I(sbi)->sentry_lock);
3211
3212         cpc->trim_start = trim_start;
3213         return has_candidate;
3214 }
3215
3216 static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
3217                                         struct discard_policy *dpolicy,
3218                                         unsigned int start, unsigned int end)
3219 {
3220         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
3221         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
3222         struct rb_node **insert_p = NULL, *insert_parent = NULL;
3223         struct discard_cmd *dc;
3224         struct blk_plug plug;
3225         int issued;
3226         unsigned int trimmed = 0;
3227
3228 next:
3229         issued = 0;
3230
3231         mutex_lock(&dcc->cmd_lock);
3232         if (unlikely(dcc->rbtree_check))
3233                 f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi));
3234
3235         dc = __lookup_discard_cmd_ret(&dcc->root, start,
3236                                 &prev_dc, &next_dc, &insert_p, &insert_parent);
3237         if (!dc)
3238                 dc = next_dc;
3239
3240         blk_start_plug(&plug);
3241
3242         while (dc && dc->di.lstart <= end) {
3243                 struct rb_node *node;
3244                 int err = 0;
3245
3246                 if (dc->di.len < dpolicy->granularity)
3247                         goto skip;
3248
3249                 if (dc->state != D_PREP) {
3250                         list_move_tail(&dc->list, &dcc->fstrim_list);
3251                         goto skip;
3252                 }
3253
3254                 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
3255
3256                 if (issued >= dpolicy->max_requests) {
3257                         start = dc->di.lstart + dc->di.len;
3258
3259                         if (err)
3260                                 __remove_discard_cmd(sbi, dc);
3261
3262                         blk_finish_plug(&plug);
3263                         mutex_unlock(&dcc->cmd_lock);
3264                         trimmed += __wait_all_discard_cmd(sbi, NULL);
3265                         f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
3266                         goto next;
3267                 }
3268 skip:
3269                 node = rb_next(&dc->rb_node);
3270                 if (err)
3271                         __remove_discard_cmd(sbi, dc);
3272                 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
3273
3274                 if (fatal_signal_pending(current))
3275                         break;
3276         }
3277
3278         blk_finish_plug(&plug);
3279         mutex_unlock(&dcc->cmd_lock);
3280
3281         return trimmed;
3282 }
3283
3284 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
3285 {
3286         __u64 start = F2FS_BYTES_TO_BLK(range->start);
3287         __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
3288         unsigned int start_segno, end_segno;
3289         block_t start_block, end_block;
3290         struct cp_control cpc;
3291         struct discard_policy dpolicy;
3292         unsigned long long trimmed = 0;
3293         int err = 0;
3294         bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
3295
3296         if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
3297                 return -EINVAL;
3298
3299         if (end < MAIN_BLKADDR(sbi))
3300                 goto out;
3301
3302         if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
3303                 f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
3304                 return -EFSCORRUPTED;
3305         }
3306
3307         /* start/end segment number in main_area */
3308         start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
3309         end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
3310                                                 GET_SEGNO(sbi, end);
3311         if (need_align) {
3312                 start_segno = rounddown(start_segno, SEGS_PER_SEC(sbi));
3313                 end_segno = roundup(end_segno + 1, SEGS_PER_SEC(sbi)) - 1;
3314         }
3315
3316         cpc.reason = CP_DISCARD;
3317         cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
3318         cpc.trim_start = start_segno;
3319         cpc.trim_end = end_segno;
3320
3321         if (sbi->discard_blks == 0)
3322                 goto out;
3323
3324         f2fs_down_write(&sbi->gc_lock);
3325         stat_inc_cp_call_count(sbi, TOTAL_CALL);
3326         err = f2fs_write_checkpoint(sbi, &cpc);
3327         f2fs_up_write(&sbi->gc_lock);
3328         if (err)
3329                 goto out;
3330
3331         /*
3332          * We queued discard candidates, but we do not actually need to wait
3333          * for all of them: with the runtime discard option enabled, they will
3334          * be issued during idle time anyway. Such a configuration relies on
3335          * runtime discard or periodic fstrim rather than on this wait.
3336          */
3337         if (f2fs_realtime_discard_enable(sbi))
3338                 goto out;
3339
3340         start_block = START_BLOCK(sbi, start_segno);
3341         end_block = START_BLOCK(sbi, end_segno + 1);
3342
3343         __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
3344         trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
3345                                         start_block, end_block);
3346
3347         trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
3348                                         start_block, end_block);
3349 out:
3350         if (!err)
3351                 range->len = F2FS_BLK_TO_BYTES(trimmed);
3352         return err;
3353 }
3354
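/*
 * Usage sketch (editorial addition): f2fs_trim_fs() above backs the generic
 * FITRIM ioctl. A minimal userspace caller, with error handling omitted and
 * the mount point purely illustrative, looks like:
 *
 *	#include <fcntl.h>
 *	#include <limits.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/fs.h>
 *
 *	int fd = open("/mnt/f2fs", O_RDONLY);
 *	struct fstrim_range range = {
 *		.start  = 0,
 *		.len    = ULLONG_MAX,	/* whole filesystem */
 *		.minlen = 0,
 *	};
 *	ioctl(fd, FITRIM, &range);	/* range.len returns bytes trimmed */
 *	close(fd);
 */
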
3355 int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
3356 {
3357         switch (hint) {
3358         case WRITE_LIFE_SHORT:
3359                 return CURSEG_HOT_DATA;
3360         case WRITE_LIFE_EXTREME:
3361                 return CURSEG_COLD_DATA;
3362         default:
3363                 return CURSEG_WARM_DATA;
3364         }
3365 }
3366
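/*
 * Usage note (editorial addition): inode->i_write_hint is usually set from
 * userspace with fcntl(fd, F_SET_RW_HINT, &hint), where hint is a __u64
 * value such as RWH_WRITE_LIFE_SHORT or RWH_WRITE_LIFE_EXTREME. Through the
 * mapping above, short-lived data lands in the hot data log, long-lived
 * ("extreme") data in the cold log, and everything else defaults to warm.
 */
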
3367 static int __get_segment_type_2(struct f2fs_io_info *fio)
3368 {
3369         if (fio->type == DATA)
3370                 return CURSEG_HOT_DATA;
3371         else
3372                 return CURSEG_HOT_NODE;
3373 }
3374
3375 static int __get_segment_type_4(struct f2fs_io_info *fio)
3376 {
3377         if (fio->type == DATA) {
3378                 struct inode *inode = fio->page->mapping->host;
3379
3380                 if (S_ISDIR(inode->i_mode))
3381                         return CURSEG_HOT_DATA;
3382                 else
3383                         return CURSEG_COLD_DATA;
3384         } else {
3385                 if (IS_DNODE(fio->page) && is_cold_node(fio->page))
3386                         return CURSEG_WARM_NODE;
3387                 else
3388                         return CURSEG_COLD_NODE;
3389         }
3390 }
3391
3392 static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs)
3393 {
3394         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3395         struct extent_info ei = {};
3396
3397         if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) {
3398                 if (!ei.age)
3399                         return NO_CHECK_TYPE;
3400                 if (ei.age <= sbi->hot_data_age_threshold)
3401                         return CURSEG_HOT_DATA;
3402                 if (ei.age <= sbi->warm_data_age_threshold)
3403                         return CURSEG_WARM_DATA;
3404                 return CURSEG_COLD_DATA;
3405         }
3406         return NO_CHECK_TYPE;
3407 }
3408
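/*
 * Worked example (editorial addition; the threshold values below are purely
 * illustrative): with sbi->hot_data_age_threshold = 262144 and
 * sbi->warm_data_age_threshold = 2621440, a block whose age extent records
 * an age of 100000 is routed to the hot data log, an age of 1000000 goes to
 * the warm log, and anything older falls through to cold. An age of zero
 * means "no information", so NO_CHECK_TYPE is returned and the regular
 * temperature hints in __get_segment_type_6() decide instead.
 */
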
3409 static int __get_segment_type_6(struct f2fs_io_info *fio)
3410 {
3411         if (fio->type == DATA) {
3412                 struct inode *inode = fio->page->mapping->host;
3413                 int type;
3414
3415                 if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
3416                         return CURSEG_COLD_DATA_PINNED;
3417
3418                 if (page_private_gcing(fio->page)) {
3419                         if (fio->sbi->am.atgc_enabled &&
3420                                 (fio->io_type == FS_DATA_IO) &&
3421                                 (fio->sbi->gc_mode != GC_URGENT_HIGH))
3422                                 return CURSEG_ALL_DATA_ATGC;
3423                         else
3424                                 return CURSEG_COLD_DATA;
3425                 }
3426                 if (file_is_cold(inode) || f2fs_need_compress_data(inode))
3427                         return CURSEG_COLD_DATA;
3428
3429                 type = __get_age_segment_type(inode, fio->page->index);
3430                 if (type != NO_CHECK_TYPE)
3431                         return type;
3432
3433                 if (file_is_hot(inode) ||
3434                                 is_inode_flag_set(inode, FI_HOT_DATA) ||
3435                                 f2fs_is_cow_file(inode))
3436                         return CURSEG_HOT_DATA;
3437                 return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
3438         } else {
3439                 if (IS_DNODE(fio->page))
3440                         return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
3441                                                 CURSEG_HOT_NODE;
3442                 return CURSEG_COLD_NODE;
3443         }
3444 }
3445
3446 static int __get_segment_type(struct f2fs_io_info *fio)
3447 {
3448         int type = 0;
3449
3450         switch (F2FS_OPTION(fio->sbi).active_logs) {
3451         case 2:
3452                 type = __get_segment_type_2(fio);
3453                 break;
3454         case 4:
3455                 type = __get_segment_type_4(fio);
3456                 break;
3457         case 6:
3458                 type = __get_segment_type_6(fio);
3459                 break;
3460         default:
3461                 f2fs_bug_on(fio->sbi, true);
3462         }
3463
3464         if (IS_HOT(type))
3465                 fio->temp = HOT;
3466         else if (IS_WARM(type))
3467                 fio->temp = WARM;
3468         else
3469                 fio->temp = COLD;
3470         return type;
3471 }
3472
3473 static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi,
3474                 struct curseg_info *seg)
3475 {
3476         /* use random numbers to allocate block chunks of varying sizes */
3477         if (--seg->fragment_remained_chunk > 0)
3478                 return;
3479
3480         seg->fragment_remained_chunk =
3481                 get_random_u32_inclusive(1, sbi->max_fragment_chunk);
3482         seg->next_blkoff +=
3483                 get_random_u32_inclusive(1, sbi->max_fragment_hole);
3484 }
3485
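/*
 * Illustrative sketch (editorial addition; the numbers are hypothetical):
 * in block-fragmenting mode (FS_MODE_FRAGMENT_BLK), each LFS allocation
 * decrements fragment_remained_chunk; once the chunk is exhausted, a new
 * chunk length in [1, max_fragment_chunk] is drawn and next_blkoff skips
 * ahead by a random hole of [1, max_fragment_hole] blocks. With draws of
 * chunk=3, hole=3, chunk=2, hole=1 the layout becomes
 *
 *	blkoff: 0 1 2 [3-5 skipped] 6 7 [8 skipped] 9 ...
 *
 * which deliberately fragments the free space for testing purposes.
 */
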
3486 static void reset_curseg_fields(struct curseg_info *curseg)
3487 {
3488         curseg->inited = false;
3489         curseg->segno = NULL_SEGNO;
3490         curseg->next_segno = 0;
3491 }
3492
3493 int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
3494                 block_t old_blkaddr, block_t *new_blkaddr,
3495                 struct f2fs_summary *sum, int type,
3496                 struct f2fs_io_info *fio)
3497 {
3498         struct sit_info *sit_i = SIT_I(sbi);
3499         struct curseg_info *curseg = CURSEG_I(sbi, type);
3500         unsigned long long old_mtime;
3501         bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
3502         struct seg_entry *se = NULL;
3503         bool segment_full = false;
3504         int ret = 0;
3505
3506         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3507
3508         mutex_lock(&curseg->curseg_mutex);
3509         down_write(&sit_i->sentry_lock);
3510
3511         if (curseg->segno == NULL_SEGNO) {
3512                 ret = -ENOSPC;
3513                 goto out_err;
3514         }
3515
3516         if (from_gc) {
3517                 f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
3518                 se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
3519                 sanity_check_seg_type(sbi, se->type);
3520                 f2fs_bug_on(sbi, IS_NODESEG(se->type));
3521         }
3522         *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
3523
3524         f2fs_bug_on(sbi, curseg->next_blkoff >= BLKS_PER_SEG(sbi));
3525
3526         f2fs_wait_discard_bio(sbi, *new_blkaddr);
3527
3528         curseg->sum_blk->entries[curseg->next_blkoff] = *sum;
3529         if (curseg->alloc_type == SSR) {
3530                 curseg->next_blkoff = f2fs_find_next_ssr_block(sbi, curseg);
3531         } else {
3532                 curseg->next_blkoff++;
3533                 if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
3534                         f2fs_randomize_chunk(sbi, curseg);
3535         }
3536         if (curseg->next_blkoff >= f2fs_usable_blks_in_seg(sbi, curseg->segno))
3537                 segment_full = true;
3538         stat_inc_block_count(sbi, curseg);
3539
3540         if (from_gc) {
3541                 old_mtime = get_segment_mtime(sbi, old_blkaddr);
3542         } else {
3543                 update_segment_mtime(sbi, old_blkaddr, 0);
3544                 old_mtime = 0;
3545         }
3546         update_segment_mtime(sbi, *new_blkaddr, old_mtime);
3547
3548         /*
3549          * SIT information should be updated before segment allocation,
3550          * since SSR needs the latest valid block information.
3551          */
3552         update_sit_entry(sbi, *new_blkaddr, 1);
3553         update_sit_entry(sbi, old_blkaddr, -1);
3554
3555         /*
3556          * If the current segment is full, flush it out and replace it with a
3557          * new segment.
3558          */
3559         if (segment_full) {
3560                 if (type == CURSEG_COLD_DATA_PINNED &&
3561                     !((curseg->segno + 1) % sbi->segs_per_sec)) {
3562                         reset_curseg_fields(curseg);
3563                         goto skip_new_segment;
3564                 }
3565
3566                 if (from_gc) {
3567                         ret = get_atssr_segment(sbi, type, se->type,
3568                                                 AT_SSR, se->mtime);
3569                 } else {
3570                         if (need_new_seg(sbi, type))
3571                                 ret = new_curseg(sbi, type, false);
3572                         else
3573                                 ret = change_curseg(sbi, type);
3574                         stat_inc_seg_type(sbi, curseg);
3575                 }
3576
3577                 if (ret)
3578                         goto out_err;
3579         }
3580
3581 skip_new_segment:
3582         /*
3583          * The segment dirty status should be updated after segment allocation,
3584          * so we only need to update the status once, after the previous
3585          * segment has been closed.
3586          */
3587         locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3588         locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
3589
3590         if (IS_DATASEG(curseg->seg_type))
3591                 atomic64_inc(&sbi->allocated_data_blocks);
3592
3593         up_write(&sit_i->sentry_lock);
3594
3595         if (page && IS_NODESEG(curseg->seg_type)) {
3596                 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
3597
3598                 f2fs_inode_chksum_set(sbi, page);
3599         }
3600
3601         if (fio) {
3602                 struct f2fs_bio_info *io;
3603
3604                 INIT_LIST_HEAD(&fio->list);
3605                 fio->in_list = 1;
3606                 io = sbi->write_io[fio->type] + fio->temp;
3607                 spin_lock(&io->io_lock);
3608                 list_add_tail(&fio->list, &io->io_list);
3609                 spin_unlock(&io->io_lock);
3610         }
3611
3612         mutex_unlock(&curseg->curseg_mutex);
3613         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3614         return 0;
3615 out_err:
3616         *new_blkaddr = NULL_ADDR;
3617         up_write(&sit_i->sentry_lock);
3618         mutex_unlock(&curseg->curseg_mutex);
3619         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3620         return ret;
3621
3622 }
3623
3624 void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
3625                                         block_t blkaddr, unsigned int blkcnt)
3626 {
3627         if (!f2fs_is_multi_device(sbi))
3628                 return;
3629
3630         while (1) {
3631                 unsigned int devidx = f2fs_target_device_index(sbi, blkaddr);
3632                 unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1;
3633
3634                 /* update device state for fsync */
3635                 f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO);
3636
3637                 /* update device state for checkpoint */
3638                 if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
3639                         spin_lock(&sbi->dev_lock);
3640                         f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
3641                         spin_unlock(&sbi->dev_lock);
3642                 }
3643
3644                 if (blkcnt <= blks)
3645                         break;
3646                 blkcnt -= blks;
3647                 blkaddr += blks;
3648         }
3649 }
3650
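/*
 * Worked example (editorial addition; device geometry is hypothetical):
 * suppose FDEV(0) ends at block 999 and f2fs_update_device_state() is
 * called with blkaddr = 990 and blkcnt = 20. The first pass covers the 10
 * remaining blocks on device 0 and flags it dirty, then blkaddr advances
 * to block 1000 on device 1, where the remaining 10 blocks fit, so device 1
 * is flagged as well and the loop stops. Both devices will then be flushed
 * at fsync and checkpoint time.
 */
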
3651 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
3652 {
3653         int type = __get_segment_type(fio);
3654         bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA);
3655
3656         if (keep_order)
3657                 f2fs_down_read(&fio->sbi->io_order_lock);
3658
3659         if (f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
3660                         &fio->new_blkaddr, sum, type, fio)) {
3661                 if (fscrypt_inode_uses_fs_layer_crypto(fio->page->mapping->host))
3662                         fscrypt_finalize_bounce_page(&fio->encrypted_page);
3663                 if (PageWriteback(fio->page))
3664                         end_page_writeback(fio->page);
3665                 if (f2fs_in_warm_node_list(fio->sbi, fio->page))
3666                         f2fs_del_fsync_node_entry(fio->sbi, fio->page);
3667                 goto out;
3668         }
3669         if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
3670                 f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr);
3671
3672         /* write out the dirty page to the block device */
3673         f2fs_submit_page_write(fio);
3674
3675         f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
3676 out:
3677         if (keep_order)
3678                 f2fs_up_read(&fio->sbi->io_order_lock);
3679 }
3680
3681 void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
3682                                         enum iostat_type io_type)
3683 {
3684         struct f2fs_io_info fio = {
3685                 .sbi = sbi,
3686                 .type = META,
3687                 .temp = HOT,
3688                 .op = REQ_OP_WRITE,
3689                 .op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
3690                 .old_blkaddr = page->index,
3691                 .new_blkaddr = page->index,
3692                 .page = page,
3693                 .encrypted_page = NULL,
3694                 .in_list = 0,
3695         };
3696
3697         if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
3698                 fio.op_flags &= ~REQ_META;
3699
3700         set_page_writeback(page);
3701         f2fs_submit_page_write(&fio);
3702
3703         stat_inc_meta_count(sbi, page->index);
3704         f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE);
3705 }
3706
3707 void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
3708 {
3709         struct f2fs_summary sum;
3710
3711         set_summary(&sum, nid, 0, 0);
3712         do_write_page(&sum, fio);
3713
3714         f2fs_update_iostat(fio->sbi, NULL, fio->io_type, F2FS_BLKSIZE);
3715 }
3716
3717 void f2fs_outplace_write_data(struct dnode_of_data *dn,
3718                                         struct f2fs_io_info *fio)
3719 {
3720         struct f2fs_sb_info *sbi = fio->sbi;
3721         struct f2fs_summary sum;
3722
3723         f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
3724         if (fio->io_type == FS_DATA_IO || fio->io_type == FS_CP_DATA_IO)
3725                 f2fs_update_age_extent_cache(dn);
3726         set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
3727         do_write_page(&sum, fio);
3728         f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
3729
3730         f2fs_update_iostat(sbi, dn->inode, fio->io_type, F2FS_BLKSIZE);
3731 }
3732
3733 int f2fs_inplace_write_data(struct f2fs_io_info *fio)
3734 {
3735         int err;
3736         struct f2fs_sb_info *sbi = fio->sbi;
3737         unsigned int segno;
3738
3739         fio->new_blkaddr = fio->old_blkaddr;
3740         /* the I/O temperature is needed to pass down write hints */
3741         __get_segment_type(fio);
3742
3743         segno = GET_SEGNO(sbi, fio->new_blkaddr);
3744
3745         if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
3746                 set_sbi_flag(sbi, SBI_NEED_FSCK);
3747                 f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
3748                           __func__, segno);
3749                 err = -EFSCORRUPTED;
3750                 f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
3751                 goto drop_bio;
3752         }
3753
3754         if (f2fs_cp_error(sbi)) {
3755                 err = -EIO;
3756                 goto drop_bio;
3757         }
3758
3759         if (fio->post_read)
3760                 f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1);
3761
3762         stat_inc_inplace_blocks(fio->sbi);
3763
3764         if (fio->bio && !IS_F2FS_IPU_NOCACHE(sbi))
3765                 err = f2fs_merge_page_bio(fio);
3766         else
3767                 err = f2fs_submit_page_bio(fio);
3768         if (!err) {
3769                 f2fs_update_device_state(fio->sbi, fio->ino,
3770                                                 fio->new_blkaddr, 1);
3771                 f2fs_update_iostat(fio->sbi, fio->page->mapping->host,
3772                                                 fio->io_type, F2FS_BLKSIZE);
3773         }
3774
3775         return err;
3776 drop_bio:
3777         if (fio->bio && *(fio->bio)) {
3778                 struct bio *bio = *(fio->bio);
3779
3780                 bio->bi_status = BLK_STS_IOERR;
3781                 bio_endio(bio);
3782                 *(fio->bio) = NULL;
3783         }
3784         return err;
3785 }
3786
3787 static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
3788                                                 unsigned int segno)
3789 {
3790         int i;
3791
3792         for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
3793                 if (CURSEG_I(sbi, i)->segno == segno)
3794                         break;
3795         }
3796         return i;
3797 }
3798
3799 void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
3800                                 block_t old_blkaddr, block_t new_blkaddr,
3801                                 bool recover_curseg, bool recover_newaddr,
3802                                 bool from_gc)
3803 {
3804         struct sit_info *sit_i = SIT_I(sbi);
3805         struct curseg_info *curseg;
3806         unsigned int segno, old_cursegno;
3807         struct seg_entry *se;
3808         int type;
3809         unsigned short old_blkoff;
3810         unsigned char old_alloc_type;
3811
3812         segno = GET_SEGNO(sbi, new_blkaddr);
3813         se = get_seg_entry(sbi, segno);
3814         type = se->type;
3815
3816         f2fs_down_write(&SM_I(sbi)->curseg_lock);
3817
3818         if (!recover_curseg) {
3819                 /* for recovery flow */
3820                 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
3821                         if (old_blkaddr == NULL_ADDR)
3822                                 type = CURSEG_COLD_DATA;
3823                         else
3824                                 type = CURSEG_WARM_DATA;
3825                 }
3826         } else {
3827                 if (IS_CURSEG(sbi, segno)) {
3828                         /* se->type may change at any time due to SSR allocation */
3829                         type = __f2fs_get_curseg(sbi, segno);
3830                         f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
3831                 } else {
3832                         type = CURSEG_WARM_DATA;
3833                 }
3834         }
3835
3836         f2fs_bug_on(sbi, !IS_DATASEG(type));
3837         curseg = CURSEG_I(sbi, type);
3838
3839         mutex_lock(&curseg->curseg_mutex);
3840         down_write(&sit_i->sentry_lock);
3841
3842         old_cursegno = curseg->segno;
3843         old_blkoff = curseg->next_blkoff;
3844         old_alloc_type = curseg->alloc_type;
3845
3846         /* change the current segment */
3847         if (segno != curseg->segno) {
3848                 curseg->next_segno = segno;
3849                 if (change_curseg(sbi, type))
3850                         goto out_unlock;
3851         }
3852
3853         curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
3854         curseg->sum_blk->entries[curseg->next_blkoff] = *sum;
3855
3856         if (!recover_curseg || recover_newaddr) {
3857                 if (!from_gc)
3858                         update_segment_mtime(sbi, new_blkaddr, 0);
3859                 update_sit_entry(sbi, new_blkaddr, 1);
3860         }
3861         if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
3862                 f2fs_invalidate_internal_cache(sbi, old_blkaddr);
3863                 if (!from_gc)
3864                         update_segment_mtime(sbi, old_blkaddr, 0);
3865                 update_sit_entry(sbi, old_blkaddr, -1);
3866         }
3867
3868         locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3869         locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
3870
3871         locate_dirty_segment(sbi, old_cursegno);
3872
3873         if (recover_curseg) {
3874                 if (old_cursegno != curseg->segno) {
3875                         curseg->next_segno = old_cursegno;
3876                         if (change_curseg(sbi, type))
3877                                 goto out_unlock;
3878                 }
3879                 curseg->next_blkoff = old_blkoff;
3880                 curseg->alloc_type = old_alloc_type;
3881         }
3882
3883 out_unlock:
3884         up_write(&sit_i->sentry_lock);
3885         mutex_unlock(&curseg->curseg_mutex);
3886         f2fs_up_write(&SM_I(sbi)->curseg_lock);
3887 }
3888
3889 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
3890                                 block_t old_addr, block_t new_addr,
3891                                 unsigned char version, bool recover_curseg,
3892                                 bool recover_newaddr)
3893 {
3894         struct f2fs_summary sum;
3895
3896         set_summary(&sum, dn->nid, dn->ofs_in_node, version);
3897
3898         f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
3899                                         recover_curseg, recover_newaddr, false);
3900
3901         f2fs_update_data_blkaddr(dn, new_addr);
3902 }
3903
3904 void f2fs_wait_on_page_writeback(struct page *page,
3905                                 enum page_type type, bool ordered, bool locked)
3906 {
3907         if (PageWriteback(page)) {
3908                 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
3909
3910                 /* submit cached LFS IO */
3911                 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
3912                 /* submit cached IPU IO */
3913                 f2fs_submit_merged_ipu_write(sbi, NULL, page);
3914                 if (ordered) {
3915                         wait_on_page_writeback(page);
3916                         f2fs_bug_on(sbi, locked && PageWriteback(page));
3917                 } else {
3918                         wait_for_stable_page(page);
3919                 }
3920         }
3921 }
3922
3923 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
3924 {
3925         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3926         struct page *cpage;
3927
3928         if (!f2fs_post_read_required(inode))
3929                 return;
3930
3931         if (!__is_valid_data_blkaddr(blkaddr))
3932                 return;
3933
3934         cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
3935         if (cpage) {
3936                 f2fs_wait_on_page_writeback(cpage, DATA, true, true);
3937                 f2fs_put_page(cpage, 1);
3938         }
3939 }
3940
3941 void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
3942                                                                 block_t len)
3943 {
3944         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3945         block_t i;
3946
3947         if (!f2fs_post_read_required(inode))
3948                 return;
3949
3950         for (i = 0; i < len; i++)
3951                 f2fs_wait_on_block_writeback(inode, blkaddr + i);
3952
3953         f2fs_truncate_meta_inode_pages(sbi, blkaddr, len);
3954 }
3955
3956 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
3957 {
3958         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3959         struct curseg_info *seg_i;
3960         unsigned char *kaddr;
3961         struct page *page;
3962         block_t start;
3963         int i, j, offset;
3964
3965         start = start_sum_block(sbi);
3966
3967         page = f2fs_get_meta_page(sbi, start++);
3968         if (IS_ERR(page))
3969                 return PTR_ERR(page);
3970         kaddr = (unsigned char *)page_address(page);
3971
3972         /* Step 1: restore nat cache */
3973         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3974         memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
3975
3976         /* Step 2: restore sit cache */
3977         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3978         memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
3979         offset = 2 * SUM_JOURNAL_SIZE;
3980
3981         /* Step 3: restore summary entries */
3982         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3983                 unsigned short blk_off;
3984                 unsigned int segno;
3985
3986                 seg_i = CURSEG_I(sbi, i);
3987                 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
3988                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
3989                 seg_i->next_segno = segno;
3990                 reset_curseg(sbi, i, 0);
3991                 seg_i->alloc_type = ckpt->alloc_type[i];
3992                 seg_i->next_blkoff = blk_off;
3993
3994                 if (seg_i->alloc_type == SSR)
3995                         blk_off = BLKS_PER_SEG(sbi);
3996
3997                 for (j = 0; j < blk_off; j++) {
3998                         struct f2fs_summary *s;
3999
4000                         s = (struct f2fs_summary *)(kaddr + offset);
4001                         seg_i->sum_blk->entries[j] = *s;
4002                         offset += SUMMARY_SIZE;
4003                         if (offset + SUMMARY_SIZE <= PAGE_SIZE -
4004                                                 SUM_FOOTER_SIZE)
4005                                 continue;
4006
4007                         f2fs_put_page(page, 1);
4008                         page = NULL;
4009
4010                         page = f2fs_get_meta_page(sbi, start++);
4011                         if (IS_ERR(page))
4012                                 return PTR_ERR(page);
4013                         kaddr = (unsigned char *)page_address(page);
4014                         offset = 0;
4015                 }
4016         }
4017         f2fs_put_page(page, 1);
4018         return 0;
4019 }
4020
4021 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
4022 {
4023         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
4024         struct f2fs_summary_block *sum;
4025         struct curseg_info *curseg;
4026         struct page *new;
4027         unsigned short blk_off;
4028         unsigned int segno = 0;
4029         block_t blk_addr = 0;
4030         int err = 0;
4031
4032         /* get segment number and block addr */
4033         if (IS_DATASEG(type)) {
4034                 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
4035                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
4036                                                         CURSEG_HOT_DATA]);
4037                 if (__exist_node_summaries(sbi))
4038                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
4039                 else
4040                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
4041         } else {
4042                 segno = le32_to_cpu(ckpt->cur_node_segno[type -
4043                                                         CURSEG_HOT_NODE]);
4044                 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
4045                                                         CURSEG_HOT_NODE]);
4046                 if (__exist_node_summaries(sbi))
4047                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
4048                                                         type - CURSEG_HOT_NODE);
4049                 else
4050                         blk_addr = GET_SUM_BLOCK(sbi, segno);
4051         }
4052
4053         new = f2fs_get_meta_page(sbi, blk_addr);
4054         if (IS_ERR(new))
4055                 return PTR_ERR(new);
4056         sum = (struct f2fs_summary_block *)page_address(new);
4057
4058         if (IS_NODESEG(type)) {
4059                 if (__exist_node_summaries(sbi)) {
4060                         struct f2fs_summary *ns = &sum->entries[0];
4061                         int i;
4062
4063                         for (i = 0; i < BLKS_PER_SEG(sbi); i++, ns++) {
4064                                 ns->version = 0;
4065                                 ns->ofs_in_node = 0;
4066                         }
4067                 } else {
4068                         err = f2fs_restore_node_summary(sbi, segno, sum);
4069                         if (err)
4070                                 goto out;
4071                 }
4072         }
4073
4074         /* restore the incomplete segment as the current segment */
4075         curseg = CURSEG_I(sbi, type);
4076         mutex_lock(&curseg->curseg_mutex);
4077
4078         /* update journal info */
4079         down_write(&curseg->journal_rwsem);
4080         memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
4081         up_write(&curseg->journal_rwsem);
4082
4083         memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
4084         memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
4085         curseg->next_segno = segno;
4086         reset_curseg(sbi, type, 0);
4087         curseg->alloc_type = ckpt->alloc_type[type];
4088         curseg->next_blkoff = blk_off;
4089         mutex_unlock(&curseg->curseg_mutex);
4090 out:
4091         f2fs_put_page(new, 1);
4092         return err;
4093 }
4094
4095 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
4096 {
4097         struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
4098         struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
4099         int type = CURSEG_HOT_DATA;
4100         int err;
4101
4102         if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
4103                 int npages = f2fs_npages_for_summary_flush(sbi, true);
4104
4105                 if (npages >= 2)
4106                         f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
4107                                                         META_CP, true);
4108
4109                 /* restore for compacted data summary */
4110                 err = read_compacted_summaries(sbi);
4111                 if (err)
4112                         return err;
4113                 type = CURSEG_HOT_NODE;
4114         }
4115
4116         if (__exist_node_summaries(sbi))
4117                 f2fs_ra_meta_pages(sbi,
4118                                 sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
4119                                 NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
4120
4121         for (; type <= CURSEG_COLD_NODE; type++) {
4122                 err = read_normal_summaries(sbi, type);
4123                 if (err)
4124                         return err;
4125         }
4126
4127         /* sanity check for summary blocks */
4128         if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
4129                         sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
4130                 f2fs_err(sbi, "invalid journal entries nats %u sits %u",
4131                          nats_in_cursum(nat_j), sits_in_cursum(sit_j));
4132                 return -EINVAL;
4133         }
4134
4135         return 0;
4136 }
4137
4138 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
4139 {
4140         struct page *page;
4141         unsigned char *kaddr;
4142         struct f2fs_summary *summary;
4143         struct curseg_info *seg_i;
4144         int written_size = 0;
4145         int i, j;
4146
4147         page = f2fs_grab_meta_page(sbi, blkaddr++);
4148         kaddr = (unsigned char *)page_address(page);
4149         memset(kaddr, 0, PAGE_SIZE);
4150
4151         /* Step 1: write nat cache */
4152         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
4153         memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
4154         written_size += SUM_JOURNAL_SIZE;
4155
4156         /* Step 2: write sit cache */
4157         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
4158         memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
4159         written_size += SUM_JOURNAL_SIZE;
4160
4161         /* Step 3: write summary entries */
4162         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
4163                 seg_i = CURSEG_I(sbi, i);
4164                 for (j = 0; j < f2fs_curseg_valid_blocks(sbi, i); j++) {
4165                         if (!page) {
4166                                 page = f2fs_grab_meta_page(sbi, blkaddr++);
4167                                 kaddr = (unsigned char *)page_address(page);
4168                                 memset(kaddr, 0, PAGE_SIZE);
4169                                 written_size = 0;
4170                         }
4171                         summary = (struct f2fs_summary *)(kaddr + written_size);
4172                         *summary = seg_i->sum_blk->entries[j];
4173                         written_size += SUMMARY_SIZE;
4174
4175                         if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
4176                                                         SUM_FOOTER_SIZE)
4177                                 continue;
4178
4179                         set_page_dirty(page);
4180                         f2fs_put_page(page, 1);
4181                         page = NULL;
4182                 }
4183         }
4184         if (page) {
4185                 set_page_dirty(page);
4186                 f2fs_put_page(page, 1);
4187         }
4188 }
4189
4190 static void write_normal_summaries(struct f2fs_sb_info *sbi,
4191                                         block_t blkaddr, int type)
4192 {
4193         int i, end;
4194
4195         if (IS_DATASEG(type))
4196                 end = type + NR_CURSEG_DATA_TYPE;
4197         else
4198                 end = type + NR_CURSEG_NODE_TYPE;
4199
4200         for (i = type; i < end; i++)
4201                 write_current_sum_page(sbi, i, blkaddr + (i - type));
4202 }
4203
4204 void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
4205 {
4206         if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
4207                 write_compacted_summaries(sbi, start_blk);
4208         else
4209                 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
4210 }
4211
4212 void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
4213 {
4214         write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
4215 }
4216
4217 int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
4218                                         unsigned int val, int alloc)
4219 {
4220         int i;
4221
4222         if (type == NAT_JOURNAL) {
4223                 for (i = 0; i < nats_in_cursum(journal); i++) {
4224                         if (le32_to_cpu(nid_in_journal(journal, i)) == val)
4225                                 return i;
4226                 }
4227                 if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
4228                         return update_nats_in_cursum(journal, 1);
4229         } else if (type == SIT_JOURNAL) {
4230                 for (i = 0; i < sits_in_cursum(journal); i++)
4231                         if (le32_to_cpu(segno_in_journal(journal, i)) == val)
4232                                 return i;
4233                 if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
4234                         return update_sits_in_cursum(journal, 1);
4235         }
4236         return -1;
4237 }
4238
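/*
 * Usage note (editorial addition): callers pass NAT_JOURNAL or SIT_JOURNAL
 * together with a nid or segno. A non-negative return is the index of the
 * matching (or, when alloc is set and space remains, freshly reserved)
 * journal slot in the current summary block; -1 means the caller has to
 * fall back to the on-disk NAT/SIT block instead.
 */
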
4239 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
4240                                         unsigned int segno)
4241 {
4242         return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
4243 }
4244
4245 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
4246                                         unsigned int start)
4247 {
4248         struct sit_info *sit_i = SIT_I(sbi);
4249         struct page *page;
4250         pgoff_t src_off, dst_off;
4251
4252         src_off = current_sit_addr(sbi, start);
4253         dst_off = next_sit_addr(sbi, src_off);
4254
4255         page = f2fs_grab_meta_page(sbi, dst_off);
4256         seg_info_to_sit_page(sbi, page, start);
4257
4258         set_page_dirty(page);
4259         set_to_next_sit(sit_i, start);
4260
4261         return page;
4262 }
4263
4264 static struct sit_entry_set *grab_sit_entry_set(void)
4265 {
4266         struct sit_entry_set *ses =
4267                         f2fs_kmem_cache_alloc(sit_entry_set_slab,
4268                                                 GFP_NOFS, true, NULL);
4269
4270         ses->entry_cnt = 0;
4271         INIT_LIST_HEAD(&ses->set_list);
4272         return ses;
4273 }
4274
4275 static void release_sit_entry_set(struct sit_entry_set *ses)
4276 {
4277         list_del(&ses->set_list);
4278         kmem_cache_free(sit_entry_set_slab, ses);
4279 }
4280
4281 static void adjust_sit_entry_set(struct sit_entry_set *ses,
4282                                                 struct list_head *head)
4283 {
4284         struct sit_entry_set *next = ses;
4285
4286         if (list_is_last(&ses->set_list, head))
4287                 return;
4288
4289         list_for_each_entry_continue(next, head, set_list)
4290                 if (ses->entry_cnt <= next->entry_cnt) {
4291                         list_move_tail(&ses->set_list, &next->set_list);
4292                         return;
4293                 }
4294
4295         list_move_tail(&ses->set_list, head);
4296 }
4297
4298 static void add_sit_entry(unsigned int segno, struct list_head *head)
4299 {
4300         struct sit_entry_set *ses;
4301         unsigned int start_segno = START_SEGNO(segno);
4302
4303         list_for_each_entry(ses, head, set_list) {
4304                 if (ses->start_segno == start_segno) {
4305                         ses->entry_cnt++;
4306                         adjust_sit_entry_set(ses, head);
4307                         return;
4308                 }
4309         }
4310
4311         ses = grab_sit_entry_set();
4312
4313         ses->start_segno = start_segno;
4314         ses->entry_cnt++;
4315         list_add(&ses->set_list, head);
4316 }
4317
4318 static void add_sits_in_set(struct f2fs_sb_info *sbi)
4319 {
4320         struct f2fs_sm_info *sm_info = SM_I(sbi);
4321         struct list_head *set_list = &sm_info->sit_entry_set;
4322         unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
4323         unsigned int segno;
4324
4325         for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
4326                 add_sit_entry(segno, set_list);
4327 }
4328
4329 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
4330 {
4331         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4332         struct f2fs_journal *journal = curseg->journal;
4333         int i;
4334
4335         down_write(&curseg->journal_rwsem);
4336         for (i = 0; i < sits_in_cursum(journal); i++) {
4337                 unsigned int segno;
4338                 bool dirtied;
4339
4340                 segno = le32_to_cpu(segno_in_journal(journal, i));
4341                 dirtied = __mark_sit_entry_dirty(sbi, segno);
4342
4343                 if (!dirtied)
4344                         add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
4345         }
4346         update_sits_in_cursum(journal, -i);
4347         up_write(&curseg->journal_rwsem);
4348 }
4349
4350 /*
4351  * CP calls this function, which flushes SIT entries including sit_journal,
4352  * and moves prefree segs to free segs.
4353  */
4354 void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
4355 {
4356         struct sit_info *sit_i = SIT_I(sbi);
4357         unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
4358         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4359         struct f2fs_journal *journal = curseg->journal;
4360         struct sit_entry_set *ses, *tmp;
4361         struct list_head *head = &SM_I(sbi)->sit_entry_set;
4362         bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
4363         struct seg_entry *se;
4364
4365         down_write(&sit_i->sentry_lock);
4366
4367         if (!sit_i->dirty_sentries)
4368                 goto out;
4369
4370         /*
4371          * add and account the sit entries of the dirty bitmap in the
4372          * sit entry set temporarily
4373          */
4374         add_sits_in_set(sbi);
4375
4376         /*
4377          * if there is not enough space in the journal to store dirty sit
4378          * entries, remove all entries from the journal and add and account
4379          * them in the sit entry set.
4380          */
4381         if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
4382                                                                 !to_journal)
4383                 remove_sits_in_journal(sbi);
4384
4385         /*
4386          * There are two ways to flush sit entries:
4387          * #1, flush sit entries to the journal in the current cold data summary block.
4388          * #2, flush sit entries to the sit page.
4389          */
4390         list_for_each_entry_safe(ses, tmp, head, set_list) {
4391                 struct page *page = NULL;
4392                 struct f2fs_sit_block *raw_sit = NULL;
4393                 unsigned int start_segno = ses->start_segno;
4394                 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
4395                                                 (unsigned long)MAIN_SEGS(sbi));
4396                 unsigned int segno = start_segno;
4397
4398                 if (to_journal &&
4399                         !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
4400                         to_journal = false;
4401
4402                 if (to_journal) {
4403                         down_write(&curseg->journal_rwsem);
4404                 } else {
4405                         page = get_next_sit_page(sbi, start_segno);
4406                         raw_sit = page_address(page);
4407                 }
4408
4409                 /* flush dirty sit entries in region of current sit set */
4410                 for_each_set_bit_from(segno, bitmap, end) {
4411                         int offset, sit_offset;
4412
4413                         se = get_seg_entry(sbi, segno);
4414 #ifdef CONFIG_F2FS_CHECK_FS
4415                         if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
4416                                                 SIT_VBLOCK_MAP_SIZE))
4417                                 f2fs_bug_on(sbi, 1);
4418 #endif
4419
4420                         /* add discard candidates */
4421                         if (!(cpc->reason & CP_DISCARD)) {
4422                                 cpc->trim_start = segno;
4423                                 add_discard_addrs(sbi, cpc, false);
4424                         }
4425
4426                         if (to_journal) {
4427                                 offset = f2fs_lookup_journal_in_cursum(journal,
4428                                                         SIT_JOURNAL, segno, 1);
4429                                 f2fs_bug_on(sbi, offset < 0);
4430                                 segno_in_journal(journal, offset) =
4431                                                         cpu_to_le32(segno);
4432                                 seg_info_to_raw_sit(se,
4433                                         &sit_in_journal(journal, offset));
4434                                 check_block_count(sbi, segno,
4435                                         &sit_in_journal(journal, offset));
4436                         } else {
4437                                 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
4438                                 seg_info_to_raw_sit(se,
4439                                                 &raw_sit->entries[sit_offset]);
4440                                 check_block_count(sbi, segno,
4441                                                 &raw_sit->entries[sit_offset]);
4442                         }
4443
4444                         __clear_bit(segno, bitmap);
4445                         sit_i->dirty_sentries--;
4446                         ses->entry_cnt--;
4447                 }
4448
4449                 if (to_journal)
4450                         up_write(&curseg->journal_rwsem);
4451                 else
4452                         f2fs_put_page(page, 1);
4453
4454                 f2fs_bug_on(sbi, ses->entry_cnt);
4455                 release_sit_entry_set(ses);
4456         }
4457
4458         f2fs_bug_on(sbi, !list_empty(head));
4459         f2fs_bug_on(sbi, sit_i->dirty_sentries);
4460 out:
4461         if (cpc->reason & CP_DISCARD) {
4462                 __u64 trim_start = cpc->trim_start;
4463
4464                 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
4465                         add_discard_addrs(sbi, cpc, false);
4466
4467                 cpc->trim_start = trim_start;
4468         }
4469         up_write(&sit_i->sentry_lock);
4470
4471         set_prefree_as_free_segments(sbi);
4472 }
4473
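/*
 * Allocate the in-memory SIT: the per-segment seg_entry array, the dirty
 * sentries bitmap, one contiguous buffer holding the per-segment
 * validity/discard bitmaps, per-section entries for large sections, and a
 * copy of the SIT bitmap taken from the checkpoint pack.
 */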
4474 static int build_sit_info(struct f2fs_sb_info *sbi)
4475 {
4476         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
4477         struct sit_info *sit_i;
4478         unsigned int sit_segs, start;
4479         char *src_bitmap, *bitmap;
4480         unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
4481         unsigned int discard_map = f2fs_block_unit_discard(sbi) ? 1 : 0;
4482
4483         /* allocate memory for SIT information */
4484         sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
4485         if (!sit_i)
4486                 return -ENOMEM;
4487
4488         SM_I(sbi)->sit_info = sit_i;
4489
4490         sit_i->sentries =
4491                 f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
4492                                               MAIN_SEGS(sbi)),
4493                               GFP_KERNEL);
4494         if (!sit_i->sentries)
4495                 return -ENOMEM;
4496
4497         main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4498         sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
4499                                                                 GFP_KERNEL);
4500         if (!sit_i->dirty_sentries_bitmap)
4501                 return -ENOMEM;
4502
4503 #ifdef CONFIG_F2FS_CHECK_FS
4504         bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (3 + discard_map);
4505 #else
4506         bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (2 + discard_map);
4507 #endif
4508         sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4509         if (!sit_i->bitmap)
4510                 return -ENOMEM;
4511
4512         bitmap = sit_i->bitmap;
4513
4514         for (start = 0; start < MAIN_SEGS(sbi); start++) {
4515                 sit_i->sentries[start].cur_valid_map = bitmap;
4516                 bitmap += SIT_VBLOCK_MAP_SIZE;
4517
4518                 sit_i->sentries[start].ckpt_valid_map = bitmap;
4519                 bitmap += SIT_VBLOCK_MAP_SIZE;
4520
4521 #ifdef CONFIG_F2FS_CHECK_FS
4522                 sit_i->sentries[start].cur_valid_map_mir = bitmap;
4523                 bitmap += SIT_VBLOCK_MAP_SIZE;
4524 #endif
4525
4526                 if (discard_map) {
4527                         sit_i->sentries[start].discard_map = bitmap;
4528                         bitmap += SIT_VBLOCK_MAP_SIZE;
4529                 }
4530         }
4531
4532         sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
4533         if (!sit_i->tmp_map)
4534                 return -ENOMEM;
4535
4536         if (__is_large_section(sbi)) {
4537                 sit_i->sec_entries =
4538                         f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
4539                                                       MAIN_SECS(sbi)),
4540                                       GFP_KERNEL);
4541                 if (!sit_i->sec_entries)
4542                         return -ENOMEM;
4543         }
4544
4545         /* get information related to SIT */
4546         sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
4547
4548         /* set up SIT bitmap from checkpoint pack */
4549         sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
4550         src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
4551
4552         sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
4553         if (!sit_i->sit_bitmap)
4554                 return -ENOMEM;
4555
4556 #ifdef CONFIG_F2FS_CHECK_FS
4557         sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
4558                                         sit_bitmap_size, GFP_KERNEL);
4559         if (!sit_i->sit_bitmap_mir)
4560                 return -ENOMEM;
4561
4562         sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
4563                                         main_bitmap_size, GFP_KERNEL);
4564         if (!sit_i->invalid_segmap)
4565                 return -ENOMEM;
4566 #endif
4567
4568         sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
4569         sit_i->sit_blocks = SEGS_TO_BLKS(sbi, sit_segs);
4570         sit_i->written_valid_blocks = 0;
4571         sit_i->bitmap_size = sit_bitmap_size;
4572         sit_i->dirty_sentries = 0;
4573         sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
4574         sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
4575         sit_i->mounted_time = ktime_get_boottime_seconds();
4576         init_rwsem(&sit_i->sentry_lock);
4577         return 0;
4578 }
4579
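/*
 * Allocate the free segment/section bitmaps.  All bits start set (nothing
 * is free yet); init_free_segmap() clears them later based on the SIT.
 */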
4580 static int build_free_segmap(struct f2fs_sb_info *sbi)
4581 {
4582         struct free_segmap_info *free_i;
4583         unsigned int bitmap_size, sec_bitmap_size;
4584
4585         /* allocate memory for free segmap information */
4586         free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
4587         if (!free_i)
4588                 return -ENOMEM;
4589
4590         SM_I(sbi)->free_info = free_i;
4591
4592         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4593         free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
4594         if (!free_i->free_segmap)
4595                 return -ENOMEM;
4596
4597         sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4598         free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
4599         if (!free_i->free_secmap)
4600                 return -ENOMEM;
4601
4602         /* temporarily mark all segments as in-use */
4603         memset(free_i->free_segmap, 0xff, bitmap_size);
4604         memset(free_i->free_secmap, 0xff, sec_bitmap_size);
4605
4606         /* init free segmap information */
4607         free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
4608         free_i->free_segments = 0;
4609         free_i->free_sections = 0;
4610         spin_lock_init(&free_i->segmap_lock);
4611         return 0;
4612 }
4613
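/*
 * Allocate the array of current segments: one summary block and one
 * journal per log, assign the default segment type of each log, and
 * restore the summaries from the checkpoint.
 */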
4614 static int build_curseg(struct f2fs_sb_info *sbi)
4615 {
4616         struct curseg_info *array;
4617         int i;
4618
4619         array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
4620                                         sizeof(*array)), GFP_KERNEL);
4621         if (!array)
4622                 return -ENOMEM;
4623
4624         SM_I(sbi)->curseg_array = array;
4625
4626         for (i = 0; i < NO_CHECK_TYPE; i++) {
4627                 mutex_init(&array[i].curseg_mutex);
4628                 array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
4629                 if (!array[i].sum_blk)
4630                         return -ENOMEM;
4631                 init_rwsem(&array[i].journal_rwsem);
4632                 array[i].journal = f2fs_kzalloc(sbi,
4633                                 sizeof(struct f2fs_journal), GFP_KERNEL);
4634                 if (!array[i].journal)
4635                         return -ENOMEM;
4636                 if (i < NR_PERSISTENT_LOG)
4637                         array[i].seg_type = CURSEG_HOT_DATA + i;
4638                 else if (i == CURSEG_COLD_DATA_PINNED)
4639                         array[i].seg_type = CURSEG_COLD_DATA;
4640                 else if (i == CURSEG_ALL_DATA_ATGC)
4641                         array[i].seg_type = CURSEG_COLD_DATA;
4642                 reset_curseg_fields(&array[i]);
4643         }
4644         return restore_curseg_summaries(sbi);
4645 }
4646
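/*
 * Load all SIT entries from disk (with readahead) and then overlay the
 * newer entries recorded in the SIT journal.  Along the way, build the
 * per-segment discard maps and cross-check the accumulated valid block
 * counts against the checkpoint's node/user block counts.
 */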
4647 static int build_sit_entries(struct f2fs_sb_info *sbi)
4648 {
4649         struct sit_info *sit_i = SIT_I(sbi);
4650         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4651         struct f2fs_journal *journal = curseg->journal;
4652         struct seg_entry *se;
4653         struct f2fs_sit_entry sit;
4654         int sit_blk_cnt = SIT_BLK_CNT(sbi);
4655         unsigned int i, start, end;
4656         unsigned int readed, start_blk = 0;
4657         int err = 0;
4658         block_t sit_valid_blocks[2] = {0, 0};
4659
4660         do {
4661                 readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS,
4662                                                         META_SIT, true);
4663
4664                 start = start_blk * sit_i->sents_per_block;
4665                 end = (start_blk + readed) * sit_i->sents_per_block;
4666
4667                 for (; start < end && start < MAIN_SEGS(sbi); start++) {
4668                         struct f2fs_sit_block *sit_blk;
4669                         struct page *page;
4670
4671                         se = &sit_i->sentries[start];
4672                         page = get_current_sit_page(sbi, start);
4673                         if (IS_ERR(page))
4674                                 return PTR_ERR(page);
4675                         sit_blk = (struct f2fs_sit_block *)page_address(page);
4676                         sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
4677                         f2fs_put_page(page, 1);
4678
4679                         err = check_block_count(sbi, start, &sit);
4680                         if (err)
4681                                 return err;
4682                         seg_info_from_raw_sit(se, &sit);
4683
4684                         if (se->type >= NR_PERSISTENT_LOG) {
4685                                 f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
4686                                                         se->type, start);
4687                                 f2fs_handle_error(sbi,
4688                                                 ERROR_INCONSISTENT_SUM_TYPE);
4689                                 return -EFSCORRUPTED;
4690                         }
4691
4692                         sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
4693
4694                         if (!f2fs_block_unit_discard(sbi))
4695                                 goto init_discard_map_done;
4696
4697                         /* build discard map only one time */
4698                         if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4699                                 memset(se->discard_map, 0xff,
4700                                                 SIT_VBLOCK_MAP_SIZE);
4701                                 goto init_discard_map_done;
4702                         }
4703                         memcpy(se->discard_map, se->cur_valid_map,
4704                                                 SIT_VBLOCK_MAP_SIZE);
4705                         sbi->discard_blks += BLKS_PER_SEG(sbi) -
4706                                                 se->valid_blocks;
4707 init_discard_map_done:
4708                         if (__is_large_section(sbi))
4709                                 get_sec_entry(sbi, start)->valid_blocks +=
4710                                                         se->valid_blocks;
4711                 }
4712                 start_blk += readed;
4713         } while (start_blk < sit_blk_cnt);
4714
4715         down_read(&curseg->journal_rwsem);
4716         for (i = 0; i < sits_in_cursum(journal); i++) {
4717                 unsigned int old_valid_blocks;
4718
4719                 start = le32_to_cpu(segno_in_journal(journal, i));
4720                 if (start >= MAIN_SEGS(sbi)) {
4721                         f2fs_err(sbi, "Wrong journal entry on segno %u",
4722                                  start);
4723                         err = -EFSCORRUPTED;
4724                         f2fs_handle_error(sbi, ERROR_CORRUPTED_JOURNAL);
4725                         break;
4726                 }
4727
4728                 se = &sit_i->sentries[start];
4729                 sit = sit_in_journal(journal, i);
4730
4731                 old_valid_blocks = se->valid_blocks;
4732
4733                 sit_valid_blocks[SE_PAGETYPE(se)] -= old_valid_blocks;
4734
4735                 err = check_block_count(sbi, start, &sit);
4736                 if (err)
4737                         break;
4738                 seg_info_from_raw_sit(se, &sit);
4739
4740                 if (se->type >= NR_PERSISTENT_LOG) {
4741                         f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
4742                                                         se->type, start);
4743                         err = -EFSCORRUPTED;
4744                         f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
4745                         break;
4746                 }
4747
4748                 sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
4749
4750                 if (f2fs_block_unit_discard(sbi)) {
4751                         if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4752                                 memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
4753                         } else {
4754                                 memcpy(se->discard_map, se->cur_valid_map,
4755                                                         SIT_VBLOCK_MAP_SIZE);
4756                                 sbi->discard_blks += old_valid_blocks;
4757                                 sbi->discard_blks -= se->valid_blocks;
4758                         }
4759                 }
4760
4761                 if (__is_large_section(sbi)) {
4762                         get_sec_entry(sbi, start)->valid_blocks +=
4763                                                         se->valid_blocks;
4764                         get_sec_entry(sbi, start)->valid_blocks -=
4765                                                         old_valid_blocks;
4766                 }
4767         }
4768         up_read(&curseg->journal_rwsem);
4769
4770         if (err)
4771                 return err;
4772
4773         if (sit_valid_blocks[NODE] != valid_node_count(sbi)) {
4774                 f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
4775                          sit_valid_blocks[NODE], valid_node_count(sbi));
4776                 f2fs_handle_error(sbi, ERROR_INCONSISTENT_NODE_COUNT);
4777                 return -EFSCORRUPTED;
4778         }
4779
4780         if (sit_valid_blocks[DATA] + sit_valid_blocks[NODE] >
4781                                 valid_user_blocks(sbi)) {
4782                 f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u",
4783                          sit_valid_blocks[DATA], sit_valid_blocks[NODE],
4784                          valid_user_blocks(sbi));
4785                 f2fs_handle_error(sbi, ERROR_INCONSISTENT_BLOCK_COUNT);
4786                 return -EFSCORRUPTED;
4787         }
4788
4789         return 0;
4790 }
4791
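/*
 * Based on the freshly built SIT, mark segments without valid blocks as
 * free, accumulate written_valid_blocks for the rest, and mark the
 * segments the current logs point to as in-use.
 */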
4792 static void init_free_segmap(struct f2fs_sb_info *sbi)
4793 {
4794         unsigned int start;
4795         int type;
4796         struct seg_entry *sentry;
4797
4798         for (start = 0; start < MAIN_SEGS(sbi); start++) {
4799                 if (f2fs_usable_blks_in_seg(sbi, start) == 0)
4800                         continue;
4801                 sentry = get_seg_entry(sbi, start);
4802                 if (!sentry->valid_blocks)
4803                         __set_free(sbi, start);
4804                 else
4805                         SIT_I(sbi)->written_valid_blocks +=
4806                                                 sentry->valid_blocks;
4807         }
4808
4809         /* mark the current segments as in-use */
4810         for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
4811                 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
4812
4813                 __set_test_and_inuse(sbi, curseg_t->segno);
4814         }
4815 }
4816
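/*
 * Mark every in-use segment that is neither empty nor fully valid as
 * dirty.  With large sections, also record partially valid, non-curseg
 * sections in dirty_secmap.
 */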
4817 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
4818 {
4819         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4820         struct free_segmap_info *free_i = FREE_I(sbi);
4821         unsigned int segno = 0, offset = 0, secno;
4822         block_t valid_blocks, usable_blks_in_seg;
4823
4824         while (1) {
4825                 /* find dirty segment based on free segmap */
4826                 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
4827                 if (segno >= MAIN_SEGS(sbi))
4828                         break;
4829                 offset = segno + 1;
4830                 valid_blocks = get_valid_blocks(sbi, segno, false);
4831                 usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
4832                 if (valid_blocks == usable_blks_in_seg || !valid_blocks)
4833                         continue;
4834                 if (valid_blocks > usable_blks_in_seg) {
4835                         f2fs_bug_on(sbi, 1);
4836                         continue;
4837                 }
4838                 mutex_lock(&dirty_i->seglist_lock);
4839                 __locate_dirty_segment(sbi, segno, DIRTY);
4840                 mutex_unlock(&dirty_i->seglist_lock);
4841         }
4842
4843         if (!__is_large_section(sbi))
4844                 return;
4845
4846         mutex_lock(&dirty_i->seglist_lock);
4847         for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) {
4848                 valid_blocks = get_valid_blocks(sbi, segno, true);
4849                 secno = GET_SEC_FROM_SEG(sbi, segno);
4850
4851                 if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi))
4852                         continue;
4853                 if (IS_CURSEC(sbi, secno))
4854                         continue;
4855                 set_bit(secno, dirty_i->dirty_secmap);
4856         }
4857         mutex_unlock(&dirty_i->seglist_lock);
4858 }
4859
4860 static int init_victim_secmap(struct f2fs_sb_info *sbi)
4861 {
4862         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4863         unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4864
4865         dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4866         if (!dirty_i->victim_secmap)
4867                 return -ENOMEM;
4868
4869         dirty_i->pinned_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4870         if (!dirty_i->pinned_secmap)
4871                 return -ENOMEM;
4872
4873         dirty_i->pinned_secmap_cnt = 0;
4874         dirty_i->enable_pin_section = true;
4875         return 0;
4876 }
4877
4878 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
4879 {
4880         struct dirty_seglist_info *dirty_i;
4881         unsigned int bitmap_size, i;
4882
4883         /* allocate memory for dirty segments list information */
4884         dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
4885                                                                 GFP_KERNEL);
4886         if (!dirty_i)
4887                 return -ENOMEM;
4888
4889         SM_I(sbi)->dirty_info = dirty_i;
4890         mutex_init(&dirty_i->seglist_lock);
4891
4892         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4893
4894         for (i = 0; i < NR_DIRTY_TYPE; i++) {
4895                 dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
4896                                                                 GFP_KERNEL);
4897                 if (!dirty_i->dirty_segmap[i])
4898                         return -ENOMEM;
4899         }
4900
4901         if (__is_large_section(sbi)) {
4902                 bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4903                 dirty_i->dirty_secmap = f2fs_kvzalloc(sbi,
4904                                                 bitmap_size, GFP_KERNEL);
4905                 if (!dirty_i->dirty_secmap)
4906                         return -ENOMEM;
4907         }
4908
4909         init_dirty_segmap(sbi);
4910         return init_victim_secmap(sbi);
4911 }
4912
4913 static int sanity_check_curseg(struct f2fs_sb_info *sbi)
4914 {
4915         int i;
4916
4917         /*
4918          * In an LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
4919          * in an LFS curseg, all blkaddrs after .next_blkoff should be unused.
4920          */
4921         for (i = 0; i < NR_PERSISTENT_LOG; i++) {
4922                 struct curseg_info *curseg = CURSEG_I(sbi, i);
4923                 struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
4924                 unsigned int blkofs = curseg->next_blkoff;
4925
4926                 if (f2fs_sb_has_readonly(sbi) &&
4927                         i != CURSEG_HOT_DATA && i != CURSEG_HOT_NODE)
4928                         continue;
4929
4930                 sanity_check_seg_type(sbi, curseg->seg_type);
4931
4932                 if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) {
4933                         f2fs_err(sbi,
4934                                  "Current segment has invalid alloc_type:%d",
4935                                  curseg->alloc_type);
4936                         f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
4937                         return -EFSCORRUPTED;
4938                 }
4939
4940                 if (f2fs_test_bit(blkofs, se->cur_valid_map))
4941                         goto out;
4942
4943                 if (curseg->alloc_type == SSR)
4944                         continue;
4945
4946                 for (blkofs += 1; blkofs < BLKS_PER_SEG(sbi); blkofs++) {
4947                         if (!f2fs_test_bit(blkofs, se->cur_valid_map))
4948                                 continue;
4949 out:
4950                         f2fs_err(sbi,
4951                                  "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
4952                                  i, curseg->segno, curseg->alloc_type,
4953                                  curseg->next_blkoff, blkofs);
4954                         f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
4955                         return -EFSCORRUPTED;
4956                 }
4957         }
4958         return 0;
4959 }
4960
4961 #ifdef CONFIG_BLK_DEV_ZONED
4962 static const char *f2fs_zone_status[BLK_ZONE_COND_OFFLINE + 1] = {
4963         [BLK_ZONE_COND_NOT_WP]          = "NOT_WP",
4964         [BLK_ZONE_COND_EMPTY]           = "EMPTY",
4965         [BLK_ZONE_COND_IMP_OPEN]        = "IMPLICIT_OPEN",
4966         [BLK_ZONE_COND_EXP_OPEN]        = "EXPLICIT_OPEN",
4967         [BLK_ZONE_COND_CLOSED]          = "CLOSED",
4968         [BLK_ZONE_COND_READONLY]        = "READONLY",
4969         [BLK_ZONE_COND_FULL]            = "FULL",
4970         [BLK_ZONE_COND_OFFLINE]         = "OFFLINE",
4971 };
4972
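/*
 * Check that the condition of a sequential zone agrees with its valid
 * block count.  Zones without valid blocks but with a non-zero write
 * pointer are reset, and zones with valid blocks that are not FULL are
 * finished (or zero-filled when the device cannot finish zones).  Zones
 * belonging to current sections are only reported.
 */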
4973 static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
4974                                     struct f2fs_dev_info *fdev,
4975                                     struct blk_zone *zone)
4976 {
4977         unsigned int zone_segno;
4978         block_t zone_block, valid_block_cnt;
4979         unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
4980         int ret;
4981         unsigned int nofs_flags;
4982
4983         if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
4984                 return 0;
4985
4986         zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block);
4987         zone_segno = GET_SEGNO(sbi, zone_block);
4988
4989         /*
4990          * Skip checking zones that cursegs point to, since
4991          * fix_curseg_write_pointer() checks them.
4992          */
4993         if (zone_segno >= MAIN_SEGS(sbi))
4994                 return 0;
4995
4996         /*
4997          * Get the number of valid blocks in the zone.
4998          */
4999         valid_block_cnt = get_valid_blocks(sbi, zone_segno, true);
5000         if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) {
5001                 f2fs_notice(sbi, "Open zones: valid block[0x%x,0x%x] cond[%s]",
5002                                 zone_segno, valid_block_cnt,
5003                                 f2fs_zone_status[zone->cond]);
5004                 return 0;
5005         }
5006
5007         if ((!valid_block_cnt && zone->cond == BLK_ZONE_COND_EMPTY) ||
5008             (valid_block_cnt && zone->cond == BLK_ZONE_COND_FULL))
5009                 return 0;
5010
5011         if (!valid_block_cnt) {
5012                 f2fs_notice(sbi, "Zone without valid block has non-zero write "
5013                             "pointer. Reset the write pointer: cond[%s]",
5014                             f2fs_zone_status[zone->cond]);
5015                 ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
5016                                         zone->len >> log_sectors_per_block);
5017                 if (ret)
5018                         f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
5019                                  fdev->path, ret);
5020                 return ret;
5021         }
5022
5023         /*
5024          * If there are valid blocks and the write pointer does not match
5025          * them, report the inconsistency and fill the zone up to the end
5026          * to close it. This inconsistency does not cause a write error
5027          * because the zone will not be selected for writes until it gets
5028          * discarded.
5029          */
5030         f2fs_notice(sbi, "Valid blocks are not aligned with write "
5031                     "pointer: valid block[0x%x,0x%x] cond[%s]",
5032                     zone_segno, valid_block_cnt, f2fs_zone_status[zone->cond]);
5033
5034         nofs_flags = memalloc_nofs_save();
5035         ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH,
5036                                 zone->start, zone->len);
5037         memalloc_nofs_restore(nofs_flags);
5038         if (ret == -EOPNOTSUPP) {
5039                 ret = blkdev_issue_zeroout(fdev->bdev, zone->wp,
5040                                         zone->len - (zone->wp - zone->start),
5041                                         GFP_NOFS, 0);
5042                 if (ret)
5043                         f2fs_err(sbi, "Fill up zone failed: %s (errno=%d)",
5044                                         fdev->path, ret);
5045         } else if (ret) {
5046                 f2fs_err(sbi, "Finishing zone failed: %s (errno=%d)",
5047                                 fdev->path, ret);
5048         }
5049
5050         return ret;
5051 }
5052
5053 static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
5054                                                   block_t zone_blkaddr)
5055 {
5056         int i;
5057
5058         for (i = 0; i < sbi->s_ndevs; i++) {
5059                 if (!bdev_is_zoned(FDEV(i).bdev))
5060                         continue;
5061                 if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr &&
5062                                 zone_blkaddr <= FDEV(i).end_blk))
5063                         return &FDEV(i);
5064         }
5065
5066         return NULL;
5067 }
5068
5069 static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
5070                               void *data)
5071 {
5072         memcpy(data, zone, sizeof(struct blk_zone));
5073         return 0;
5074 }
5075
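/*
 * Align a curseg of a zoned device with the write pointer of the zone it
 * points to.  After a clean unmount the existing curseg can be reused if
 * it already matches the write pointer; otherwise the curseg is moved to
 * a new section when it has been written to, the old zone is validated
 * via check_zone_write_pointer(), and the zone the curseg now points to
 * is reset if it is not empty.
 */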
5076 static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
5077 {
5078         struct curseg_info *cs = CURSEG_I(sbi, type);
5079         struct f2fs_dev_info *zbd;
5080         struct blk_zone zone;
5081         unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off;
5082         block_t cs_zone_block, wp_block;
5083         unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
5084         sector_t zone_sector;
5085         int err;
5086
5087         cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
5088         cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
5089
5090         zbd = get_target_zoned_dev(sbi, cs_zone_block);
5091         if (!zbd)
5092                 return 0;
5093
5094         /* report zone for the sector the curseg points to */
5095         zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
5096                 << log_sectors_per_block;
5097         err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
5098                                   report_one_zone_cb, &zone);
5099         if (err != 1) {
5100                 f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
5101                          zbd->path, err);
5102                 return err;
5103         }
5104
5105         if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
5106                 return 0;
5107
5108         /*
5109          * If the previous mount was safely unmounted, we can reuse the
5110          * current segments. Otherwise, allocate new sections.
5111          */
5112         if (is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
5113                 wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
5114                 wp_segno = GET_SEGNO(sbi, wp_block);
5115                 wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
5116                 wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
5117
5118                 if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
5119                                 wp_sector_off == 0)
5120                         return 0;
5121
5122                 f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
5123                             "curseg[0x%x,0x%x] wp[0x%x,0x%x]", type, cs->segno,
5124                             cs->next_blkoff, wp_segno, wp_blkoff);
5125         }
5126
5127         /* Allocate a new section if the current one has already been written to. */
5128         if (cs->next_blkoff) {
5129                 unsigned int old_segno = cs->segno, old_blkoff = cs->next_blkoff;
5130
5131                 f2fs_allocate_new_section(sbi, type, true);
5132                 f2fs_notice(sbi, "Assign new section to curseg[%d]: "
5133                                 "[0x%x,0x%x] -> [0x%x,0x%x]",
5134                                 type, old_segno, old_blkoff,
5135                                 cs->segno, cs->next_blkoff);
5136         }
5137
5138         /* check the consistency of the zone the curseg pointed to */
5139         if (check_zone_write_pointer(sbi, zbd, &zone))
5140                 return -EIO;
5141
5142         /* check newly assigned zone */
5143         cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
5144         cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
5145
5146         zbd = get_target_zoned_dev(sbi, cs_zone_block);
5147         if (!zbd)
5148                 return 0;
5149
5150         zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
5151                 << log_sectors_per_block;
5152         err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
5153                                   report_one_zone_cb, &zone);
5154         if (err != 1) {
5155                 f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
5156                          zbd->path, err);
5157                 return err;
5158         }
5159
5160         if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
5161                 return 0;
5162
5163         if (zone.wp != zone.start) {
5164                 f2fs_notice(sbi,
5165                             "New zone for curseg[%d] is not yet discarded. "
5166                             "Reset the zone: curseg[0x%x,0x%x]",
5167                             type, cs->segno, cs->next_blkoff);
5168                 err = __f2fs_issue_discard_zone(sbi, zbd->bdev, cs_zone_block,
5169                                         zone.len >> log_sectors_per_block);
5170                 if (err) {
5171                         f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
5172                                  zbd->path, err);
5173                         return err;
5174                 }
5175         }
5176
5177         return 0;
5178 }
5179
5180 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
5181 {
5182         int i, ret;
5183
5184         for (i = 0; i < NR_PERSISTENT_LOG; i++) {
5185                 ret = fix_curseg_write_pointer(sbi, i);
5186                 if (ret)
5187                         return ret;
5188         }
5189
5190         return 0;
5191 }
5192
5193 struct check_zone_write_pointer_args {
5194         struct f2fs_sb_info *sbi;
5195         struct f2fs_dev_info *fdev;
5196 };
5197
5198 static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx,
5199                                       void *data)
5200 {
5201         struct check_zone_write_pointer_args *args;
5202
5203         args = (struct check_zone_write_pointer_args *)data;
5204
5205         return check_zone_write_pointer(args->sbi, args->fdev, zone);
5206 }
5207
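/*
 * Report every zone of every zoned device and run
 * check_zone_write_pointer() on each of them.
 */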
5208 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
5209 {
5210         int i, ret;
5211         struct check_zone_write_pointer_args args;
5212
5213         for (i = 0; i < sbi->s_ndevs; i++) {
5214                 if (!bdev_is_zoned(FDEV(i).bdev))
5215                         continue;
5216
5217                 args.sbi = sbi;
5218                 args.fdev = &FDEV(i);
5219                 ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES,
5220                                           check_zone_write_pointer_cb, &args);
5221                 if (ret < 0)
5222                         return ret;
5223         }
5224
5225         return 0;
5226 }
5227
5228 /*
5229  * Return the number of usable blocks in a segment. The number of blocks
5230  * returned is always equal to the number of blocks in a segment for
5231  * segments fully contained within a sequential zone capacity or a
5232  * conventional zone. For segments partially contained in a sequential
5233  * zone capacity, the number of usable blocks up to the zone capacity
5234  * is returned. 0 is returned in all other cases.
5235  */
5236 static inline unsigned int f2fs_usable_zone_blks_in_seg(
5237                         struct f2fs_sb_info *sbi, unsigned int segno)
5238 {
5239         block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
5240         unsigned int secno;
5241
5242         if (!sbi->unusable_blocks_per_sec)
5243                 return BLKS_PER_SEG(sbi);
5244
5245         secno = GET_SEC_FROM_SEG(sbi, segno);
5246         seg_start = START_BLOCK(sbi, segno);
5247         sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
5248         sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi);
5249
5250         /*
5251          * If segment starts before zone capacity and spans beyond
5252          * zone capacity, then usable blocks are from seg start to
5253          * zone capacity. If the segment starts after the zone capacity,
5254          * then there are no usable blocks.
5255          */
5256         if (seg_start >= sec_cap_blkaddr)
5257                 return 0;
5258         if (seg_start + BLKS_PER_SEG(sbi) > sec_cap_blkaddr)
5259                 return sec_cap_blkaddr - seg_start;
5260
5261         return BLKS_PER_SEG(sbi);
5262 }
5263 #else
5264 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
5265 {
5266         return 0;
5267 }
5268
5269 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
5270 {
5271         return 0;
5272 }
5273
5274 static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi,
5275                                                         unsigned int segno)
5276 {
5277         return 0;
5278 }
5279
5280 #endif
5281 unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
5282                                         unsigned int segno)
5283 {
5284         if (f2fs_sb_has_blkzoned(sbi))
5285                 return f2fs_usable_zone_blks_in_seg(sbi, segno);
5286
5287         return BLKS_PER_SEG(sbi);
5288 }
5289
5290 unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
5291                                         unsigned int segno)
5292 {
5293         if (f2fs_sb_has_blkzoned(sbi))
5294                 return CAP_SEGS_PER_SEC(sbi);
5295
5296         return SEGS_PER_SEC(sbi);
5297 }
5298
5299 /*
5300  * Update min, max modified time for cost-benefit GC algorithm
5301  */
5302 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
5303 {
5304         struct sit_info *sit_i = SIT_I(sbi);
5305         unsigned int segno;
5306
5307         down_write(&sit_i->sentry_lock);
5308
5309         sit_i->min_mtime = ULLONG_MAX;
5310
5311         for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) {
5312                 unsigned int i;
5313                 unsigned long long mtime = 0;
5314
5315                 for (i = 0; i < SEGS_PER_SEC(sbi); i++)
5316                         mtime += get_seg_entry(sbi, segno + i)->mtime;
5317
5318                 mtime = div_u64(mtime, SEGS_PER_SEC(sbi));
5319
5320                 if (sit_i->min_mtime > mtime)
5321                         sit_i->min_mtime = mtime;
5322         }
5323         sit_i->max_mtime = get_mtime(sbi, false);
5324         sit_i->dirty_max_mtime = 0;
5325         up_write(&sit_i->sentry_lock);
5326 }
5327
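/*
 * Build the segment manager at mount time: set sm_info defaults from the
 * superblock and checkpoint, set up the flush and discard command
 * controls, then construct the SIT info, free segmap, current segments,
 * SIT entries and dirty segmap, sanity-check the current segments, and
 * finally initialize the min/max mtime used by GC.
 */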
5328 int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
5329 {
5330         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
5331         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
5332         struct f2fs_sm_info *sm_info;
5333         int err;
5334
5335         sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
5336         if (!sm_info)
5337                 return -ENOMEM;
5338
5339         /* init sm info */
5340         sbi->sm_info = sm_info;
5341         sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
5342         sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
5343         sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
5344         sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
5345         sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
5346         sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
5347         sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
5348         sm_info->rec_prefree_segments = sm_info->main_segments *
5349                                         DEF_RECLAIM_PREFREE_SEGMENTS / 100;
5350         if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
5351                 sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
5352
5353         if (!f2fs_lfs_mode(sbi))
5354                 sm_info->ipu_policy = BIT(F2FS_IPU_FSYNC);
5355         sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
5356         sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
5357         sm_info->min_seq_blocks = BLKS_PER_SEG(sbi);
5358         sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
5359         sm_info->min_ssr_sections = reserved_sections(sbi);
5360
5361         INIT_LIST_HEAD(&sm_info->sit_entry_set);
5362
5363         init_f2fs_rwsem(&sm_info->curseg_lock);
5364
5365         err = f2fs_create_flush_cmd_control(sbi);
5366         if (err)
5367                 return err;
5368
5369         err = create_discard_cmd_control(sbi);
5370         if (err)
5371                 return err;
5372
5373         err = build_sit_info(sbi);
5374         if (err)
5375                 return err;
5376         err = build_free_segmap(sbi);
5377         if (err)
5378                 return err;
5379         err = build_curseg(sbi);
5380         if (err)
5381                 return err;
5382
5383         /* reinit free segmap based on SIT */
5384         err = build_sit_entries(sbi);
5385         if (err)
5386                 return err;
5387
5388         init_free_segmap(sbi);
5389         err = build_dirty_segmap(sbi);
5390         if (err)
5391                 return err;
5392
5393         err = sanity_check_curseg(sbi);
5394         if (err)
5395                 return err;
5396
5397         init_min_max_mtime(sbi);
5398         return 0;
5399 }
5400
5401 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
5402                 enum dirty_type dirty_type)
5403 {
5404         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5405
5406         mutex_lock(&dirty_i->seglist_lock);
5407         kvfree(dirty_i->dirty_segmap[dirty_type]);
5408         dirty_i->nr_dirty[dirty_type] = 0;
5409         mutex_unlock(&dirty_i->seglist_lock);
5410 }
5411
5412 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
5413 {
5414         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5415
5416         kvfree(dirty_i->pinned_secmap);
5417         kvfree(dirty_i->victim_secmap);
5418 }
5419
5420 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
5421 {
5422         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5423         int i;
5424
5425         if (!dirty_i)
5426                 return;
5427
5428         /* discard pre-free/dirty segments list */
5429         for (i = 0; i < NR_DIRTY_TYPE; i++)
5430                 discard_dirty_segmap(sbi, i);
5431
5432         if (__is_large_section(sbi)) {
5433                 mutex_lock(&dirty_i->seglist_lock);
5434                 kvfree(dirty_i->dirty_secmap);
5435                 mutex_unlock(&dirty_i->seglist_lock);
5436         }
5437
5438         destroy_victim_secmap(sbi);
5439         SM_I(sbi)->dirty_info = NULL;
5440         kfree(dirty_i);
5441 }
5442
5443 static void destroy_curseg(struct f2fs_sb_info *sbi)
5444 {
5445         struct curseg_info *array = SM_I(sbi)->curseg_array;
5446         int i;
5447
5448         if (!array)
5449                 return;
5450         SM_I(sbi)->curseg_array = NULL;
5451         for (i = 0; i < NR_CURSEG_TYPE; i++) {
5452                 kfree(array[i].sum_blk);
5453                 kfree(array[i].journal);
5454         }
5455         kfree(array);
5456 }
5457
5458 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
5459 {
5460         struct free_segmap_info *free_i = SM_I(sbi)->free_info;
5461
5462         if (!free_i)
5463                 return;
5464         SM_I(sbi)->free_info = NULL;
5465         kvfree(free_i->free_segmap);
5466         kvfree(free_i->free_secmap);
5467         kfree(free_i);
5468 }
5469
5470 static void destroy_sit_info(struct f2fs_sb_info *sbi)
5471 {
5472         struct sit_info *sit_i = SIT_I(sbi);
5473
5474         if (!sit_i)
5475                 return;
5476
5477         if (sit_i->sentries)
5478                 kvfree(sit_i->bitmap);
5479         kfree(sit_i->tmp_map);
5480
5481         kvfree(sit_i->sentries);
5482         kvfree(sit_i->sec_entries);
5483         kvfree(sit_i->dirty_sentries_bitmap);
5484
5485         SM_I(sbi)->sit_info = NULL;
5486         kvfree(sit_i->sit_bitmap);
5487 #ifdef CONFIG_F2FS_CHECK_FS
5488         kvfree(sit_i->sit_bitmap_mir);
5489         kvfree(sit_i->invalid_segmap);
5490 #endif
5491         kfree(sit_i);
5492 }
5493
5494 void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
5495 {
5496         struct f2fs_sm_info *sm_info = SM_I(sbi);
5497
5498         if (!sm_info)
5499                 return;
5500         f2fs_destroy_flush_cmd_control(sbi, true);
5501         destroy_discard_cmd_control(sbi);
5502         destroy_dirty_segmap(sbi);
5503         destroy_curseg(sbi);
5504         destroy_free_segmap(sbi);
5505         destroy_sit_info(sbi);
5506         sbi->sm_info = NULL;
5507         kfree(sm_info);
5508 }
5509
5510 int __init f2fs_create_segment_manager_caches(void)
5511 {
5512         discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry",
5513                         sizeof(struct discard_entry));
5514         if (!discard_entry_slab)
5515                 goto fail;
5516
5517         discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd",
5518                         sizeof(struct discard_cmd));
5519         if (!discard_cmd_slab)
5520                 goto destroy_discard_entry;
5521
5522         sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set",
5523                         sizeof(struct sit_entry_set));
5524         if (!sit_entry_set_slab)
5525                 goto destroy_discard_cmd;
5526
5527         revoke_entry_slab = f2fs_kmem_cache_create("f2fs_revoke_entry",
5528                         sizeof(struct revoke_entry));
5529         if (!revoke_entry_slab)
5530                 goto destroy_sit_entry_set;
5531         return 0;
5532
5533 destroy_sit_entry_set:
5534         kmem_cache_destroy(sit_entry_set_slab);
5535 destroy_discard_cmd:
5536         kmem_cache_destroy(discard_cmd_slab);
5537 destroy_discard_entry:
5538         kmem_cache_destroy(discard_entry_slab);
5539 fail:
5540         return -ENOMEM;
5541 }
5542
5543 void f2fs_destroy_segment_manager_caches(void)
5544 {
5545         kmem_cache_destroy(sit_entry_set_slab);
5546         kmem_cache_destroy(discard_cmd_slab);
5547         kmem_cache_destroy(discard_entry_slab);
5548         kmem_cache_destroy(revoke_entry_slab);
5549 }