GNU Linux-libre 4.19.211-gnu1
[releases.git] / fs / btrfs / delayed-inode.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2011 Fujitsu.  All rights reserved.
4  * Written by Miao Xie <miaox@cn.fujitsu.com>
5  */
6
7 #include <linux/slab.h>
8 #include <linux/iversion.h>
9 #include <linux/sched/mm.h>
10 #include "delayed-inode.h"
11 #include "disk-io.h"
12 #include "transaction.h"
13 #include "ctree.h"
14 #include "qgroup.h"
15
16 #define BTRFS_DELAYED_WRITEBACK         512
17 #define BTRFS_DELAYED_BACKGROUND        128
18 #define BTRFS_DELAYED_BATCH             16
19
20 static struct kmem_cache *delayed_node_cache;
21
22 int __init btrfs_delayed_inode_init(void)
23 {
24         delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
25                                         sizeof(struct btrfs_delayed_node),
26                                         0,
27                                         SLAB_MEM_SPREAD,
28                                         NULL);
29         if (!delayed_node_cache)
30                 return -ENOMEM;
31         return 0;
32 }
33
34 void __cold btrfs_delayed_inode_exit(void)
35 {
36         kmem_cache_destroy(delayed_node_cache);
37 }
38
39 static inline void btrfs_init_delayed_node(
40                                 struct btrfs_delayed_node *delayed_node,
41                                 struct btrfs_root *root, u64 inode_id)
42 {
43         delayed_node->root = root;
44         delayed_node->inode_id = inode_id;
45         refcount_set(&delayed_node->refs, 0);
46         delayed_node->ins_root = RB_ROOT;
47         delayed_node->del_root = RB_ROOT;
48         mutex_init(&delayed_node->mutex);
49         INIT_LIST_HEAD(&delayed_node->n_list);
50         INIT_LIST_HEAD(&delayed_node->p_list);
51 }
52
53 static inline int btrfs_is_continuous_delayed_item(
54                                         struct btrfs_delayed_item *item1,
55                                         struct btrfs_delayed_item *item2)
56 {
57         if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
58             item1->key.objectid == item2->key.objectid &&
59             item1->key.type == item2->key.type &&
60             item1->key.offset + 1 == item2->key.offset)
61                 return 1;
62         return 0;
63 }
64
65 static struct btrfs_delayed_node *btrfs_get_delayed_node(
66                 struct btrfs_inode *btrfs_inode)
67 {
68         struct btrfs_root *root = btrfs_inode->root;
69         u64 ino = btrfs_ino(btrfs_inode);
70         struct btrfs_delayed_node *node;
71
72         node = READ_ONCE(btrfs_inode->delayed_node);
73         if (node) {
74                 refcount_inc(&node->refs);
75                 return node;
76         }
77
78         spin_lock(&root->inode_lock);
79         node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
80
81         if (node) {
82                 if (btrfs_inode->delayed_node) {
83                         refcount_inc(&node->refs);      /* can be accessed */
84                         BUG_ON(btrfs_inode->delayed_node != node);
85                         spin_unlock(&root->inode_lock);
86                         return node;
87                 }
88
89                 /*
90                  * It's possible that we're racing into the middle of removing
91                  * this node from the radix tree.  In this case, the refcount
92                  * was zero and it should never go back to one.  Just return
93                  * NULL like it was never in the radix at all; our release
94                  * function is in the process of removing it.
95                  *
96                  * Some implementations of refcount_inc refuse to bump the
97                  * refcount once it has hit zero.  If we don't do this dance
98                  * here, refcount_inc() may decide to just WARN_ONCE() instead
99                  * of actually bumping the refcount.
100                  *
101                  * If this node is properly in the radix, we want to bump the
102                  * refcount twice, once for the inode and once for this get
103                  * operation.
104                  */
105                 if (refcount_inc_not_zero(&node->refs)) {
106                         refcount_inc(&node->refs);
107                         btrfs_inode->delayed_node = node;
108                 } else {
109                         node = NULL;
110                 }
111
112                 spin_unlock(&root->inode_lock);
113                 return node;
114         }
115         spin_unlock(&root->inode_lock);
116
117         return NULL;
118 }
119
120 /* Will return either the node or PTR_ERR(-ENOMEM) */
121 static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
122                 struct btrfs_inode *btrfs_inode)
123 {
124         struct btrfs_delayed_node *node;
125         struct btrfs_root *root = btrfs_inode->root;
126         u64 ino = btrfs_ino(btrfs_inode);
127         int ret;
128
129 again:
130         node = btrfs_get_delayed_node(btrfs_inode);
131         if (node)
132                 return node;
133
134         node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
135         if (!node)
136                 return ERR_PTR(-ENOMEM);
137         btrfs_init_delayed_node(node, root, ino);
138
139         /* cached in the btrfs inode and can be accessed */
140         refcount_set(&node->refs, 2);
141
142         ret = radix_tree_preload(GFP_NOFS);
143         if (ret) {
144                 kmem_cache_free(delayed_node_cache, node);
145                 return ERR_PTR(ret);
146         }
147
148         spin_lock(&root->inode_lock);
149         ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
150         if (ret == -EEXIST) {
151                 spin_unlock(&root->inode_lock);
152                 kmem_cache_free(delayed_node_cache, node);
153                 radix_tree_preload_end();
154                 goto again;
155         }
156         btrfs_inode->delayed_node = node;
157         spin_unlock(&root->inode_lock);
158         radix_tree_preload_end();
159
160         return node;
161 }
162
163 /*
164  * Call it when holding delayed_node->mutex
165  *
166  * If mod = 1, add this node into the prepared list.
167  */
168 static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
169                                      struct btrfs_delayed_node *node,
170                                      int mod)
171 {
172         spin_lock(&root->lock);
173         if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
174                 if (!list_empty(&node->p_list))
175                         list_move_tail(&node->p_list, &root->prepare_list);
176                 else if (mod)
177                         list_add_tail(&node->p_list, &root->prepare_list);
178         } else {
179                 list_add_tail(&node->n_list, &root->node_list);
180                 list_add_tail(&node->p_list, &root->prepare_list);
181                 refcount_inc(&node->refs);      /* inserted into list */
182                 root->nodes++;
183                 set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
184         }
185         spin_unlock(&root->lock);
186 }
187
188 /* Call it when holding delayed_node->mutex */
189 static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
190                                        struct btrfs_delayed_node *node)
191 {
192         spin_lock(&root->lock);
193         if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
194                 root->nodes--;
195                 refcount_dec(&node->refs);      /* not in the list */
196                 list_del_init(&node->n_list);
197                 if (!list_empty(&node->p_list))
198                         list_del_init(&node->p_list);
199                 clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
200         }
201         spin_unlock(&root->lock);
202 }
203
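/*
 * Return the first delayed node on the delayed root's node list, with an
 * extra reference held, or NULL if the list is empty.
 */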
204 static struct btrfs_delayed_node *btrfs_first_delayed_node(
205                         struct btrfs_delayed_root *delayed_root)
206 {
207         struct list_head *p;
208         struct btrfs_delayed_node *node = NULL;
209
210         spin_lock(&delayed_root->lock);
211         if (list_empty(&delayed_root->node_list))
212                 goto out;
213
214         p = delayed_root->node_list.next;
215         node = list_entry(p, struct btrfs_delayed_node, n_list);
216         refcount_inc(&node->refs);
217 out:
218         spin_unlock(&delayed_root->lock);
219
220         return node;
221 }
222
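/*
 * Return the delayed node that follows @node on the node list, holding a
 * reference on it; if @node is no longer on the list, start again from the
 * head of the list.  Returns NULL when there is nothing left to process.
 */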
223 static struct btrfs_delayed_node *btrfs_next_delayed_node(
224                                                 struct btrfs_delayed_node *node)
225 {
226         struct btrfs_delayed_root *delayed_root;
227         struct list_head *p;
228         struct btrfs_delayed_node *next = NULL;
229
230         delayed_root = node->root->fs_info->delayed_root;
231         spin_lock(&delayed_root->lock);
232         if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
233                 /* not in the list */
234                 if (list_empty(&delayed_root->node_list))
235                         goto out;
236                 p = delayed_root->node_list.next;
237         } else if (list_is_last(&node->n_list, &delayed_root->node_list))
238                 goto out;
239         else
240                 p = node->n_list.next;
241
242         next = list_entry(p, struct btrfs_delayed_node, n_list);
243         refcount_inc(&next->refs);
244 out:
245         spin_unlock(&delayed_root->lock);
246
247         return next;
248 }
249
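/*
 * Drop a reference on @delayed_node.  While items are still pending the node
 * is kept (re)queued on the delayed root, otherwise it is dequeued.  When the
 * last reference is dropped the node is removed from the root's radix tree
 * under inode_lock and freed.
 */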
250 static void __btrfs_release_delayed_node(
251                                 struct btrfs_delayed_node *delayed_node,
252                                 int mod)
253 {
254         struct btrfs_delayed_root *delayed_root;
255
256         if (!delayed_node)
257                 return;
258
259         delayed_root = delayed_node->root->fs_info->delayed_root;
260
261         mutex_lock(&delayed_node->mutex);
262         if (delayed_node->count)
263                 btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
264         else
265                 btrfs_dequeue_delayed_node(delayed_root, delayed_node);
266         mutex_unlock(&delayed_node->mutex);
267
268         if (refcount_dec_and_test(&delayed_node->refs)) {
269                 struct btrfs_root *root = delayed_node->root;
270
271                 spin_lock(&root->inode_lock);
272                 /*
273                  * Once our refcount goes to zero, nobody is allowed to bump it
274                  * back up.  We can delete it now.
275                  */
276                 ASSERT(refcount_read(&delayed_node->refs) == 0);
277                 radix_tree_delete(&root->delayed_nodes_tree,
278                                   delayed_node->inode_id);
279                 spin_unlock(&root->inode_lock);
280                 kmem_cache_free(delayed_node_cache, delayed_node);
281         }
282 }
283
284 static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
285 {
286         __btrfs_release_delayed_node(node, 0);
287 }
288
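/*
 * Pop the first node off the prepare list and return it with a reference
 * held, or NULL if the prepare list is empty.
 */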
289 static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
290                                         struct btrfs_delayed_root *delayed_root)
291 {
292         struct list_head *p;
293         struct btrfs_delayed_node *node = NULL;
294
295         spin_lock(&delayed_root->lock);
296         if (list_empty(&delayed_root->prepare_list))
297                 goto out;
298
299         p = delayed_root->prepare_list.next;
300         list_del_init(p);
301         node = list_entry(p, struct btrfs_delayed_node, p_list);
302         refcount_inc(&node->refs);
303 out:
304         spin_unlock(&delayed_root->lock);
305
306         return node;
307 }
308
309 static inline void btrfs_release_prepared_delayed_node(
310                                         struct btrfs_delayed_node *node)
311 {
312         __btrfs_release_delayed_node(node, 1);
313 }
314
315 static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
316 {
317         struct btrfs_delayed_item *item;
318         item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
319         if (item) {
320                 item->data_len = data_len;
321                 item->ins_or_del = 0;
322                 item->bytes_reserved = 0;
323                 item->delayed_node = NULL;
324                 refcount_set(&item->refs, 1);
325         }
326         return item;
327 }
328
329 /*
330  * __btrfs_lookup_delayed_item - look up the delayed item by key
331  * @root:         the rb-tree root of the delayed node (ins_root or del_root)
332  * @key:          the key to look up
333  * @prev:         used to store the prev item if the right item isn't found
334  * @next:         used to store the next item if the right item isn't found
335  *
336  * Note: if the exact item is not found, NULL is returned and the previous
337  * and next items are stored via @prev and @next.
338  */
339 static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
340                                 struct rb_root *root,
341                                 struct btrfs_key *key,
342                                 struct btrfs_delayed_item **prev,
343                                 struct btrfs_delayed_item **next)
344 {
345         struct rb_node *node, *prev_node = NULL;
346         struct btrfs_delayed_item *delayed_item = NULL;
347         int ret = 0;
348
349         node = root->rb_node;
350
351         while (node) {
352                 delayed_item = rb_entry(node, struct btrfs_delayed_item,
353                                         rb_node);
354                 prev_node = node;
355                 ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
356                 if (ret < 0)
357                         node = node->rb_right;
358                 else if (ret > 0)
359                         node = node->rb_left;
360                 else
361                         return delayed_item;
362         }
363
364         if (prev) {
365                 if (!prev_node)
366                         *prev = NULL;
367                 else if (ret < 0)
368                         *prev = delayed_item;
369                 else if ((node = rb_prev(prev_node)) != NULL) {
370                         *prev = rb_entry(node, struct btrfs_delayed_item,
371                                          rb_node);
372                 } else
373                         *prev = NULL;
374         }
375
376         if (next) {
377                 if (!prev_node)
378                         *next = NULL;
379                 else if (ret > 0)
380                         *next = delayed_item;
381                 else if ((node = rb_next(prev_node)) != NULL) {
382                         *next = rb_entry(node, struct btrfs_delayed_item,
383                                          rb_node);
384                 } else
385                         *next = NULL;
386         }
387         return NULL;
388 }
389
390 static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
391                                         struct btrfs_delayed_node *delayed_node,
392                                         struct btrfs_key *key)
393 {
394         return __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
395                                            NULL, NULL);
396 }
397
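/*
 * Link @ins into the delayed node's insertion or deletion rb-tree, depending
 * on @action.  Returns -EEXIST if an item with the same key is already
 * queued.  For new dir index insertions, index_cnt is advanced past the new
 * offset, and both the per-node and global item counters are bumped.
 */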
398 static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
399                                     struct btrfs_delayed_item *ins,
400                                     int action)
401 {
402         struct rb_node **p, *node;
403         struct rb_node *parent_node = NULL;
404         struct rb_root *root;
405         struct btrfs_delayed_item *item;
406         int cmp;
407
408         if (action == BTRFS_DELAYED_INSERTION_ITEM)
409                 root = &delayed_node->ins_root;
410         else if (action == BTRFS_DELAYED_DELETION_ITEM)
411                 root = &delayed_node->del_root;
412         else
413                 BUG();
414         p = &root->rb_node;
415         node = &ins->rb_node;
416
417         while (*p) {
418                 parent_node = *p;
419                 item = rb_entry(parent_node, struct btrfs_delayed_item,
420                                  rb_node);
421
422                 cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
423                 if (cmp < 0)
424                         p = &(*p)->rb_right;
425                 else if (cmp > 0)
426                         p = &(*p)->rb_left;
427                 else
428                         return -EEXIST;
429         }
430
431         rb_link_node(node, parent_node, p);
432         rb_insert_color(node, root);
433         ins->delayed_node = delayed_node;
434         ins->ins_or_del = action;
435
436         if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
437             action == BTRFS_DELAYED_INSERTION_ITEM &&
438             ins->key.offset >= delayed_node->index_cnt)
439                         delayed_node->index_cnt = ins->key.offset + 1;
440
441         delayed_node->count++;
442         atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
443         return 0;
444 }
445
446 static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
447                                               struct btrfs_delayed_item *item)
448 {
449         return __btrfs_add_delayed_item(node, item,
450                                         BTRFS_DELAYED_INSERTION_ITEM);
451 }
452
453 static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
454                                              struct btrfs_delayed_item *item)
455 {
456         return __btrfs_add_delayed_item(node, item,
457                                         BTRFS_DELAYED_DELETION_ITEM);
458 }
459
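/*
 * Account one completed delayed item: advance items_seq, drop the global item
 * count and wake up waiters once the backlog falls below
 * BTRFS_DELAYED_BACKGROUND or after every BTRFS_DELAYED_BATCH completions.
 */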
460 static void finish_one_item(struct btrfs_delayed_root *delayed_root)
461 {
462         int seq = atomic_inc_return(&delayed_root->items_seq);
463
464         /* atomic_dec_return implies a barrier */
465         if ((atomic_dec_return(&delayed_root->items) <
466             BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0))
467                 cond_wake_up_nomb(&delayed_root->wait);
468 }
469
470 static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
471 {
472         struct rb_root *root;
473         struct btrfs_delayed_root *delayed_root;
474
475         delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
476
477         BUG_ON(!delayed_root);
478         BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
479                delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
480
481         if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
482                 root = &delayed_item->delayed_node->ins_root;
483         else
484                 root = &delayed_item->delayed_node->del_root;
485
486         rb_erase(&delayed_item->rb_node, root);
487         delayed_item->delayed_node->count--;
488
489         finish_one_item(delayed_root);
490 }
491
492 static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
493 {
494         if (item) {
495                 __btrfs_remove_delayed_item(item);
496                 if (refcount_dec_and_test(&item->refs))
497                         kfree(item);
498         }
499 }
500
501 static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
502                                         struct btrfs_delayed_node *delayed_node)
503 {
504         struct rb_node *p;
505         struct btrfs_delayed_item *item = NULL;
506
507         p = rb_first(&delayed_node->ins_root);
508         if (p)
509                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
510
511         return item;
512 }
513
514 static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
515                                         struct btrfs_delayed_node *delayed_node)
516 {
517         struct rb_node *p;
518         struct btrfs_delayed_item *item = NULL;
519
520         p = rb_first(&delayed_node->del_root);
521         if (p)
522                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
523
524         return item;
525 }
526
527 static struct btrfs_delayed_item *__btrfs_next_delayed_item(
528                                                 struct btrfs_delayed_item *item)
529 {
530         struct rb_node *p;
531         struct btrfs_delayed_item *next = NULL;
532
533         p = rb_next(&item->rb_node);
534         if (p)
535                 next = rb_entry(p, struct btrfs_delayed_item, rb_node);
536
537         return next;
538 }
539
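/*
 * Reserve metadata space for one delayed item by migrating it from the
 * transaction's block reservation into the delayed block reservation.  This
 * is a no-op for transactions that did not reserve any bytes.
 */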
540 static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
541                                                struct btrfs_root *root,
542                                                struct btrfs_delayed_item *item)
543 {
544         struct btrfs_block_rsv *src_rsv;
545         struct btrfs_block_rsv *dst_rsv;
546         struct btrfs_fs_info *fs_info = root->fs_info;
547         u64 num_bytes;
548         int ret;
549
550         if (!trans->bytes_reserved)
551                 return 0;
552
553         src_rsv = trans->block_rsv;
554         dst_rsv = &fs_info->delayed_block_rsv;
555
556         num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
557
558         /*
559          * Here we migrate space rsv from transaction rsv, since we have already
560          * reserved space when starting a transaction.  So no need to reserve
561          * qgroup space here.
562          */
563         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
564         if (!ret) {
565                 trace_btrfs_space_reservation(fs_info, "delayed_item",
566                                               item->key.objectid,
567                                               num_bytes, 1);
568                 item->bytes_reserved = num_bytes;
569         }
570
571         return ret;
572 }
573
574 static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
575                                                 struct btrfs_delayed_item *item)
576 {
577         struct btrfs_block_rsv *rsv;
578         struct btrfs_fs_info *fs_info = root->fs_info;
579
580         if (!item->bytes_reserved)
581                 return;
582
583         rsv = &fs_info->delayed_block_rsv;
584         /*
585          * Check btrfs_delayed_item_reserve_metadata() to see why we don't need
586          * to release/reserve qgroup space.
587          */
588         trace_btrfs_space_reservation(fs_info, "delayed_item",
589                                       item->key.objectid, item->bytes_reserved,
590                                       0);
591         btrfs_block_rsv_release(fs_info, rsv,
592                                 item->bytes_reserved);
593 }
594
595 static int btrfs_delayed_inode_reserve_metadata(
596                                         struct btrfs_trans_handle *trans,
597                                         struct btrfs_root *root,
598                                         struct btrfs_inode *inode,
599                                         struct btrfs_delayed_node *node)
600 {
601         struct btrfs_fs_info *fs_info = root->fs_info;
602         struct btrfs_block_rsv *src_rsv;
603         struct btrfs_block_rsv *dst_rsv;
604         u64 num_bytes;
605         int ret;
606
607         src_rsv = trans->block_rsv;
608         dst_rsv = &fs_info->delayed_block_rsv;
609
610         num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
611
612         /*
613          * btrfs_dirty_inode will update the inode under btrfs_join_transaction
614          * which, for speed, doesn't reserve space.  This is a problem since we
615          * still need to reserve space for this update, so try to reserve the
616          * space.
617          *
618          * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
619          * we always reserve enough to update the inode item.
620          */
621         if (!src_rsv || (!trans->bytes_reserved &&
622                          src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
623                 ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
624                 if (ret < 0)
625                         return ret;
626                 ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
627                                           BTRFS_RESERVE_NO_FLUSH);
628                 /*
629          * Since we're under a transaction, reserve_metadata_bytes() could
630          * try to commit the transaction, which would make it return
631                  * EAGAIN to make us stop the transaction we have, so return
632                  * ENOSPC instead so that btrfs_dirty_inode knows what to do.
633                  */
634                 if (ret == -EAGAIN) {
635                         ret = -ENOSPC;
636                         btrfs_qgroup_free_meta_prealloc(root, num_bytes);
637                 }
638                 if (!ret) {
639                         node->bytes_reserved = num_bytes;
640                         trace_btrfs_space_reservation(fs_info,
641                                                       "delayed_inode",
642                                                       btrfs_ino(inode),
643                                                       num_bytes, 1);
644                 } else {
645                         btrfs_qgroup_free_meta_prealloc(root, num_bytes);
646                 }
647                 return ret;
648         }
649
650         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
651         if (!ret) {
652                 trace_btrfs_space_reservation(fs_info, "delayed_inode",
653                                               btrfs_ino(inode), num_bytes, 1);
654                 node->bytes_reserved = num_bytes;
655         }
656
657         return ret;
658 }
659
660 static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
661                                                 struct btrfs_delayed_node *node,
662                                                 bool qgroup_free)
663 {
664         struct btrfs_block_rsv *rsv;
665
666         if (!node->bytes_reserved)
667                 return;
668
669         rsv = &fs_info->delayed_block_rsv;
670         trace_btrfs_space_reservation(fs_info, "delayed_inode",
671                                       node->inode_id, node->bytes_reserved, 0);
672         btrfs_block_rsv_release(fs_info, rsv,
673                                 node->bytes_reserved);
674         if (qgroup_free)
675                 btrfs_qgroup_free_meta_prealloc(node->root,
676                                 node->bytes_reserved);
677         else
678                 btrfs_qgroup_convert_reserved_meta(node->root,
679                                 node->bytes_reserved);
680         node->bytes_reserved = 0;
681 }
682
683 /*
684  * This helper inserts a run of continuous (consecutive-key) items into the
685  * same leaf, limited by the amount of free space left in that leaf.
686  */
687 static int btrfs_batch_insert_items(struct btrfs_root *root,
688                                     struct btrfs_path *path,
689                                     struct btrfs_delayed_item *item)
690 {
691         struct btrfs_fs_info *fs_info = root->fs_info;
692         struct btrfs_delayed_item *curr, *next;
693         int free_space;
694         int total_data_size = 0, total_size = 0;
695         struct extent_buffer *leaf;
696         char *data_ptr;
697         struct btrfs_key *keys;
698         u32 *data_size;
699         struct list_head head;
700         int slot;
701         int nitems;
702         int i;
703         int ret = 0;
704
705         BUG_ON(!path->nodes[0]);
706
707         leaf = path->nodes[0];
708         free_space = btrfs_leaf_free_space(fs_info, leaf);
709         INIT_LIST_HEAD(&head);
710
711         next = item;
712         nitems = 0;
713
714         /*
715          * count the number of the continuous items that we can insert in batch
716          */
717         while (total_size + next->data_len + sizeof(struct btrfs_item) <=
718                free_space) {
719                 total_data_size += next->data_len;
720                 total_size += next->data_len + sizeof(struct btrfs_item);
721                 list_add_tail(&next->tree_list, &head);
722                 nitems++;
723
724                 curr = next;
725                 next = __btrfs_next_delayed_item(curr);
726                 if (!next)
727                         break;
728
729                 if (!btrfs_is_continuous_delayed_item(curr, next))
730                         break;
731         }
732
733         if (!nitems) {
734                 ret = 0;
735                 goto out;
736         }
737
738         /*
739          * we need to allocate some memory, but that might cause the task
740          * to sleep, so we set all locked nodes in the path to blocking locks
741          * first.
742          */
743         btrfs_set_path_blocking(path);
744
745         keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
746         if (!keys) {
747                 ret = -ENOMEM;
748                 goto out;
749         }
750
751         data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
752         if (!data_size) {
753                 ret = -ENOMEM;
754                 goto error;
755         }
756
757         /* get keys of all the delayed items */
758         i = 0;
759         list_for_each_entry(next, &head, tree_list) {
760                 keys[i] = next->key;
761                 data_size[i] = next->data_len;
762                 i++;
763         }
764
765         /* reset all the locked nodes in the path to spinning locks. */
766         btrfs_clear_path_blocking(path, NULL, 0);
767
768         /* insert the keys of the items */
769         setup_items_for_insert(root, path, keys, data_size,
770                                total_data_size, total_size, nitems);
771
772         /* insert the dir index items */
773         slot = path->slots[0];
774         list_for_each_entry_safe(curr, next, &head, tree_list) {
775                 data_ptr = btrfs_item_ptr(leaf, slot, char);
776                 write_extent_buffer(leaf, &curr->data,
777                                     (unsigned long)data_ptr,
778                                     curr->data_len);
779                 slot++;
780
781                 btrfs_delayed_item_release_metadata(root, curr);
782
783                 list_del(&curr->tree_list);
784                 btrfs_release_delayed_item(curr);
785         }
786
787 error:
788         kfree(data_size);
789         kfree(keys);
790 out:
791         return ret;
792 }
793
794 /*
795  * This helper does a simple insertion that doesn't need to extend an existing
796  * item for new data, such as a directory name index or inode item insertion.
797  */
798 static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
799                                      struct btrfs_root *root,
800                                      struct btrfs_path *path,
801                                      struct btrfs_delayed_item *delayed_item)
802 {
803         struct extent_buffer *leaf;
804         unsigned int nofs_flag;
805         char *ptr;
806         int ret;
807
808         nofs_flag = memalloc_nofs_save();
809         ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
810                                       delayed_item->data_len);
811         memalloc_nofs_restore(nofs_flag);
812         if (ret < 0 && ret != -EEXIST)
813                 return ret;
814
815         leaf = path->nodes[0];
816
817         ptr = btrfs_item_ptr(leaf, path->slots[0], char);
818
819         write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
820                             delayed_item->data_len);
821         btrfs_mark_buffer_dirty(leaf);
822
823         btrfs_delayed_item_release_metadata(root, delayed_item);
824         return 0;
825 }
826
827 /*
828  * we insert an item first, then if there are some continuous items, we try
829  * to insert those items into the same leaf.
830  */
831 static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
832                                       struct btrfs_path *path,
833                                       struct btrfs_root *root,
834                                       struct btrfs_delayed_node *node)
835 {
836         struct btrfs_delayed_item *curr, *prev;
837         int ret = 0;
838
839 do_again:
840         mutex_lock(&node->mutex);
841         curr = __btrfs_first_delayed_insertion_item(node);
842         if (!curr)
843                 goto insert_end;
844
845         ret = btrfs_insert_delayed_item(trans, root, path, curr);
846         if (ret < 0) {
847                 btrfs_release_path(path);
848                 goto insert_end;
849         }
850
851         prev = curr;
852         curr = __btrfs_next_delayed_item(prev);
853         if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
854                 /* insert the continuous items into the same leaf */
855                 path->slots[0]++;
856                 btrfs_batch_insert_items(root, path, curr);
857         }
858         btrfs_release_delayed_item(prev);
859         btrfs_mark_buffer_dirty(path->nodes[0]);
860
861         btrfs_release_path(path);
862         mutex_unlock(&node->mutex);
863         goto do_again;
864
865 insert_end:
866         mutex_unlock(&node->mutex);
867         return ret;
868 }
869
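/*
 * Starting at the current path slot, gather the delayed deletion items whose
 * keys match consecutive items in the leaf and remove them from the btree
 * with a single btrfs_del_items() call, then release their reserved metadata
 * and drop the delayed items.
 */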
870 static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
871                                     struct btrfs_root *root,
872                                     struct btrfs_path *path,
873                                     struct btrfs_delayed_item *item)
874 {
875         struct btrfs_delayed_item *curr, *next;
876         struct extent_buffer *leaf;
877         struct btrfs_key key;
878         struct list_head head;
879         int nitems, i, last_item;
880         int ret = 0;
881
882         BUG_ON(!path->nodes[0]);
883
884         leaf = path->nodes[0];
885
886         i = path->slots[0];
887         last_item = btrfs_header_nritems(leaf) - 1;
888         if (i > last_item)
889                 return -ENOENT; /* FIXME: Is errno suitable? */
890
891         next = item;
892         INIT_LIST_HEAD(&head);
893         btrfs_item_key_to_cpu(leaf, &key, i);
894         nitems = 0;
895         /*
896          * count the number of the dir index items that we can delete in batch
897          */
898         while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
899                 list_add_tail(&next->tree_list, &head);
900                 nitems++;
901
902                 curr = next;
903                 next = __btrfs_next_delayed_item(curr);
904                 if (!next)
905                         break;
906
907                 if (!btrfs_is_continuous_delayed_item(curr, next))
908                         break;
909
910                 i++;
911                 if (i > last_item)
912                         break;
913                 btrfs_item_key_to_cpu(leaf, &key, i);
914         }
915
916         if (!nitems)
917                 return 0;
918
919         ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
920         if (ret)
921                 goto out;
922
923         list_for_each_entry_safe(curr, next, &head, tree_list) {
924                 btrfs_delayed_item_release_metadata(root, curr);
925                 list_del(&curr->tree_list);
926                 btrfs_release_delayed_item(curr);
927         }
928
929 out:
930         return ret;
931 }
932
933 static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
934                                       struct btrfs_path *path,
935                                       struct btrfs_root *root,
936                                       struct btrfs_delayed_node *node)
937 {
938         struct btrfs_delayed_item *curr, *prev;
939         unsigned int nofs_flag;
940         int ret = 0;
941
942 do_again:
943         mutex_lock(&node->mutex);
944         curr = __btrfs_first_delayed_deletion_item(node);
945         if (!curr)
946                 goto delete_fail;
947
948         nofs_flag = memalloc_nofs_save();
949         ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
950         memalloc_nofs_restore(nofs_flag);
951         if (ret < 0)
952                 goto delete_fail;
953         else if (ret > 0) {
954                 /*
955                  * can't find the item this delayed item points to, so the
956                  * delayed item is stale, just drop it.
957                  */
958                 prev = curr;
959                 curr = __btrfs_next_delayed_item(prev);
960                 btrfs_release_delayed_item(prev);
961                 ret = 0;
962                 btrfs_release_path(path);
963                 if (curr) {
964                         mutex_unlock(&node->mutex);
965                         goto do_again;
966                 } else
967                         goto delete_fail;
968         }
969
970         btrfs_batch_delete_items(trans, root, path, curr);
971         btrfs_release_path(path);
972         mutex_unlock(&node->mutex);
973         goto do_again;
974
975 delete_fail:
976         btrfs_release_path(path);
977         mutex_unlock(&node->mutex);
978         return ret;
979 }
980
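/*
 * If the delayed node still carries a dirty inode item, clear the dirty flag,
 * drop the node's item count and account the finished item on the delayed
 * root.
 */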
981 static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
982 {
983         struct btrfs_delayed_root *delayed_root;
984
985         if (delayed_node &&
986             test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
987                 BUG_ON(!delayed_node->root);
988                 clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
989                 delayed_node->count--;
990
991                 delayed_root = delayed_node->root->fs_info->delayed_root;
992                 finish_one_item(delayed_root);
993         }
994 }
995
996 static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
997 {
998         struct btrfs_delayed_root *delayed_root;
999
1000         ASSERT(delayed_node->root);
1001         clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1002         delayed_node->count--;
1003
1004         delayed_root = delayed_node->root->fs_info->delayed_root;
1005         finish_one_item(delayed_root);
1006 }
1007
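/*
 * Copy the delayed in-memory inode item over the INODE_ITEM in the btree and,
 * if BTRFS_DELAYED_NODE_DEL_IREF is set, delete the inode's single
 * INODE_REF/INODE_EXTREF item as well.  Any failure other than -ENOENT
 * aborts the transaction.
 */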
1008 static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1009                                         struct btrfs_root *root,
1010                                         struct btrfs_path *path,
1011                                         struct btrfs_delayed_node *node)
1012 {
1013         struct btrfs_fs_info *fs_info = root->fs_info;
1014         struct btrfs_key key;
1015         struct btrfs_inode_item *inode_item;
1016         struct extent_buffer *leaf;
1017         unsigned int nofs_flag;
1018         int mod;
1019         int ret;
1020
1021         key.objectid = node->inode_id;
1022         key.type = BTRFS_INODE_ITEM_KEY;
1023         key.offset = 0;
1024
1025         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1026                 mod = -1;
1027         else
1028                 mod = 1;
1029
1030         nofs_flag = memalloc_nofs_save();
1031         ret = btrfs_lookup_inode(trans, root, path, &key, mod);
1032         memalloc_nofs_restore(nofs_flag);
1033         if (ret > 0)
1034                 ret = -ENOENT;
1035         if (ret < 0)
1036                 goto out;
1037
1038         leaf = path->nodes[0];
1039         inode_item = btrfs_item_ptr(leaf, path->slots[0],
1040                                     struct btrfs_inode_item);
1041         write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
1042                             sizeof(struct btrfs_inode_item));
1043         btrfs_mark_buffer_dirty(leaf);
1044
1045         if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1046                 goto no_iref;
1047
1048         path->slots[0]++;
1049         if (path->slots[0] >= btrfs_header_nritems(leaf))
1050                 goto search;
1051 again:
1052         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1053         if (key.objectid != node->inode_id)
1054                 goto out;
1055
1056         if (key.type != BTRFS_INODE_REF_KEY &&
1057             key.type != BTRFS_INODE_EXTREF_KEY)
1058                 goto out;
1059
1060         /*
1061          * Delayed iref deletion is only used for an inode that has a single
1062          * link, so there is only one iref. The case of several irefs in the
1063          * same item cannot happen.
1064          */
1065         btrfs_del_item(trans, root, path);
1066 out:
1067         btrfs_release_delayed_iref(node);
1068 no_iref:
1069         btrfs_release_path(path);
1070 err_out:
1071         btrfs_delayed_inode_release_metadata(fs_info, node, (ret < 0));
1072         btrfs_release_delayed_inode(node);
1073
1074         /*
1075          * If we fail to update the delayed inode we need to abort the
1076          * transaction, because we could leave the inode with the improper
1077          * counts behind.
1078          */
1079         if (ret && ret != -ENOENT)
1080                 btrfs_abort_transaction(trans, ret);
1081
1082         return ret;
1083
1084 search:
1085         btrfs_release_path(path);
1086
1087         key.type = BTRFS_INODE_EXTREF_KEY;
1088         key.offset = -1;
1089
1090         nofs_flag = memalloc_nofs_save();
1091         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1092         memalloc_nofs_restore(nofs_flag);
1093         if (ret < 0)
1094                 goto err_out;
1095         ASSERT(ret);
1096
1097         ret = 0;
1098         leaf = path->nodes[0];
1099         path->slots[0]--;
1100         goto again;
1101 }
1102
1103 static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1104                                              struct btrfs_root *root,
1105                                              struct btrfs_path *path,
1106                                              struct btrfs_delayed_node *node)
1107 {
1108         int ret;
1109
1110         mutex_lock(&node->mutex);
1111         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
1112                 mutex_unlock(&node->mutex);
1113                 return 0;
1114         }
1115
1116         ret = __btrfs_update_delayed_inode(trans, root, path, node);
1117         mutex_unlock(&node->mutex);
1118         return ret;
1119 }
1120
1121 static inline int
1122 __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1123                                    struct btrfs_path *path,
1124                                    struct btrfs_delayed_node *node)
1125 {
1126         int ret;
1127
1128         ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1129         if (ret)
1130                 return ret;
1131
1132         ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1133         if (ret)
1134                 return ret;
1135
1136         ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1137         return ret;
1138 }
1139
1140 /*
1141  * Called when committing the transaction.
1142  * Returns 0 on success.
1143  * Returns < 0 on error and returns with an aborted transaction with any
1144  * outstanding delayed items cleaned up.
1145  */
1146 static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
1147 {
1148         struct btrfs_fs_info *fs_info = trans->fs_info;
1149         struct btrfs_delayed_root *delayed_root;
1150         struct btrfs_delayed_node *curr_node, *prev_node;
1151         struct btrfs_path *path;
1152         struct btrfs_block_rsv *block_rsv;
1153         int ret = 0;
1154         bool count = (nr > 0);
1155
1156         if (trans->aborted)
1157                 return -EIO;
1158
1159         path = btrfs_alloc_path();
1160         if (!path)
1161                 return -ENOMEM;
1162         path->leave_spinning = 1;
1163
1164         block_rsv = trans->block_rsv;
1165         trans->block_rsv = &fs_info->delayed_block_rsv;
1166
1167         delayed_root = fs_info->delayed_root;
1168
1169         curr_node = btrfs_first_delayed_node(delayed_root);
1170         while (curr_node && (!count || (count && nr--))) {
1171                 ret = __btrfs_commit_inode_delayed_items(trans, path,
1172                                                          curr_node);
1173                 if (ret) {
1174                         btrfs_release_delayed_node(curr_node);
1175                         curr_node = NULL;
1176                         btrfs_abort_transaction(trans, ret);
1177                         break;
1178                 }
1179
1180                 prev_node = curr_node;
1181                 curr_node = btrfs_next_delayed_node(curr_node);
1182                 btrfs_release_delayed_node(prev_node);
1183         }
1184
1185         if (curr_node)
1186                 btrfs_release_delayed_node(curr_node);
1187         btrfs_free_path(path);
1188         trans->block_rsv = block_rsv;
1189
1190         return ret;
1191 }
1192
1193 int btrfs_run_delayed_items(struct btrfs_trans_handle *trans)
1194 {
1195         return __btrfs_run_delayed_items(trans, -1);
1196 }
1197
1198 int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, int nr)
1199 {
1200         return __btrfs_run_delayed_items(trans, nr);
1201 }
1202
1203 int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1204                                      struct btrfs_inode *inode)
1205 {
1206         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1207         struct btrfs_path *path;
1208         struct btrfs_block_rsv *block_rsv;
1209         int ret;
1210
1211         if (!delayed_node)
1212                 return 0;
1213
1214         mutex_lock(&delayed_node->mutex);
1215         if (!delayed_node->count) {
1216                 mutex_unlock(&delayed_node->mutex);
1217                 btrfs_release_delayed_node(delayed_node);
1218                 return 0;
1219         }
1220         mutex_unlock(&delayed_node->mutex);
1221
1222         path = btrfs_alloc_path();
1223         if (!path) {
1224                 btrfs_release_delayed_node(delayed_node);
1225                 return -ENOMEM;
1226         }
1227         path->leave_spinning = 1;
1228
1229         block_rsv = trans->block_rsv;
1230         trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1231
1232         ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1233
1234         btrfs_release_delayed_node(delayed_node);
1235         btrfs_free_path(path);
1236         trans->block_rsv = block_rsv;
1237
1238         return ret;
1239 }
1240
1241 int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
1242 {
1243         struct btrfs_fs_info *fs_info = inode->root->fs_info;
1244         struct btrfs_trans_handle *trans;
1245         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1246         struct btrfs_path *path;
1247         struct btrfs_block_rsv *block_rsv;
1248         int ret;
1249
1250         if (!delayed_node)
1251                 return 0;
1252
1253         mutex_lock(&delayed_node->mutex);
1254         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1255                 mutex_unlock(&delayed_node->mutex);
1256                 btrfs_release_delayed_node(delayed_node);
1257                 return 0;
1258         }
1259         mutex_unlock(&delayed_node->mutex);
1260
1261         trans = btrfs_join_transaction(delayed_node->root);
1262         if (IS_ERR(trans)) {
1263                 ret = PTR_ERR(trans);
1264                 goto out;
1265         }
1266
1267         path = btrfs_alloc_path();
1268         if (!path) {
1269                 ret = -ENOMEM;
1270                 goto trans_out;
1271         }
1272         path->leave_spinning = 1;
1273
1274         block_rsv = trans->block_rsv;
1275         trans->block_rsv = &fs_info->delayed_block_rsv;
1276
1277         mutex_lock(&delayed_node->mutex);
1278         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
1279                 ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
1280                                                    path, delayed_node);
1281         else
1282                 ret = 0;
1283         mutex_unlock(&delayed_node->mutex);
1284
1285         btrfs_free_path(path);
1286         trans->block_rsv = block_rsv;
1287 trans_out:
1288         btrfs_end_transaction(trans);
1289         btrfs_btree_balance_dirty(fs_info);
1290 out:
1291         btrfs_release_delayed_node(delayed_node);
1292
1293         return ret;
1294 }
1295
1296 void btrfs_remove_delayed_node(struct btrfs_inode *inode)
1297 {
1298         struct btrfs_delayed_node *delayed_node;
1299
1300         delayed_node = READ_ONCE(inode->delayed_node);
1301         if (!delayed_node)
1302                 return;
1303
1304         inode->delayed_node = NULL;
1305         btrfs_release_delayed_node(delayed_node);
1306 }
1307
1308 struct btrfs_async_delayed_work {
1309         struct btrfs_delayed_root *delayed_root;
1310         int nr;
1311         struct btrfs_work work;
1312 };
1313
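/*
 * Worker callback: keep taking prepared delayed nodes and committing their
 * delayed items in a joined transaction until the global item count drops
 * below BTRFS_DELAYED_BACKGROUND / 2 or the requested number of nodes has
 * been processed (at most BTRFS_DELAYED_WRITEBACK when asked to run all).
 */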
1314 static void btrfs_async_run_delayed_root(struct btrfs_work *work)
1315 {
1316         struct btrfs_async_delayed_work *async_work;
1317         struct btrfs_delayed_root *delayed_root;
1318         struct btrfs_trans_handle *trans;
1319         struct btrfs_path *path;
1320         struct btrfs_delayed_node *delayed_node = NULL;
1321         struct btrfs_root *root;
1322         struct btrfs_block_rsv *block_rsv;
1323         int total_done = 0;
1324
1325         async_work = container_of(work, struct btrfs_async_delayed_work, work);
1326         delayed_root = async_work->delayed_root;
1327
1328         path = btrfs_alloc_path();
1329         if (!path)
1330                 goto out;
1331
1332         do {
1333                 if (atomic_read(&delayed_root->items) <
1334                     BTRFS_DELAYED_BACKGROUND / 2)
1335                         break;
1336
1337                 delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
1338                 if (!delayed_node)
1339                         break;
1340
1341                 path->leave_spinning = 1;
1342                 root = delayed_node->root;
1343
1344                 trans = btrfs_join_transaction(root);
1345                 if (IS_ERR(trans)) {
1346                         btrfs_release_path(path);
1347                         btrfs_release_prepared_delayed_node(delayed_node);
1348                         total_done++;
1349                         continue;
1350                 }
1351
1352                 block_rsv = trans->block_rsv;
1353                 trans->block_rsv = &root->fs_info->delayed_block_rsv;
1354
1355                 __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1356
1357                 trans->block_rsv = block_rsv;
1358                 btrfs_end_transaction(trans);
1359                 btrfs_btree_balance_dirty_nodelay(root->fs_info);
1360
1361                 btrfs_release_path(path);
1362                 btrfs_release_prepared_delayed_node(delayed_node);
1363                 total_done++;
1364
1365         } while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK)
1366                  || total_done < async_work->nr);
1367
1368         btrfs_free_path(path);
1369 out:
1370         wake_up(&delayed_root->wait);
1371         kfree(async_work);
1372 }
1373
1374
1375 static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1376                                      struct btrfs_fs_info *fs_info, int nr)
1377 {
1378         struct btrfs_async_delayed_work *async_work;
1379
1380         async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
1381         if (!async_work)
1382                 return -ENOMEM;
1383
1384         async_work->delayed_root = delayed_root;
1385         btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
1386                         btrfs_async_run_delayed_root, NULL, NULL);
1387         async_work->nr = nr;
1388
1389         btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
1390         return 0;
1391 }
1392
1393 void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info)
1394 {
1395         WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root));
1396 }
1397
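/*
 * A waiter may stop waiting once at least BTRFS_DELAYED_BATCH more items have
 * completed since it sampled items_seq, or once the backlog has dropped below
 * BTRFS_DELAYED_BACKGROUND.
 */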
1398 static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
1399 {
1400         int val = atomic_read(&delayed_root->items_seq);
1401
1402         if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
1403                 return 1;
1404
1405         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1406                 return 1;
1407
1408         return 0;
1409 }
1410
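/*
 * Throttle delayed item producers: do nothing while the backlog is below
 * BTRFS_DELAYED_BACKGROUND or the delayed workqueue is congested, otherwise
 * kick the async worker, and once the backlog exceeds BTRFS_DELAYED_WRITEBACK
 * also wait for it to shrink.
 */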
1411 void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
1412 {
1413         struct btrfs_delayed_root *delayed_root = fs_info->delayed_root;
1414
1415         if ((atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) ||
1416                 btrfs_workqueue_normal_congested(fs_info->delayed_workers))
1417                 return;
1418
1419         if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1420                 int seq;
1421                 int ret;
1422
1423                 seq = atomic_read(&delayed_root->items_seq);
1424
1425                 ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
1426                 if (ret)
1427                         return;
1428
1429                 wait_event_interruptible(delayed_root->wait,
1430                                          could_end_wait(delayed_root, seq));
1431                 return;
1432         }
1433
1434         btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
1435 }
1436
1437 /* Will return 0 or -ENOMEM */
1438 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1439                                    const char *name, int name_len,
1440                                    struct btrfs_inode *dir,
1441                                    struct btrfs_disk_key *disk_key, u8 type,
1442                                    u64 index)
1443 {
1444         struct btrfs_delayed_node *delayed_node;
1445         struct btrfs_delayed_item *delayed_item;
1446         struct btrfs_dir_item *dir_item;
1447         int ret;
1448
1449         delayed_node = btrfs_get_or_create_delayed_node(dir);
1450         if (IS_ERR(delayed_node))
1451                 return PTR_ERR(delayed_node);
1452
1453         delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
1454         if (!delayed_item) {
1455                 ret = -ENOMEM;
1456                 goto release_node;
1457         }
1458
1459         delayed_item->key.objectid = btrfs_ino(dir);
1460         delayed_item->key.type = BTRFS_DIR_INDEX_KEY;
1461         delayed_item->key.offset = index;
1462
1463         dir_item = (struct btrfs_dir_item *)delayed_item->data;
1464         dir_item->location = *disk_key;
1465         btrfs_set_stack_dir_transid(dir_item, trans->transid);
1466         btrfs_set_stack_dir_data_len(dir_item, 0);
1467         btrfs_set_stack_dir_name_len(dir_item, name_len);
1468         btrfs_set_stack_dir_type(dir_item, type);
1469         memcpy((char *)(dir_item + 1), name, name_len);
1470
1471         ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, delayed_item);
1472         /*
1473          * we reserved enough space when we started the transaction, so a
1474          * metadata reservation failure here should be impossible
1475          */
1476         BUG_ON(ret);
1477
1478         mutex_lock(&delayed_node->mutex);
1479         ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1480         if (unlikely(ret)) {
1481                 btrfs_err(trans->fs_info,
1482                           "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1483                           name_len, name, delayed_node->root->objectid,
1484                           delayed_node->inode_id, ret);
1485                 BUG();
1486         }
1487         mutex_unlock(&delayed_node->mutex);
1488
1489 release_node:
1490         btrfs_release_delayed_node(delayed_node);
1491         return ret;
1492 }
1493
1494 static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
1495                                                struct btrfs_delayed_node *node,
1496                                                struct btrfs_key *key)
1497 {
1498         struct btrfs_delayed_item *item;
1499
1500         mutex_lock(&node->mutex);
1501         item = __btrfs_lookup_delayed_insertion_item(node, key);
1502         if (!item) {
1503                 mutex_unlock(&node->mutex);
1504                 return 1;
1505         }
1506
1507         btrfs_delayed_item_release_metadata(node->root, item);
1508         btrfs_release_delayed_item(item);
1509         mutex_unlock(&node->mutex);
1510         return 0;
1511 }
1512
1513 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1514                                    struct btrfs_inode *dir, u64 index)
1515 {
1516         struct btrfs_delayed_node *node;
1517         struct btrfs_delayed_item *item;
1518         struct btrfs_key item_key;
1519         int ret;
1520
1521         node = btrfs_get_or_create_delayed_node(dir);
1522         if (IS_ERR(node))
1523                 return PTR_ERR(node);
1524
1525         item_key.objectid = btrfs_ino(dir);
1526         item_key.type = BTRFS_DIR_INDEX_KEY;
1527         item_key.offset = index;
1528
1529         ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node,
1530                                                   &item_key);
1531         if (!ret)
1532                 goto end;
1533
1534         item = btrfs_alloc_delayed_item(0);
1535         if (!item) {
1536                 ret = -ENOMEM;
1537                 goto end;
1538         }
1539
1540         item->key = item_key;
1541
1542         ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, item);
1543         /*
1544          * We have reserved enough space when we started a new transaction,
1545          * so a metadata reservation failure here is impossible.
1546          */
1547         BUG_ON(ret);
1548
1549         mutex_lock(&node->mutex);
1550         ret = __btrfs_add_delayed_deletion_item(node, item);
1551         if (unlikely(ret)) {
1552                 btrfs_err(trans->fs_info,
1553                           "error adding delayed dir index item (index: %llu) into the deletion tree of the delayed node (root id: %llu, inode id: %llu, errno: %d)",
1554                           index, node->root->objectid, node->inode_id, ret);
1555                 BUG();
1556         }
1557         mutex_unlock(&node->mutex);
1558 end:
1559         btrfs_release_delayed_node(node);
1560         return ret;
1561 }
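
/*
 * Illustrative sketch, not part of the original file: an unlink path would
 * drop a directory index roughly like this (the variable names are assumed):
 *
 *	ret = btrfs_delete_delayed_dir_index(trans, dir, index);
 *
 * If the matching insertion item is still pending in the delayed node, the
 * two simply cancel each other out; otherwise a deletion item is queued and
 * applied to the b-tree when the delayed node is run.
 */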
1562
1563 int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
1564 {
1565         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1566
1567         if (!delayed_node)
1568                 return -ENOENT;
1569
1570         /*
1571          * Since we hold the i_mutex of this directory, no new directory index
1572          * can be added to the delayed node and index_cnt cannot be updated
1573          * under us, so we do not need to lock the delayed node.
1574          */
1575         if (!delayed_node->index_cnt) {
1576                 btrfs_release_delayed_node(delayed_node);
1577                 return -EINVAL;
1578         }
1579
1580         inode->index_cnt = delayed_node->index_cnt;
1581         btrfs_release_delayed_node(delayed_node);
1582         return 0;
1583 }
1584
1585 bool btrfs_readdir_get_delayed_items(struct inode *inode,
1586                                      struct list_head *ins_list,
1587                                      struct list_head *del_list)
1588 {
1589         struct btrfs_delayed_node *delayed_node;
1590         struct btrfs_delayed_item *item;
1591
1592         delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1593         if (!delayed_node)
1594                 return false;
1595
1596         /*
1597          * We can only do one readdir with delayed items at a time because of
1598          * item->readdir_list.
1599          */
1600         inode_unlock_shared(inode);
1601         inode_lock(inode);
1602
1603         mutex_lock(&delayed_node->mutex);
1604         item = __btrfs_first_delayed_insertion_item(delayed_node);
1605         while (item) {
1606                 refcount_inc(&item->refs);
1607                 list_add_tail(&item->readdir_list, ins_list);
1608                 item = __btrfs_next_delayed_item(item);
1609         }
1610
1611         item = __btrfs_first_delayed_deletion_item(delayed_node);
1612         while (item) {
1613                 refcount_inc(&item->refs);
1614                 list_add_tail(&item->readdir_list, del_list);
1615                 item = __btrfs_next_delayed_item(item);
1616         }
1617         mutex_unlock(&delayed_node->mutex);
1618         /*
1619          * This delayed node is still cached in the btrfs inode, so its refcount
1620          * must be > 1 here and we do not need to check whether it is about to
1621          * be freed.
1622          *
1623          * Besides that, this function is only used for readdir, and no delayed
1624          * items are inserted or deleted during that time, so we do not need to
1625          * requeue or dequeue this delayed node either.
1626          */
1627         refcount_dec(&delayed_node->refs);
1628
1629         return true;
1630 }
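
/*
 * Illustrative sketch, not part of the original file: a readdir implementation
 * is expected to pair the get/put helpers around the directory walk, roughly:
 *
 *	LIST_HEAD(ins_list);
 *	LIST_HEAD(del_list);
 *	bool put;
 *
 *	put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
 *	... walk the on-disk dir items, skipping every index for which
 *	    btrfs_should_delete_dir_index(&del_list, index) returns 1 ...
 *	ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
 *	if (put)
 *		btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list);
 */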
1631
1632 void btrfs_readdir_put_delayed_items(struct inode *inode,
1633                                      struct list_head *ins_list,
1634                                      struct list_head *del_list)
1635 {
1636         struct btrfs_delayed_item *curr, *next;
1637
1638         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1639                 list_del(&curr->readdir_list);
1640                 if (refcount_dec_and_test(&curr->refs))
1641                         kfree(curr);
1642         }
1643
1644         list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1645                 list_del(&curr->readdir_list);
1646                 if (refcount_dec_and_test(&curr->refs))
1647                         kfree(curr);
1648         }
1649
1650         /*
1651          * The VFS is going to do up_read(), so we need to downgrade back to a
1652          * read lock.
1653          */
1654         downgrade_write(&inode->i_rwsem);
1655 }
1656
1657 int btrfs_should_delete_dir_index(struct list_head *del_list,
1658                                   u64 index)
1659 {
1660         struct btrfs_delayed_item *curr;
1661         int ret = 0;
1662
1663         list_for_each_entry(curr, del_list, readdir_list) {
1664                 if (curr->key.offset > index)
1665                         break;
1666                 if (curr->key.offset == index) {
1667                         ret = 1;
1668                         break;
1669                 }
1670         }
1671         return ret;
1672 }
1673
1674 /*
1675  * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1676  * and emit it through @ctx for readdir.
1677  */
1678 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1679                                     struct list_head *ins_list)
1680 {
1681         struct btrfs_dir_item *di;
1682         struct btrfs_delayed_item *curr, *next;
1683         struct btrfs_key location;
1684         char *name;
1685         int name_len;
1686         int over = 0;
1687         unsigned char d_type;
1688
1689         if (list_empty(ins_list))
1690                 return 0;
1691
1692         /*
1693          * The data of a delayed item never changes once it is queued, so we
1694          * do not need to lock the items. And since we hold the i_mutex of the
1695          * directory, nobody can delete any directory index now.
1696          */
1697         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1698                 list_del(&curr->readdir_list);
1699
1700                 if (curr->key.offset < ctx->pos) {
1701                         if (refcount_dec_and_test(&curr->refs))
1702                                 kfree(curr);
1703                         continue;
1704                 }
1705
1706                 ctx->pos = curr->key.offset;
1707
1708                 di = (struct btrfs_dir_item *)curr->data;
1709                 name = (char *)(di + 1);
1710                 name_len = btrfs_stack_dir_name_len(di);
1711
1712                 d_type = btrfs_filetype_table[di->type];
1713                 btrfs_disk_key_to_cpu(&location, &di->location);
1714
1715                 over = !dir_emit(ctx, name, name_len,
1716                                location.objectid, d_type);
1717
1718                 if (refcount_dec_and_test(&curr->refs))
1719                         kfree(curr);
1720
1721                 if (over)
1722                         return 1;
1723                 ctx->pos++;
1724         }
1725         return 0;
1726 }
1727
1728 static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1729                                   struct btrfs_inode_item *inode_item,
1730                                   struct inode *inode)
1731 {
1732         btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
1733         btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
1734         btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1735         btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1736         btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1737         btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1738         btrfs_set_stack_inode_generation(inode_item,
1739                                          BTRFS_I(inode)->generation);
1740         btrfs_set_stack_inode_sequence(inode_item,
1741                                        inode_peek_iversion(inode));
1742         btrfs_set_stack_inode_transid(inode_item, trans->transid);
1743         btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1744         btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1745         btrfs_set_stack_inode_block_group(inode_item, 0);
1746
1747         btrfs_set_stack_timespec_sec(&inode_item->atime,
1748                                      inode->i_atime.tv_sec);
1749         btrfs_set_stack_timespec_nsec(&inode_item->atime,
1750                                       inode->i_atime.tv_nsec);
1751
1752         btrfs_set_stack_timespec_sec(&inode_item->mtime,
1753                                      inode->i_mtime.tv_sec);
1754         btrfs_set_stack_timespec_nsec(&inode_item->mtime,
1755                                       inode->i_mtime.tv_nsec);
1756
1757         btrfs_set_stack_timespec_sec(&inode_item->ctime,
1758                                      inode->i_ctime.tv_sec);
1759         btrfs_set_stack_timespec_nsec(&inode_item->ctime,
1760                                       inode->i_ctime.tv_nsec);
1761
1762         btrfs_set_stack_timespec_sec(&inode_item->otime,
1763                                      BTRFS_I(inode)->i_otime.tv_sec);
1764         btrfs_set_stack_timespec_nsec(&inode_item->otime,
1765                                      BTRFS_I(inode)->i_otime.tv_nsec);
1766 }
1767
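/*
 * Fill in the VFS inode from the delayed inode item cached on the delayed
 * node, so a lookup does not have to read the inode item from the b-tree.
 * Returns -ENOENT when there is no delayed node or its inode item is not
 * dirty.
 */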
1768 int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1769 {
1770         struct btrfs_delayed_node *delayed_node;
1771         struct btrfs_inode_item *inode_item;
1772
1773         delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1774         if (!delayed_node)
1775                 return -ENOENT;
1776
1777         mutex_lock(&delayed_node->mutex);
1778         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1779                 mutex_unlock(&delayed_node->mutex);
1780                 btrfs_release_delayed_node(delayed_node);
1781                 return -ENOENT;
1782         }
1783
1784         inode_item = &delayed_node->inode_item;
1785
1786         i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
1787         i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
1788         btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item));
1789         inode->i_mode = btrfs_stack_inode_mode(inode_item);
1790         set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1791         inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1792         BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1793         BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);
1794
1795         inode_set_iversion_queried(inode,
1796                                    btrfs_stack_inode_sequence(inode_item));
1797         inode->i_rdev = 0;
1798         *rdev = btrfs_stack_inode_rdev(inode_item);
1799         BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
1800
1801         inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
1802         inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
1803
1804         inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
1805         inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
1806
1807         inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
1808         inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
1809
1810         BTRFS_I(inode)->i_otime.tv_sec =
1811                 btrfs_stack_timespec_sec(&inode_item->otime);
1812         BTRFS_I(inode)->i_otime.tv_nsec =
1813                 btrfs_stack_timespec_nsec(&inode_item->otime);
1814
1815         inode->i_generation = BTRFS_I(inode)->generation;
1816         BTRFS_I(inode)->index_cnt = (u64)-1;
1817
1818         mutex_unlock(&delayed_node->mutex);
1819         btrfs_release_delayed_node(delayed_node);
1820         return 0;
1821 }
1822
1823 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1824                                struct btrfs_root *root, struct inode *inode)
1825 {
1826         struct btrfs_delayed_node *delayed_node;
1827         int ret = 0;
1828
1829         delayed_node = btrfs_get_or_create_delayed_node(BTRFS_I(inode));
1830         if (IS_ERR(delayed_node))
1831                 return PTR_ERR(delayed_node);
1832
1833         mutex_lock(&delayed_node->mutex);
1834         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1835                 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1836                 goto release_node;
1837         }
1838
1839         ret = btrfs_delayed_inode_reserve_metadata(trans, root, BTRFS_I(inode),
1840                                                    delayed_node);
1841         if (ret)
1842                 goto release_node;
1843
1844         fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1845         set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
1846         delayed_node->count++;
1847         atomic_inc(&root->fs_info->delayed_root->items);
1848 release_node:
1849         mutex_unlock(&delayed_node->mutex);
1850         btrfs_release_delayed_node(delayed_node);
1851         return ret;
1852 }
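
/*
 * Illustrative sketch, not part of the original file: an inode-update path can
 * try the delayed route first and fall back to a direct b-tree update when the
 * metadata reservation fails (the fallback helper name below is hypothetical):
 *
 *	ret = btrfs_delayed_update_inode(trans, root, inode);
 *	if (ret)
 *		ret = update_inode_item_in_btree(trans, root, inode);
 */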
1853
1854 int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
1855 {
1856         struct btrfs_fs_info *fs_info = inode->root->fs_info;
1857         struct btrfs_delayed_node *delayed_node;
1858
1859         /*
1860          * We don't do delayed inode updates during log recovery because it
1861          * leads to ENOSPC problems.  This means we also can't do delayed
1862          * inode ref deletions.
1863          */
1864         if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
1865                 return -EAGAIN;
1866
1867         delayed_node = btrfs_get_or_create_delayed_node(inode);
1868         if (IS_ERR(delayed_node))
1869                 return PTR_ERR(delayed_node);
1870
1871         /*
1872          * We don't reserve space for inode ref deletion because:
1873          * - We ONLY do async inode ref deletion for inodes that have only one
1874          *   link (i_nlink == 1), which means there is only one inode ref.
1875          *   In most cases the inode ref and the inode item are in the same
1876          *   leaf, and we will deal with them at the same time.
1877          *   Since we are sure we will reserve space for the inode item, it is
1878          *   unnecessary to reserve space for the inode ref deletion as well.
1879          * - If the inode ref and the inode item are not in the same leaf, we
1880          *   still don't need to worry about ENOSPC, because we reserve much
1881          *   more space for the inode update than it actually needs.
1882          * - In the worst case, we can steal some space from the global
1883          *   reservation, but that is very rare.
1884          */
1885         mutex_lock(&delayed_node->mutex);
1886         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1887                 goto release_node;
1888
1889         set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1890         delayed_node->count++;
1891         atomic_inc(&fs_info->delayed_root->items);
1892 release_node:
1893         mutex_unlock(&delayed_node->mutex);
1894         btrfs_release_delayed_node(delayed_node);
1895         return 0;
1896 }
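
/*
 * Illustrative sketch, not part of the original file: since -EAGAIN is
 * returned during log recovery, callers are expected to fall back to a
 * synchronous inode ref deletion in that case (the fallback helper name
 * below is hypothetical):
 *
 *	ret = btrfs_delayed_delete_inode_ref(BTRFS_I(inode));
 *	if (ret == -EAGAIN)
 *		ret = delete_inode_ref_in_btree(trans, root, name, name_len, inode);
 */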
1897
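/*
 * Throw away everything queued on @delayed_node: release the metadata
 * reservation of every pending insertion and deletion item, drop a queued
 * inode ref deletion, and drop the dirty inode item if there is one.  Used
 * when the inode or the whole root is being torn down.
 */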
1898 static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1899 {
1900         struct btrfs_root *root = delayed_node->root;
1901         struct btrfs_fs_info *fs_info = root->fs_info;
1902         struct btrfs_delayed_item *curr_item, *prev_item;
1903
1904         mutex_lock(&delayed_node->mutex);
1905         curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1906         while (curr_item) {
1907                 btrfs_delayed_item_release_metadata(root, curr_item);
1908                 prev_item = curr_item;
1909                 curr_item = __btrfs_next_delayed_item(prev_item);
1910                 btrfs_release_delayed_item(prev_item);
1911         }
1912
1913         curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1914         while (curr_item) {
1915                 btrfs_delayed_item_release_metadata(root, curr_item);
1916                 prev_item = curr_item;
1917                 curr_item = __btrfs_next_delayed_item(prev_item);
1918                 btrfs_release_delayed_item(prev_item);
1919         }
1920
1921         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1922                 btrfs_release_delayed_iref(delayed_node);
1923
1924         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1925                 btrfs_delayed_inode_release_metadata(fs_info, delayed_node, false);
1926                 btrfs_release_delayed_inode(delayed_node);
1927         }
1928         mutex_unlock(&delayed_node->mutex);
1929 }
1930
1931 void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
1932 {
1933         struct btrfs_delayed_node *delayed_node;
1934
1935         delayed_node = btrfs_get_delayed_node(inode);
1936         if (!delayed_node)
1937                 return;
1938
1939         __btrfs_kill_delayed_node(delayed_node);
1940         btrfs_release_delayed_node(delayed_node);
1941 }
1942
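/*
 * Scan the root's delayed_nodes_tree in batches of up to eight nodes, grab a
 * reference only on nodes that are still alive, and kill and release them
 * outside of inode_lock.  The scan restarts after the last inode id seen, so
 * a dying node whose refcount already reached zero cannot stall the loop.
 */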
1943 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1944 {
1945         u64 inode_id = 0;
1946         struct btrfs_delayed_node *delayed_nodes[8];
1947         int i, n;
1948
1949         while (1) {
1950                 spin_lock(&root->inode_lock);
1951                 n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1952                                            (void **)delayed_nodes, inode_id,
1953                                            ARRAY_SIZE(delayed_nodes));
1954                 if (!n) {
1955                         spin_unlock(&root->inode_lock);
1956                         break;
1957                 }
1958
1959                 inode_id = delayed_nodes[n - 1]->inode_id + 1;
1960                 for (i = 0; i < n; i++) {
1961                         /*
1962                          * Don't increase refs in case the node is dead and
1963                          * about to be removed from the tree in the loop below
1964                          */
1965                         if (!refcount_inc_not_zero(&delayed_nodes[i]->refs))
1966                                 delayed_nodes[i] = NULL;
1967                 }
1968                 spin_unlock(&root->inode_lock);
1969
1970                 for (i = 0; i < n; i++) {
1971                         if (!delayed_nodes[i])
1972                                 continue;
1973                         __btrfs_kill_delayed_node(delayed_nodes[i]);
1974                         btrfs_release_delayed_node(delayed_nodes[i]);
1975                 }
1976         }
1977 }
1978
1979 void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info)
1980 {
1981         struct btrfs_delayed_node *curr_node, *prev_node;
1982
1983         curr_node = btrfs_first_delayed_node(fs_info->delayed_root);
1984         while (curr_node) {
1985                 __btrfs_kill_delayed_node(curr_node);
1986
1987                 prev_node = curr_node;
1988                 curr_node = btrfs_next_delayed_node(curr_node);
1989                 btrfs_release_delayed_node(prev_node);
1990         }
1991 }
1992