GNU Linux-libre 6.9-gnu
[releases.git] / fs / nilfs2 / btree.c
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * NILFS B-tree.
4  *
5  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
6  *
7  * Written by Koji Sato.
8  */
9
10 #include <linux/slab.h>
11 #include <linux/string.h>
12 #include <linux/errno.h>
13 #include <linux/pagevec.h>
14 #include "nilfs.h"
15 #include "page.h"
16 #include "btnode.h"
17 #include "btree.h"
18 #include "alloc.h"
19 #include "dat.h"
20
21 static void __nilfs_btree_init(struct nilfs_bmap *bmap);
22
23 static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
24 {
25         struct nilfs_btree_path *path;
26         int level = NILFS_BTREE_LEVEL_DATA;
27
28         path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
29         if (path == NULL)
30                 goto out;
31
32         for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
33                 path[level].bp_bh = NULL;
34                 path[level].bp_sib_bh = NULL;
35                 path[level].bp_index = 0;
36                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
37                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
38                 path[level].bp_op = NULL;
39         }
40
41 out:
42         return path;
43 }
44
45 static void nilfs_btree_free_path(struct nilfs_btree_path *path)
46 {
47         int level = NILFS_BTREE_LEVEL_DATA;
48
49         for (; level < NILFS_BTREE_LEVEL_MAX; level++)
50                 brelse(path[level].bp_bh);
51
52         kmem_cache_free(nilfs_btree_path_cache, path);
53 }
54
55 /*
56  * B-tree node operations
57  */
58 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
59                                      __u64 ptr, struct buffer_head **bhp)
60 {
61         struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
62         struct address_space *btnc = btnc_inode->i_mapping;
63         struct buffer_head *bh;
64
65         bh = nilfs_btnode_create_block(btnc, ptr);
66         if (!bh)
67                 return -ENOMEM;
68
69         set_buffer_nilfs_volatile(bh);
70         *bhp = bh;
71         return 0;
72 }
73
74 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
75 {
76         return node->bn_flags;
77 }
78
79 static void
80 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
81 {
82         node->bn_flags = flags;
83 }
84
85 static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
86 {
87         return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
88 }
89
90 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
91 {
92         return node->bn_level;
93 }
94
95 static void
96 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
97 {
98         node->bn_level = level;
99 }
100
101 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
102 {
103         return le16_to_cpu(node->bn_nchildren);
104 }
105
106 static void
107 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
108 {
109         node->bn_nchildren = cpu_to_le16(nchildren);
110 }
111
112 static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
113 {
114         return i_blocksize(btree->b_inode);
115 }
116
117 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
118 {
119         return btree->b_nchildren_per_block;
120 }
121
122 static __le64 *
123 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
124 {
125         return (__le64 *)((char *)(node + 1) +
126                           (nilfs_btree_node_root(node) ?
127                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
128 }
129
130 static __le64 *
131 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
132 {
133         return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
134 }
135
136 static __u64
137 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
138 {
139         return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
140 }
141
142 static void
143 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
144 {
145         *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
146 }
147
148 static __u64
149 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
150                          int ncmax)
151 {
152         return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
153 }
154
155 static void
156 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
157                          int ncmax)
158 {
159         *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
160 }
161
162 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
163                                   int level, int nchildren, int ncmax,
164                                   const __u64 *keys, const __u64 *ptrs)
165 {
166         __le64 *dkeys;
167         __le64 *dptrs;
168         int i;
169
170         nilfs_btree_node_set_flags(node, flags);
171         nilfs_btree_node_set_level(node, level);
172         nilfs_btree_node_set_nchildren(node, nchildren);
173
174         dkeys = nilfs_btree_node_dkeys(node);
175         dptrs = nilfs_btree_node_dptrs(node, ncmax);
176         for (i = 0; i < nchildren; i++) {
177                 dkeys[i] = cpu_to_le64(keys[i]);
178                 dptrs[i] = cpu_to_le64(ptrs[i]);
179         }
180 }
181
182 /* Assume the buffer heads corresponding to left and right are locked. */
183 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
184                                        struct nilfs_btree_node *right,
185                                        int n, int lncmax, int rncmax)
186 {
187         __le64 *ldkeys, *rdkeys;
188         __le64 *ldptrs, *rdptrs;
189         int lnchildren, rnchildren;
190
191         ldkeys = nilfs_btree_node_dkeys(left);
192         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
193         lnchildren = nilfs_btree_node_get_nchildren(left);
194
195         rdkeys = nilfs_btree_node_dkeys(right);
196         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
197         rnchildren = nilfs_btree_node_get_nchildren(right);
198
199         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
200         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
201         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
202         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
203
204         lnchildren += n;
205         rnchildren -= n;
206         nilfs_btree_node_set_nchildren(left, lnchildren);
207         nilfs_btree_node_set_nchildren(right, rnchildren);
208 }
209
210 /* Assume that the buffer heads corresponding to left and right are locked. */
211 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
212                                         struct nilfs_btree_node *right,
213                                         int n, int lncmax, int rncmax)
214 {
215         __le64 *ldkeys, *rdkeys;
216         __le64 *ldptrs, *rdptrs;
217         int lnchildren, rnchildren;
218
219         ldkeys = nilfs_btree_node_dkeys(left);
220         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
221         lnchildren = nilfs_btree_node_get_nchildren(left);
222
223         rdkeys = nilfs_btree_node_dkeys(right);
224         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
225         rnchildren = nilfs_btree_node_get_nchildren(right);
226
227         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
228         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
229         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
230         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
231
232         lnchildren -= n;
233         rnchildren += n;
234         nilfs_btree_node_set_nchildren(left, lnchildren);
235         nilfs_btree_node_set_nchildren(right, rnchildren);
236 }
237
238 /* Assume that the buffer head corresponding to node is locked. */
239 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
240                                     __u64 key, __u64 ptr, int ncmax)
241 {
242         __le64 *dkeys;
243         __le64 *dptrs;
244         int nchildren;
245
246         dkeys = nilfs_btree_node_dkeys(node);
247         dptrs = nilfs_btree_node_dptrs(node, ncmax);
248         nchildren = nilfs_btree_node_get_nchildren(node);
249         if (index < nchildren) {
250                 memmove(dkeys + index + 1, dkeys + index,
251                         (nchildren - index) * sizeof(*dkeys));
252                 memmove(dptrs + index + 1, dptrs + index,
253                         (nchildren - index) * sizeof(*dptrs));
254         }
255         dkeys[index] = cpu_to_le64(key);
256         dptrs[index] = cpu_to_le64(ptr);
257         nchildren++;
258         nilfs_btree_node_set_nchildren(node, nchildren);
259 }
260
261 /* Assume that the buffer head corresponding to node is locked. */
262 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
263                                     __u64 *keyp, __u64 *ptrp, int ncmax)
264 {
265         __u64 key;
266         __u64 ptr;
267         __le64 *dkeys;
268         __le64 *dptrs;
269         int nchildren;
270
271         dkeys = nilfs_btree_node_dkeys(node);
272         dptrs = nilfs_btree_node_dptrs(node, ncmax);
273         key = le64_to_cpu(dkeys[index]);
274         ptr = le64_to_cpu(dptrs[index]);
275         nchildren = nilfs_btree_node_get_nchildren(node);
276         if (keyp != NULL)
277                 *keyp = key;
278         if (ptrp != NULL)
279                 *ptrp = ptr;
280
281         if (index < nchildren - 1) {
282                 memmove(dkeys + index, dkeys + index + 1,
283                         (nchildren - index - 1) * sizeof(*dkeys));
284                 memmove(dptrs + index, dptrs + index + 1,
285                         (nchildren - index - 1) * sizeof(*dptrs));
286         }
287         nchildren--;
288         nilfs_btree_node_set_nchildren(node, nchildren);
289 }
290
291 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
292                                    __u64 key, int *indexp)
293 {
294         __u64 nkey;
295         int index, low, high, s;
296
297         /* binary search */
298         low = 0;
299         high = nilfs_btree_node_get_nchildren(node) - 1;
300         index = 0;
301         s = 0;
302         while (low <= high) {
303                 index = (low + high) / 2;
304                 nkey = nilfs_btree_node_get_key(node, index);
305                 if (nkey == key) {
306                         s = 0;
307                         goto out;
308                 } else if (nkey < key) {
309                         low = index + 1;
310                         s = -1;
311                 } else {
312                         high = index - 1;
313                         s = 1;
314                 }
315         }
316
317         /* adjust index */
318         if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
319                 if (s > 0 && index > 0)
320                         index--;
321         } else if (s < 0)
322                 index++;
323
324  out:
325         *indexp = index;
326
327         return s == 0;
328 }
329
330 /**
331  * nilfs_btree_node_broken - verify consistency of btree node
332  * @node: btree node block to be examined
333  * @size: node size (in bytes)
334  * @inode: host inode of btree
335  * @blocknr: block number
336  *
337  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
338  */
339 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
340                                    size_t size, struct inode *inode,
341                                    sector_t blocknr)
342 {
343         int level, flags, nchildren;
344         int ret = 0;
345
346         level = nilfs_btree_node_get_level(node);
347         flags = nilfs_btree_node_get_flags(node);
348         nchildren = nilfs_btree_node_get_nchildren(node);
349
350         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
351                      level >= NILFS_BTREE_LEVEL_MAX ||
352                      (flags & NILFS_BTREE_NODE_ROOT) ||
353                      nchildren < 0 ||
354                      nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
355                 nilfs_crit(inode->i_sb,
356                            "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
357                            inode->i_ino, (unsigned long long)blocknr, level,
358                            flags, nchildren);
359                 ret = 1;
360         }
361         return ret;
362 }
363
364 /**
365  * nilfs_btree_root_broken - verify consistency of btree root node
366  * @node: btree root node to be examined
367  * @inode: host inode of btree
368  *
369  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
370  */
371 static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
372                                    struct inode *inode)
373 {
374         int level, flags, nchildren;
375         int ret = 0;
376
377         level = nilfs_btree_node_get_level(node);
378         flags = nilfs_btree_node_get_flags(node);
379         nchildren = nilfs_btree_node_get_nchildren(node);
380
381         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
382                      level >= NILFS_BTREE_LEVEL_MAX ||
383                      nchildren < 0 ||
384                      nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
385                 nilfs_crit(inode->i_sb,
386                            "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
387                            inode->i_ino, level, flags, nchildren);
388                 ret = 1;
389         }
390         return ret;
391 }
392
393 int nilfs_btree_broken_node_block(struct buffer_head *bh)
394 {
395         struct inode *inode;
396         int ret;
397
398         if (buffer_nilfs_checked(bh))
399                 return 0;
400
401         inode = bh->b_folio->mapping->host;
402         ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
403                                       bh->b_size, inode, bh->b_blocknr);
404         if (likely(!ret))
405                 set_buffer_nilfs_checked(bh);
406         return ret;
407 }
408
409 static struct nilfs_btree_node *
410 nilfs_btree_get_root(const struct nilfs_bmap *btree)
411 {
412         return (struct nilfs_btree_node *)btree->b_u.u_data;
413 }
414
415 static struct nilfs_btree_node *
416 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
417 {
418         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
419 }
420
421 static struct nilfs_btree_node *
422 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
423 {
424         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
425 }
426
427 static int nilfs_btree_height(const struct nilfs_bmap *btree)
428 {
429         return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
430 }
431
432 static struct nilfs_btree_node *
433 nilfs_btree_get_node(const struct nilfs_bmap *btree,
434                      const struct nilfs_btree_path *path,
435                      int level, int *ncmaxp)
436 {
437         struct nilfs_btree_node *node;
438
439         if (level == nilfs_btree_height(btree) - 1) {
440                 node = nilfs_btree_get_root(btree);
441                 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
442         } else {
443                 node = nilfs_btree_get_nonroot_node(path, level);
444                 *ncmaxp = nilfs_btree_nchildren_per_block(btree);
445         }
446         return node;
447 }
448
449 static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
450                                 struct nilfs_btree_node *node, int level)
451 {
452         if (unlikely(nilfs_btree_node_get_level(node) != level)) {
453                 dump_stack();
454                 nilfs_crit(btree->b_inode->i_sb,
455                            "btree level mismatch (ino=%lu): %d != %d",
456                            btree->b_inode->i_ino,
457                            nilfs_btree_node_get_level(node), level);
458                 return 1;
459         }
460         return 0;
461 }
462
463 struct nilfs_btree_readahead_info {
464         struct nilfs_btree_node *node;  /* parent node */
465         int max_ra_blocks;              /* max nof blocks to read ahead */
466         int index;                      /* current index on the parent node */
467         int ncmax;                      /* nof children in the parent node */
468 };
469
470 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
471                                    struct buffer_head **bhp,
472                                    const struct nilfs_btree_readahead_info *ra)
473 {
474         struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
475         struct address_space *btnc = btnc_inode->i_mapping;
476         struct buffer_head *bh, *ra_bh;
477         sector_t submit_ptr = 0;
478         int ret;
479
480         ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh,
481                                         &submit_ptr);
482         if (ret) {
483                 if (likely(ret == -EEXIST))
484                         goto out_check;
485                 if (ret == -ENOENT) {
486                         /*
487                          * Block address translation failed due to invalid
488                          * value of 'ptr'.  In this case, return internal code
489                          * -EINVAL (broken bmap) to notify bmap layer of fatal
490                          * metadata corruption.
491                          */
492                         ret = -EINVAL;
493                 }
494                 return ret;
495         }
496
497         if (ra) {
498                 int i, n;
499                 __u64 ptr2;
500
501                 /* read ahead sibling nodes */
502                 for (n = ra->max_ra_blocks, i = ra->index + 1;
503                      n > 0 && i < ra->ncmax; n--, i++) {
504                         ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
505
506                         ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
507                                                 REQ_OP_READ | REQ_RAHEAD,
508                                                 &ra_bh, &submit_ptr);
509                         if (likely(!ret || ret == -EEXIST))
510                                 brelse(ra_bh);
511                         else if (ret != -EBUSY)
512                                 break;
513                         if (!buffer_locked(bh))
514                                 goto out_no_wait;
515                 }
516         }
517
518         wait_on_buffer(bh);
519
520  out_no_wait:
521         if (!buffer_uptodate(bh)) {
522                 nilfs_err(btree->b_inode->i_sb,
523                           "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
524                           btree->b_inode->i_ino, (unsigned long long)ptr);
525                 brelse(bh);
526                 return -EIO;
527         }
528
529  out_check:
530         if (nilfs_btree_broken_node_block(bh)) {
531                 clear_buffer_uptodate(bh);
532                 brelse(bh);
533                 return -EINVAL;
534         }
535
536         *bhp = bh;
537         return 0;
538 }
539
540 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
541                                    struct buffer_head **bhp)
542 {
543         return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
544 }
545
546 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
547                                  struct nilfs_btree_path *path,
548                                  __u64 key, __u64 *ptrp, int minlevel,
549                                  int readahead)
550 {
551         struct nilfs_btree_node *node;
552         struct nilfs_btree_readahead_info p, *ra;
553         __u64 ptr;
554         int level, index, found, ncmax, ret;
555
556         node = nilfs_btree_get_root(btree);
557         level = nilfs_btree_node_get_level(node);
558         if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
559                 return -ENOENT;
560
561         found = nilfs_btree_node_lookup(node, key, &index);
562         ptr = nilfs_btree_node_get_ptr(node, index,
563                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
564         path[level].bp_bh = NULL;
565         path[level].bp_index = index;
566
567         ncmax = nilfs_btree_nchildren_per_block(btree);
568
569         while (--level >= minlevel) {
570                 ra = NULL;
571                 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
572                         p.node = nilfs_btree_get_node(btree, path, level + 1,
573                                                       &p.ncmax);
574                         p.index = index;
575                         p.max_ra_blocks = 7;
576                         ra = &p;
577                 }
578                 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
579                                               ra);
580                 if (ret < 0)
581                         return ret;
582
583                 node = nilfs_btree_get_nonroot_node(path, level);
584                 if (nilfs_btree_bad_node(btree, node, level))
585                         return -EINVAL;
586                 if (!found)
587                         found = nilfs_btree_node_lookup(node, key, &index);
588                 else
589                         index = 0;
590                 if (index < ncmax) {
591                         ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
592                 } else {
593                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
594                         /* insert */
595                         ptr = NILFS_BMAP_INVALID_PTR;
596                 }
597                 path[level].bp_index = index;
598         }
599         if (!found)
600                 return -ENOENT;
601
602         if (ptrp != NULL)
603                 *ptrp = ptr;
604
605         return 0;
606 }
607
608 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
609                                       struct nilfs_btree_path *path,
610                                       __u64 *keyp, __u64 *ptrp)
611 {
612         struct nilfs_btree_node *node;
613         __u64 ptr;
614         int index, level, ncmax, ret;
615
616         node = nilfs_btree_get_root(btree);
617         index = nilfs_btree_node_get_nchildren(node) - 1;
618         if (index < 0)
619                 return -ENOENT;
620         level = nilfs_btree_node_get_level(node);
621         ptr = nilfs_btree_node_get_ptr(node, index,
622                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
623         path[level].bp_bh = NULL;
624         path[level].bp_index = index;
625         ncmax = nilfs_btree_nchildren_per_block(btree);
626
627         for (level--; level > 0; level--) {
628                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
629                 if (ret < 0)
630                         return ret;
631                 node = nilfs_btree_get_nonroot_node(path, level);
632                 if (nilfs_btree_bad_node(btree, node, level))
633                         return -EINVAL;
634                 index = nilfs_btree_node_get_nchildren(node) - 1;
635                 ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
636                 path[level].bp_index = index;
637         }
638
639         if (keyp != NULL)
640                 *keyp = nilfs_btree_node_get_key(node, index);
641         if (ptrp != NULL)
642                 *ptrp = ptr;
643
644         return 0;
645 }
646
647 /**
648  * nilfs_btree_get_next_key - get next valid key from btree path array
649  * @btree: bmap struct of btree
650  * @path: array of nilfs_btree_path struct
651  * @minlevel: start level
652  * @nextkey: place to store the next valid key
653  *
654  * Return Value: If a next key was found, 0 is returned. Otherwise,
655  * -ENOENT is returned.
656  */
657 static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
658                                     const struct nilfs_btree_path *path,
659                                     int minlevel, __u64 *nextkey)
660 {
661         struct nilfs_btree_node *node;
662         int maxlevel = nilfs_btree_height(btree) - 1;
663         int index, next_adj, level;
664
665         /* Next index is already set to bp_index for leaf nodes. */
666         next_adj = 0;
667         for (level = minlevel; level <= maxlevel; level++) {
668                 if (level == maxlevel)
669                         node = nilfs_btree_get_root(btree);
670                 else
671                         node = nilfs_btree_get_nonroot_node(path, level);
672
673                 index = path[level].bp_index + next_adj;
674                 if (index < nilfs_btree_node_get_nchildren(node)) {
675                         /* Next key is in this node */
676                         *nextkey = nilfs_btree_node_get_key(node, index);
677                         return 0;
678                 }
679                 /* For non-leaf nodes, next index is stored at bp_index + 1. */
680                 next_adj = 1;
681         }
682         return -ENOENT;
683 }
684
685 static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
686                               __u64 key, int level, __u64 *ptrp)
687 {
688         struct nilfs_btree_path *path;
689         int ret;
690
691         path = nilfs_btree_alloc_path();
692         if (path == NULL)
693                 return -ENOMEM;
694
695         ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
696
697         nilfs_btree_free_path(path);
698
699         return ret;
700 }
701
702 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
703                                      __u64 key, __u64 *ptrp,
704                                      unsigned int maxblocks)
705 {
706         struct nilfs_btree_path *path;
707         struct nilfs_btree_node *node;
708         struct inode *dat = NULL;
709         __u64 ptr, ptr2;
710         sector_t blocknr;
711         int level = NILFS_BTREE_LEVEL_NODE_MIN;
712         int ret, cnt, index, maxlevel, ncmax;
713         struct nilfs_btree_readahead_info p;
714
715         path = nilfs_btree_alloc_path();
716         if (path == NULL)
717                 return -ENOMEM;
718
719         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
720         if (ret < 0)
721                 goto out;
722
723         if (NILFS_BMAP_USE_VBN(btree)) {
724                 dat = nilfs_bmap_get_dat(btree);
725                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
726                 if (ret < 0)
727                         goto dat_error;
728                 ptr = blocknr;
729         }
730         cnt = 1;
731         if (cnt == maxblocks)
732                 goto end;
733
734         maxlevel = nilfs_btree_height(btree) - 1;
735         node = nilfs_btree_get_node(btree, path, level, &ncmax);
736         index = path[level].bp_index + 1;
737         for (;;) {
738                 while (index < nilfs_btree_node_get_nchildren(node)) {
739                         if (nilfs_btree_node_get_key(node, index) !=
740                             key + cnt)
741                                 goto end;
742                         ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
743                         if (dat) {
744                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
745                                 if (ret < 0)
746                                         goto dat_error;
747                                 ptr2 = blocknr;
748                         }
749                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
750                                 goto end;
751                         index++;
752                 }
753                 if (level == maxlevel)
754                         break;
755
756                 /* look-up right sibling node */
757                 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
758                 p.index = path[level + 1].bp_index + 1;
759                 p.max_ra_blocks = 7;
760                 if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
761                     nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
762                         break;
763                 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
764                 path[level + 1].bp_index = p.index;
765
766                 brelse(path[level].bp_bh);
767                 path[level].bp_bh = NULL;
768
769                 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
770                                               &p);
771                 if (ret < 0)
772                         goto out;
773                 node = nilfs_btree_get_nonroot_node(path, level);
774                 ncmax = nilfs_btree_nchildren_per_block(btree);
775                 index = 0;
776                 path[level].bp_index = index;
777         }
778  end:
779         *ptrp = ptr;
780         ret = cnt;
781  out:
782         nilfs_btree_free_path(path);
783         return ret;
784
785  dat_error:
786         if (ret == -ENOENT)
787                 ret = -EINVAL;  /* Notify bmap layer of metadata corruption */
788         goto out;
789 }
790
791 static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
792                                     struct nilfs_btree_path *path,
793                                     int level, __u64 key)
794 {
795         if (level < nilfs_btree_height(btree) - 1) {
796                 do {
797                         nilfs_btree_node_set_key(
798                                 nilfs_btree_get_nonroot_node(path, level),
799                                 path[level].bp_index, key);
800                         if (!buffer_dirty(path[level].bp_bh))
801                                 mark_buffer_dirty(path[level].bp_bh);
802                 } while ((path[level].bp_index == 0) &&
803                          (++level < nilfs_btree_height(btree) - 1));
804         }
805
806         /* root */
807         if (level == nilfs_btree_height(btree) - 1) {
808                 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
809                                          path[level].bp_index, key);
810         }
811 }
812
813 static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
814                                   struct nilfs_btree_path *path,
815                                   int level, __u64 *keyp, __u64 *ptrp)
816 {
817         struct nilfs_btree_node *node;
818         int ncblk;
819
820         if (level < nilfs_btree_height(btree) - 1) {
821                 node = nilfs_btree_get_nonroot_node(path, level);
822                 ncblk = nilfs_btree_nchildren_per_block(btree);
823                 nilfs_btree_node_insert(node, path[level].bp_index,
824                                         *keyp, *ptrp, ncblk);
825                 if (!buffer_dirty(path[level].bp_bh))
826                         mark_buffer_dirty(path[level].bp_bh);
827
828                 if (path[level].bp_index == 0)
829                         nilfs_btree_promote_key(btree, path, level + 1,
830                                                 nilfs_btree_node_get_key(node,
831                                                                          0));
832         } else {
833                 node = nilfs_btree_get_root(btree);
834                 nilfs_btree_node_insert(node, path[level].bp_index,
835                                         *keyp, *ptrp,
836                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
837         }
838 }
839
840 static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
841                                    struct nilfs_btree_path *path,
842                                    int level, __u64 *keyp, __u64 *ptrp)
843 {
844         struct nilfs_btree_node *node, *left;
845         int nchildren, lnchildren, n, move, ncblk;
846
847         node = nilfs_btree_get_nonroot_node(path, level);
848         left = nilfs_btree_get_sib_node(path, level);
849         nchildren = nilfs_btree_node_get_nchildren(node);
850         lnchildren = nilfs_btree_node_get_nchildren(left);
851         ncblk = nilfs_btree_nchildren_per_block(btree);
852         move = 0;
853
854         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
855         if (n > path[level].bp_index) {
856                 /* move insert point */
857                 n--;
858                 move = 1;
859         }
860
861         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
862
863         if (!buffer_dirty(path[level].bp_bh))
864                 mark_buffer_dirty(path[level].bp_bh);
865         if (!buffer_dirty(path[level].bp_sib_bh))
866                 mark_buffer_dirty(path[level].bp_sib_bh);
867
868         nilfs_btree_promote_key(btree, path, level + 1,
869                                 nilfs_btree_node_get_key(node, 0));
870
871         if (move) {
872                 brelse(path[level].bp_bh);
873                 path[level].bp_bh = path[level].bp_sib_bh;
874                 path[level].bp_sib_bh = NULL;
875                 path[level].bp_index += lnchildren;
876                 path[level + 1].bp_index--;
877         } else {
878                 brelse(path[level].bp_sib_bh);
879                 path[level].bp_sib_bh = NULL;
880                 path[level].bp_index -= n;
881         }
882
883         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
884 }
885
886 static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
887                                     struct nilfs_btree_path *path,
888                                     int level, __u64 *keyp, __u64 *ptrp)
889 {
890         struct nilfs_btree_node *node, *right;
891         int nchildren, rnchildren, n, move, ncblk;
892
893         node = nilfs_btree_get_nonroot_node(path, level);
894         right = nilfs_btree_get_sib_node(path, level);
895         nchildren = nilfs_btree_node_get_nchildren(node);
896         rnchildren = nilfs_btree_node_get_nchildren(right);
897         ncblk = nilfs_btree_nchildren_per_block(btree);
898         move = 0;
899
900         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
901         if (n > nchildren - path[level].bp_index) {
902                 /* move insert point */
903                 n--;
904                 move = 1;
905         }
906
907         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
908
909         if (!buffer_dirty(path[level].bp_bh))
910                 mark_buffer_dirty(path[level].bp_bh);
911         if (!buffer_dirty(path[level].bp_sib_bh))
912                 mark_buffer_dirty(path[level].bp_sib_bh);
913
914         path[level + 1].bp_index++;
915         nilfs_btree_promote_key(btree, path, level + 1,
916                                 nilfs_btree_node_get_key(right, 0));
917         path[level + 1].bp_index--;
918
919         if (move) {
920                 brelse(path[level].bp_bh);
921                 path[level].bp_bh = path[level].bp_sib_bh;
922                 path[level].bp_sib_bh = NULL;
923                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
924                 path[level + 1].bp_index++;
925         } else {
926                 brelse(path[level].bp_sib_bh);
927                 path[level].bp_sib_bh = NULL;
928         }
929
930         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
931 }
932
933 static void nilfs_btree_split(struct nilfs_bmap *btree,
934                               struct nilfs_btree_path *path,
935                               int level, __u64 *keyp, __u64 *ptrp)
936 {
937         struct nilfs_btree_node *node, *right;
938         int nchildren, n, move, ncblk;
939
940         node = nilfs_btree_get_nonroot_node(path, level);
941         right = nilfs_btree_get_sib_node(path, level);
942         nchildren = nilfs_btree_node_get_nchildren(node);
943         ncblk = nilfs_btree_nchildren_per_block(btree);
944         move = 0;
945
946         n = (nchildren + 1) / 2;
947         if (n > nchildren - path[level].bp_index) {
948                 n--;
949                 move = 1;
950         }
951
952         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
953
954         if (!buffer_dirty(path[level].bp_bh))
955                 mark_buffer_dirty(path[level].bp_bh);
956         if (!buffer_dirty(path[level].bp_sib_bh))
957                 mark_buffer_dirty(path[level].bp_sib_bh);
958
959         if (move) {
960                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
961                 nilfs_btree_node_insert(right, path[level].bp_index,
962                                         *keyp, *ptrp, ncblk);
963
964                 *keyp = nilfs_btree_node_get_key(right, 0);
965                 *ptrp = path[level].bp_newreq.bpr_ptr;
966
967                 brelse(path[level].bp_bh);
968                 path[level].bp_bh = path[level].bp_sib_bh;
969                 path[level].bp_sib_bh = NULL;
970         } else {
971                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
972
973                 *keyp = nilfs_btree_node_get_key(right, 0);
974                 *ptrp = path[level].bp_newreq.bpr_ptr;
975
976                 brelse(path[level].bp_sib_bh);
977                 path[level].bp_sib_bh = NULL;
978         }
979
980         path[level + 1].bp_index++;
981 }
982
983 static void nilfs_btree_grow(struct nilfs_bmap *btree,
984                              struct nilfs_btree_path *path,
985                              int level, __u64 *keyp, __u64 *ptrp)
986 {
987         struct nilfs_btree_node *root, *child;
988         int n, ncblk;
989
990         root = nilfs_btree_get_root(btree);
991         child = nilfs_btree_get_sib_node(path, level);
992         ncblk = nilfs_btree_nchildren_per_block(btree);
993
994         n = nilfs_btree_node_get_nchildren(root);
995
996         nilfs_btree_node_move_right(root, child, n,
997                                     NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
998         nilfs_btree_node_set_level(root, level + 1);
999
1000         if (!buffer_dirty(path[level].bp_sib_bh))
1001                 mark_buffer_dirty(path[level].bp_sib_bh);
1002
1003         path[level].bp_bh = path[level].bp_sib_bh;
1004         path[level].bp_sib_bh = NULL;
1005
1006         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
1007
1008         *keyp = nilfs_btree_node_get_key(child, 0);
1009         *ptrp = path[level].bp_newreq.bpr_ptr;
1010 }
1011
1012 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
1013                                    const struct nilfs_btree_path *path)
1014 {
1015         struct nilfs_btree_node *node;
1016         int level, ncmax;
1017
1018         if (path == NULL)
1019                 return NILFS_BMAP_INVALID_PTR;
1020
1021         /* left sibling */
1022         level = NILFS_BTREE_LEVEL_NODE_MIN;
1023         if (path[level].bp_index > 0) {
1024                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
1025                 return nilfs_btree_node_get_ptr(node,
1026                                                 path[level].bp_index - 1,
1027                                                 ncmax);
1028         }
1029
1030         /* parent */
1031         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1032         if (level <= nilfs_btree_height(btree) - 1) {
1033                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
1034                 return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1035                                                 ncmax);
1036         }
1037
1038         return NILFS_BMAP_INVALID_PTR;
1039 }
1040
1041 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1042                                        const struct nilfs_btree_path *path,
1043                                        __u64 key)
1044 {
1045         __u64 ptr;
1046
1047         ptr = nilfs_bmap_find_target_seq(btree, key);
1048         if (ptr != NILFS_BMAP_INVALID_PTR)
1049                 /* sequential access */
1050                 return ptr;
1051
1052         ptr = nilfs_btree_find_near(btree, path);
1053         if (ptr != NILFS_BMAP_INVALID_PTR)
1054                 /* near */
1055                 return ptr;
1056
1057         /* block group */
1058         return nilfs_bmap_find_target_in_group(btree);
1059 }
1060
1061 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1062                                       struct nilfs_btree_path *path,
1063                                       int *levelp, __u64 key, __u64 ptr,
1064                                       struct nilfs_bmap_stats *stats)
1065 {
1066         struct buffer_head *bh;
1067         struct nilfs_btree_node *node, *parent, *sib;
1068         __u64 sibptr;
1069         int pindex, level, ncmax, ncblk, ret;
1070         struct inode *dat = NULL;
1071
1072         stats->bs_nblocks = 0;
1073         level = NILFS_BTREE_LEVEL_DATA;
1074
1075         /* allocate a new ptr for data block */
1076         if (NILFS_BMAP_USE_VBN(btree)) {
1077                 path[level].bp_newreq.bpr_ptr =
1078                         nilfs_btree_find_target_v(btree, path, key);
1079                 dat = nilfs_bmap_get_dat(btree);
1080         }
1081
1082         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1083         if (ret < 0)
1084                 goto err_out_data;
1085
1086         ncblk = nilfs_btree_nchildren_per_block(btree);
1087
1088         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1089              level < nilfs_btree_height(btree) - 1;
1090              level++) {
1091                 node = nilfs_btree_get_nonroot_node(path, level);
1092                 if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1093                         path[level].bp_op = nilfs_btree_do_insert;
1094                         stats->bs_nblocks++;
1095                         goto out;
1096                 }
1097
1098                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1099                 pindex = path[level + 1].bp_index;
1100
1101                 /* left sibling */
1102                 if (pindex > 0) {
1103                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1104                                                           ncmax);
1105                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1106                         if (ret < 0)
1107                                 goto err_out_child_node;
1108                         sib = (struct nilfs_btree_node *)bh->b_data;
1109                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1110                                 path[level].bp_sib_bh = bh;
1111                                 path[level].bp_op = nilfs_btree_carry_left;
1112                                 stats->bs_nblocks++;
1113                                 goto out;
1114                         } else {
1115                                 brelse(bh);
1116                         }
1117                 }
1118
1119                 /* right sibling */
1120                 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1121                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1122                                                           ncmax);
1123                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1124                         if (ret < 0)
1125                                 goto err_out_child_node;
1126                         sib = (struct nilfs_btree_node *)bh->b_data;
1127                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1128                                 path[level].bp_sib_bh = bh;
1129                                 path[level].bp_op = nilfs_btree_carry_right;
1130                                 stats->bs_nblocks++;
1131                                 goto out;
1132                         } else {
1133                                 brelse(bh);
1134                         }
1135                 }
1136
1137                 /* split */
1138                 path[level].bp_newreq.bpr_ptr =
1139                         path[level - 1].bp_newreq.bpr_ptr + 1;
1140                 ret = nilfs_bmap_prepare_alloc_ptr(btree,
1141                                                    &path[level].bp_newreq, dat);
1142                 if (ret < 0)
1143                         goto err_out_child_node;
1144                 ret = nilfs_btree_get_new_block(btree,
1145                                                 path[level].bp_newreq.bpr_ptr,
1146                                                 &bh);
1147                 if (ret < 0)
1148                         goto err_out_curr_node;
1149
1150                 stats->bs_nblocks++;
1151
1152                 sib = (struct nilfs_btree_node *)bh->b_data;
1153                 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1154                 path[level].bp_sib_bh = bh;
1155                 path[level].bp_op = nilfs_btree_split;
1156         }
1157
1158         /* root */
1159         node = nilfs_btree_get_root(btree);
1160         if (nilfs_btree_node_get_nchildren(node) <
1161             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1162                 path[level].bp_op = nilfs_btree_do_insert;
1163                 stats->bs_nblocks++;
1164                 goto out;
1165         }
1166
1167         /* grow */
1168         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1169         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1170         if (ret < 0)
1171                 goto err_out_child_node;
1172         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1173                                         &bh);
1174         if (ret < 0)
1175                 goto err_out_curr_node;
1176
1177         nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1178                               0, level, 0, ncblk, NULL, NULL);
1179         path[level].bp_sib_bh = bh;
1180         path[level].bp_op = nilfs_btree_grow;
1181
1182         level++;
1183         path[level].bp_op = nilfs_btree_do_insert;
1184
1185         /* a newly-created node block and a data block are added */
1186         stats->bs_nblocks += 2;
1187
1188         /* success */
1189  out:
1190         *levelp = level;
1191         return ret;
1192
1193         /* error */
1194  err_out_curr_node:
1195         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1196  err_out_child_node:
1197         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1198                 nilfs_btnode_delete(path[level].bp_sib_bh);
1199                 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1200
1201         }
1202
1203         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1204  err_out_data:
1205         *levelp = level;
1206         stats->bs_nblocks = 0;
1207         return ret;
1208 }
1209
1210 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1211                                       struct nilfs_btree_path *path,
1212                                       int maxlevel, __u64 key, __u64 ptr)
1213 {
1214         struct inode *dat = NULL;
1215         int level;
1216
1217         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1218         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1219         if (NILFS_BMAP_USE_VBN(btree)) {
1220                 nilfs_bmap_set_target_v(btree, key, ptr);
1221                 dat = nilfs_bmap_get_dat(btree);
1222         }
1223
1224         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1225                 nilfs_bmap_commit_alloc_ptr(btree,
1226                                             &path[level - 1].bp_newreq, dat);
1227                 path[level].bp_op(btree, path, level, &key, &ptr);
1228         }
1229
1230         if (!nilfs_bmap_dirty(btree))
1231                 nilfs_bmap_set_dirty(btree);
1232 }
1233
1234 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1235 {
1236         struct nilfs_btree_path *path;
1237         struct nilfs_bmap_stats stats;
1238         int level, ret;
1239
1240         path = nilfs_btree_alloc_path();
1241         if (path == NULL)
1242                 return -ENOMEM;
1243
1244         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1245                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1246         if (ret != -ENOENT) {
1247                 if (ret == 0)
1248                         ret = -EEXIST;
1249                 goto out;
1250         }
1251
1252         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1253         if (ret < 0)
1254                 goto out;
1255         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1256         nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1257
1258  out:
1259         nilfs_btree_free_path(path);
1260         return ret;
1261 }
1262
1263 static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1264                                   struct nilfs_btree_path *path,
1265                                   int level, __u64 *keyp, __u64 *ptrp)
1266 {
1267         struct nilfs_btree_node *node;
1268         int ncblk;
1269
1270         if (level < nilfs_btree_height(btree) - 1) {
1271                 node = nilfs_btree_get_nonroot_node(path, level);
1272                 ncblk = nilfs_btree_nchildren_per_block(btree);
1273                 nilfs_btree_node_delete(node, path[level].bp_index,
1274                                         keyp, ptrp, ncblk);
1275                 if (!buffer_dirty(path[level].bp_bh))
1276                         mark_buffer_dirty(path[level].bp_bh);
1277                 if (path[level].bp_index == 0)
1278                         nilfs_btree_promote_key(btree, path, level + 1,
1279                                 nilfs_btree_node_get_key(node, 0));
1280         } else {
1281                 node = nilfs_btree_get_root(btree);
1282                 nilfs_btree_node_delete(node, path[level].bp_index,
1283                                         keyp, ptrp,
1284                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1285         }
1286 }
1287
1288 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1289                                     struct nilfs_btree_path *path,
1290                                     int level, __u64 *keyp, __u64 *ptrp)
1291 {
1292         struct nilfs_btree_node *node, *left;
1293         int nchildren, lnchildren, n, ncblk;
1294
1295         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1296
1297         node = nilfs_btree_get_nonroot_node(path, level);
1298         left = nilfs_btree_get_sib_node(path, level);
1299         nchildren = nilfs_btree_node_get_nchildren(node);
1300         lnchildren = nilfs_btree_node_get_nchildren(left);
1301         ncblk = nilfs_btree_nchildren_per_block(btree);
1302
1303         n = (nchildren + lnchildren) / 2 - nchildren;
1304
1305         nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1306
1307         if (!buffer_dirty(path[level].bp_bh))
1308                 mark_buffer_dirty(path[level].bp_bh);
1309         if (!buffer_dirty(path[level].bp_sib_bh))
1310                 mark_buffer_dirty(path[level].bp_sib_bh);
1311
1312         nilfs_btree_promote_key(btree, path, level + 1,
1313                                 nilfs_btree_node_get_key(node, 0));
1314
1315         brelse(path[level].bp_sib_bh);
1316         path[level].bp_sib_bh = NULL;
1317         path[level].bp_index += n;
1318 }
1319
1320 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1321                                      struct nilfs_btree_path *path,
1322                                      int level, __u64 *keyp, __u64 *ptrp)
1323 {
1324         struct nilfs_btree_node *node, *right;
1325         int nchildren, rnchildren, n, ncblk;
1326
1327         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1328
1329         node = nilfs_btree_get_nonroot_node(path, level);
1330         right = nilfs_btree_get_sib_node(path, level);
1331         nchildren = nilfs_btree_node_get_nchildren(node);
1332         rnchildren = nilfs_btree_node_get_nchildren(right);
1333         ncblk = nilfs_btree_nchildren_per_block(btree);
1334
1335         n = (nchildren + rnchildren) / 2 - nchildren;
1336
1337         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1338
1339         if (!buffer_dirty(path[level].bp_bh))
1340                 mark_buffer_dirty(path[level].bp_bh);
1341         if (!buffer_dirty(path[level].bp_sib_bh))
1342                 mark_buffer_dirty(path[level].bp_sib_bh);
1343
1344         path[level + 1].bp_index++;
1345         nilfs_btree_promote_key(btree, path, level + 1,
1346                                 nilfs_btree_node_get_key(right, 0));
1347         path[level + 1].bp_index--;
1348
1349         brelse(path[level].bp_sib_bh);
1350         path[level].bp_sib_bh = NULL;
1351 }
1352
1353 static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1354                                     struct nilfs_btree_path *path,
1355                                     int level, __u64 *keyp, __u64 *ptrp)
1356 {
1357         struct nilfs_btree_node *node, *left;
1358         int n, ncblk;
1359
1360         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1361
1362         node = nilfs_btree_get_nonroot_node(path, level);
1363         left = nilfs_btree_get_sib_node(path, level);
1364         ncblk = nilfs_btree_nchildren_per_block(btree);
1365
1366         n = nilfs_btree_node_get_nchildren(node);
1367
1368         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1369
1370         if (!buffer_dirty(path[level].bp_sib_bh))
1371                 mark_buffer_dirty(path[level].bp_sib_bh);
1372
1373         nilfs_btnode_delete(path[level].bp_bh);
1374         path[level].bp_bh = path[level].bp_sib_bh;
1375         path[level].bp_sib_bh = NULL;
1376         path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1377 }
1378
1379 static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1380                                      struct nilfs_btree_path *path,
1381                                      int level, __u64 *keyp, __u64 *ptrp)
1382 {
1383         struct nilfs_btree_node *node, *right;
1384         int n, ncblk;
1385
1386         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1387
1388         node = nilfs_btree_get_nonroot_node(path, level);
1389         right = nilfs_btree_get_sib_node(path, level);
1390         ncblk = nilfs_btree_nchildren_per_block(btree);
1391
1392         n = nilfs_btree_node_get_nchildren(right);
1393
1394         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1395
1396         if (!buffer_dirty(path[level].bp_bh))
1397                 mark_buffer_dirty(path[level].bp_bh);
1398
1399         nilfs_btnode_delete(path[level].bp_sib_bh);
1400         path[level].bp_sib_bh = NULL;
1401         path[level + 1].bp_index++;
1402 }
1403
1404 static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1405                                struct nilfs_btree_path *path,
1406                                int level, __u64 *keyp, __u64 *ptrp)
1407 {
1408         struct nilfs_btree_node *root, *child;
1409         int n, ncblk;
1410
1411         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1412
1413         root = nilfs_btree_get_root(btree);
1414         child = nilfs_btree_get_nonroot_node(path, level);
1415         ncblk = nilfs_btree_nchildren_per_block(btree);
1416
1417         nilfs_btree_node_delete(root, 0, NULL, NULL,
1418                                 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1419         nilfs_btree_node_set_level(root, level);
1420         n = nilfs_btree_node_get_nchildren(child);
1421         nilfs_btree_node_move_left(root, child, n,
1422                                    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1423
1424         nilfs_btnode_delete(path[level].bp_bh);
1425         path[level].bp_bh = NULL;
1426 }
1427
1428 static void nilfs_btree_nop(struct nilfs_bmap *btree,
1429                             struct nilfs_btree_path *path,
1430                             int level, __u64 *keyp, __u64 *ptrp)
1431 {
1432 }
1433
1434 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1435                                       struct nilfs_btree_path *path,
1436                                       int *levelp,
1437                                       struct nilfs_bmap_stats *stats,
1438                                       struct inode *dat)
1439 {
1440         struct buffer_head *bh;
1441         struct nilfs_btree_node *node, *parent, *sib;
1442         __u64 sibptr;
1443         int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1444
1445         ret = 0;
1446         stats->bs_nblocks = 0;
1447         ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1448         ncblk = nilfs_btree_nchildren_per_block(btree);
1449
1450         for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1451              level < nilfs_btree_height(btree) - 1;
1452              level++) {
1453                 node = nilfs_btree_get_nonroot_node(path, level);
1454                 path[level].bp_oldreq.bpr_ptr =
1455                         nilfs_btree_node_get_ptr(node, dindex, ncblk);
1456                 ret = nilfs_bmap_prepare_end_ptr(btree,
1457                                                  &path[level].bp_oldreq, dat);
1458                 if (ret < 0)
1459                         goto err_out_child_node;
1460
1461                 if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1462                         path[level].bp_op = nilfs_btree_do_delete;
1463                         stats->bs_nblocks++;
1464                         goto out;
1465                 }
1466
1467                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1468                 pindex = path[level + 1].bp_index;
1469                 dindex = pindex;
1470
1471                 if (pindex > 0) {
1472                         /* left sibling */
1473                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1474                                                           ncmax);
1475                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1476                         if (ret < 0)
1477                                 goto err_out_curr_node;
1478                         sib = (struct nilfs_btree_node *)bh->b_data;
1479                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1480                                 path[level].bp_sib_bh = bh;
1481                                 path[level].bp_op = nilfs_btree_borrow_left;
1482                                 stats->bs_nblocks++;
1483                                 goto out;
1484                         } else {
1485                                 path[level].bp_sib_bh = bh;
1486                                 path[level].bp_op = nilfs_btree_concat_left;
1487                                 stats->bs_nblocks++;
1488                                 /* continue; */
1489                         }
1490                 } else if (pindex <
1491                            nilfs_btree_node_get_nchildren(parent) - 1) {
1492                         /* right sibling */
1493                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1494                                                           ncmax);
1495                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1496                         if (ret < 0)
1497                                 goto err_out_curr_node;
1498                         sib = (struct nilfs_btree_node *)bh->b_data;
1499                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1500                                 path[level].bp_sib_bh = bh;
1501                                 path[level].bp_op = nilfs_btree_borrow_right;
1502                                 stats->bs_nblocks++;
1503                                 goto out;
1504                         } else {
1505                                 path[level].bp_sib_bh = bh;
1506                                 path[level].bp_op = nilfs_btree_concat_right;
1507                                 stats->bs_nblocks++;
1508                                 /*
1509                                  * When merging right sibling node
1510                                  * into the current node, pointer to
1511                                  * the right sibling node must be
1512                                  * terminated instead.  The adjustment
1513                                  * below is required for that.
1514                                  */
1515                                 dindex = pindex + 1;
1516                                 /* continue; */
1517                         }
1518                 } else {
1519                         /* no siblings */
1520                         /* the only child of the root node */
1521                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1522                         if (nilfs_btree_node_get_nchildren(node) - 1 <=
1523                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1524                                 path[level].bp_op = nilfs_btree_shrink;
1525                                 stats->bs_nblocks += 2;
1526                                 level++;
1527                                 path[level].bp_op = nilfs_btree_nop;
1528                                 goto shrink_root_child;
1529                         } else {
1530                                 path[level].bp_op = nilfs_btree_do_delete;
1531                                 stats->bs_nblocks++;
1532                                 goto out;
1533                         }
1534                 }
1535         }
1536
1537         /* child of the root node is deleted */
1538         path[level].bp_op = nilfs_btree_do_delete;
1539         stats->bs_nblocks++;
1540
1541 shrink_root_child:
1542         node = nilfs_btree_get_root(btree);
1543         path[level].bp_oldreq.bpr_ptr =
1544                 nilfs_btree_node_get_ptr(node, dindex,
1545                                          NILFS_BTREE_ROOT_NCHILDREN_MAX);
1546
1547         ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1548         if (ret < 0)
1549                 goto err_out_child_node;
1550
1551         /* success */
1552  out:
1553         *levelp = level;
1554         return ret;
1555
1556         /* error */
1557  err_out_curr_node:
1558         nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1559  err_out_child_node:
1560         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1561                 brelse(path[level].bp_sib_bh);
1562                 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1563         }
1564         *levelp = level;
1565         stats->bs_nblocks = 0;
1566         return ret;
1567 }
1568
1569 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1570                                       struct nilfs_btree_path *path,
1571                                       int maxlevel, struct inode *dat)
1572 {
1573         int level;
1574
1575         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1576                 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1577                 path[level].bp_op(btree, path, level, NULL, NULL);
1578         }
1579
1580         if (!nilfs_bmap_dirty(btree))
1581                 nilfs_bmap_set_dirty(btree);
1582 }
1583
1584 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1585
1586 {
1587         struct nilfs_btree_path *path;
1588         struct nilfs_bmap_stats stats;
1589         struct inode *dat;
1590         int level, ret;
1591
1592         path = nilfs_btree_alloc_path();
1593         if (path == NULL)
1594                 return -ENOMEM;
1595
1596         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1597                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1598         if (ret < 0)
1599                 goto out;
1600
1601
1602         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1603
1604         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1605         if (ret < 0)
1606                 goto out;
1607         nilfs_btree_commit_delete(btree, path, level, dat);
1608         nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1609
1610 out:
1611         nilfs_btree_free_path(path);
1612         return ret;
1613 }
1614
1615 static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1616                                 __u64 *keyp)
1617 {
1618         struct nilfs_btree_path *path;
1619         const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1620         int ret;
1621
1622         path = nilfs_btree_alloc_path();
1623         if (!path)
1624                 return -ENOMEM;
1625
1626         ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1627         if (!ret)
1628                 *keyp = start;
1629         else if (ret == -ENOENT)
1630                 ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1631
1632         nilfs_btree_free_path(path);
1633         return ret;
1634 }
1635
1636 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1637 {
1638         struct nilfs_btree_path *path;
1639         int ret;
1640
1641         path = nilfs_btree_alloc_path();
1642         if (path == NULL)
1643                 return -ENOMEM;
1644
1645         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1646
1647         nilfs_btree_free_path(path);
1648
1649         return ret;
1650 }
1651
1652 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1653 {
1654         struct buffer_head *bh;
1655         struct nilfs_btree_node *root, *node;
1656         __u64 maxkey, nextmaxkey;
1657         __u64 ptr;
1658         int nchildren, ret;
1659
1660         root = nilfs_btree_get_root(btree);
1661         switch (nilfs_btree_height(btree)) {
1662         case 2:
1663                 bh = NULL;
1664                 node = root;
1665                 break;
1666         case 3:
1667                 nchildren = nilfs_btree_node_get_nchildren(root);
1668                 if (nchildren > 1)
1669                         return 0;
1670                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1671                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1672                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1673                 if (ret < 0)
1674                         return ret;
1675                 node = (struct nilfs_btree_node *)bh->b_data;
1676                 break;
1677         default:
1678                 return 0;
1679         }
1680
1681         nchildren = nilfs_btree_node_get_nchildren(node);
1682         maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1683         nextmaxkey = (nchildren > 1) ?
1684                 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1685         brelse(bh);
1686
1687         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1688 }
1689
1690 static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1691                                    __u64 *keys, __u64 *ptrs, int nitems)
1692 {
1693         struct buffer_head *bh;
1694         struct nilfs_btree_node *node, *root;
1695         __le64 *dkeys;
1696         __le64 *dptrs;
1697         __u64 ptr;
1698         int nchildren, ncmax, i, ret;
1699
1700         root = nilfs_btree_get_root(btree);
1701         switch (nilfs_btree_height(btree)) {
1702         case 2:
1703                 bh = NULL;
1704                 node = root;
1705                 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1706                 break;
1707         case 3:
1708                 nchildren = nilfs_btree_node_get_nchildren(root);
1709                 WARN_ON(nchildren > 1);
1710                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1711                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1712                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1713                 if (ret < 0)
1714                         return ret;
1715                 node = (struct nilfs_btree_node *)bh->b_data;
1716                 ncmax = nilfs_btree_nchildren_per_block(btree);
1717                 break;
1718         default:
1719                 node = NULL;
1720                 return -EINVAL;
1721         }
1722
1723         nchildren = nilfs_btree_node_get_nchildren(node);
1724         if (nchildren < nitems)
1725                 nitems = nchildren;
1726         dkeys = nilfs_btree_node_dkeys(node);
1727         dptrs = nilfs_btree_node_dptrs(node, ncmax);
1728         for (i = 0; i < nitems; i++) {
1729                 keys[i] = le64_to_cpu(dkeys[i]);
1730                 ptrs[i] = le64_to_cpu(dptrs[i]);
1731         }
1732
1733         brelse(bh);
1734
1735         return nitems;
1736 }
1737
1738 static int
1739 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1740                                        union nilfs_bmap_ptr_req *dreq,
1741                                        union nilfs_bmap_ptr_req *nreq,
1742                                        struct buffer_head **bhp,
1743                                        struct nilfs_bmap_stats *stats)
1744 {
1745         struct buffer_head *bh;
1746         struct inode *dat = NULL;
1747         int ret;
1748
1749         stats->bs_nblocks = 0;
1750
1751         /* for data */
1752         /* cannot find near ptr */
1753         if (NILFS_BMAP_USE_VBN(btree)) {
1754                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1755                 dat = nilfs_bmap_get_dat(btree);
1756         }
1757
1758         ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode);
1759         if (ret < 0)
1760                 return ret;
1761
1762         ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1763         if (ret < 0)
1764                 return ret;
1765
1766         *bhp = NULL;
1767         stats->bs_nblocks++;
1768         if (nreq != NULL) {
1769                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1770                 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1771                 if (ret < 0)
1772                         goto err_out_dreq;
1773
1774                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1775                 if (ret < 0)
1776                         goto err_out_nreq;
1777
1778                 *bhp = bh;
1779                 stats->bs_nblocks++;
1780         }
1781
1782         /* success */
1783         return 0;
1784
1785         /* error */
1786  err_out_nreq:
1787         nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1788  err_out_dreq:
1789         nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1790         stats->bs_nblocks = 0;
1791         return ret;
1792
1793 }
1794
1795 static void
1796 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1797                                       __u64 key, __u64 ptr,
1798                                       const __u64 *keys, const __u64 *ptrs,
1799                                       int n,
1800                                       union nilfs_bmap_ptr_req *dreq,
1801                                       union nilfs_bmap_ptr_req *nreq,
1802                                       struct buffer_head *bh)
1803 {
1804         struct nilfs_btree_node *node;
1805         struct inode *dat;
1806         __u64 tmpptr;
1807         int ncblk;
1808
1809         /* free resources */
1810         if (btree->b_ops->bop_clear != NULL)
1811                 btree->b_ops->bop_clear(btree);
1812
1813         /* ptr must be a pointer to a buffer head. */
1814         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1815
1816         /* convert and insert */
1817         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1818         __nilfs_btree_init(btree);
1819         if (nreq != NULL) {
1820                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1821                 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1822
1823                 /* create child node at level 1 */
1824                 node = (struct nilfs_btree_node *)bh->b_data;
1825                 ncblk = nilfs_btree_nchildren_per_block(btree);
1826                 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1827                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1828                 if (!buffer_dirty(bh))
1829                         mark_buffer_dirty(bh);
1830                 if (!nilfs_bmap_dirty(btree))
1831                         nilfs_bmap_set_dirty(btree);
1832
1833                 brelse(bh);
1834
1835                 /* create root node at level 2 */
1836                 node = nilfs_btree_get_root(btree);
1837                 tmpptr = nreq->bpr_ptr;
1838                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1839                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1840                                       &keys[0], &tmpptr);
1841         } else {
1842                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1843
1844                 /* create root node at level 1 */
1845                 node = nilfs_btree_get_root(btree);
1846                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1847                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1848                                       keys, ptrs);
1849                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1850                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1851                 if (!nilfs_bmap_dirty(btree))
1852                         nilfs_bmap_set_dirty(btree);
1853         }
1854
1855         if (NILFS_BMAP_USE_VBN(btree))
1856                 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1857 }
1858
1859 /**
1860  * nilfs_btree_convert_and_insert -
1861  * @bmap:
1862  * @key:
1863  * @ptr:
1864  * @keys:
1865  * @ptrs:
1866  * @n:
1867  */
1868 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1869                                    __u64 key, __u64 ptr,
1870                                    const __u64 *keys, const __u64 *ptrs, int n)
1871 {
1872         struct buffer_head *bh = NULL;
1873         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1874         struct nilfs_bmap_stats stats;
1875         int ret;
1876
1877         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1878                 di = &dreq;
1879                 ni = NULL;
1880         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1881                            nilfs_btree_node_size(btree))) {
1882                 di = &dreq;
1883                 ni = &nreq;
1884         } else {
1885                 di = NULL;
1886                 ni = NULL;
1887                 BUG();
1888         }
1889
1890         ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1891                                                      &stats);
1892         if (ret < 0)
1893                 return ret;
1894         nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1895                                               di, ni, bh);
1896         nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1897         return 0;
1898 }
1899
1900 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1901                                    struct nilfs_btree_path *path,
1902                                    int level,
1903                                    struct buffer_head *bh)
1904 {
1905         while ((++level < nilfs_btree_height(btree) - 1) &&
1906                !buffer_dirty(path[level].bp_bh))
1907                 mark_buffer_dirty(path[level].bp_bh);
1908
1909         return 0;
1910 }
1911
1912 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1913                                         struct nilfs_btree_path *path,
1914                                         int level, struct inode *dat)
1915 {
1916         struct nilfs_btree_node *parent;
1917         int ncmax, ret;
1918
1919         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1920         path[level].bp_oldreq.bpr_ptr =
1921                 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1922                                          ncmax);
1923         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1924         ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1925                                        &path[level].bp_newreq.bpr_req);
1926         if (ret < 0)
1927                 return ret;
1928
1929         if (buffer_nilfs_node(path[level].bp_bh)) {
1930                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1931                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1932                 path[level].bp_ctxt.bh = path[level].bp_bh;
1933                 ret = nilfs_btnode_prepare_change_key(
1934                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1935                         &path[level].bp_ctxt);
1936                 if (ret < 0) {
1937                         nilfs_dat_abort_update(dat,
1938                                                &path[level].bp_oldreq.bpr_req,
1939                                                &path[level].bp_newreq.bpr_req);
1940                         return ret;
1941                 }
1942         }
1943
1944         return 0;
1945 }
1946
1947 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1948                                         struct nilfs_btree_path *path,
1949                                         int level, struct inode *dat)
1950 {
1951         struct nilfs_btree_node *parent;
1952         int ncmax;
1953
1954         nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1955                                 &path[level].bp_newreq.bpr_req,
1956                                 btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1957
1958         if (buffer_nilfs_node(path[level].bp_bh)) {
1959                 nilfs_btnode_commit_change_key(
1960                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1961                         &path[level].bp_ctxt);
1962                 path[level].bp_bh = path[level].bp_ctxt.bh;
1963         }
1964         set_buffer_nilfs_volatile(path[level].bp_bh);
1965
1966         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1967         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1968                                  path[level].bp_newreq.bpr_ptr, ncmax);
1969 }
1970
1971 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1972                                        struct nilfs_btree_path *path,
1973                                        int level, struct inode *dat)
1974 {
1975         nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1976                                &path[level].bp_newreq.bpr_req);
1977         if (buffer_nilfs_node(path[level].bp_bh))
1978                 nilfs_btnode_abort_change_key(
1979                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1980                         &path[level].bp_ctxt);
1981 }
1982
1983 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1984                                            struct nilfs_btree_path *path,
1985                                            int minlevel, int *maxlevelp,
1986                                            struct inode *dat)
1987 {
1988         int level, ret;
1989
1990         level = minlevel;
1991         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1992                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1993                 if (ret < 0)
1994                         return ret;
1995         }
1996         while ((++level < nilfs_btree_height(btree) - 1) &&
1997                !buffer_dirty(path[level].bp_bh)) {
1998
1999                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
2000                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
2001                 if (ret < 0)
2002                         goto out;
2003         }
2004
2005         /* success */
2006         *maxlevelp = level - 1;
2007         return 0;
2008
2009         /* error */
2010  out:
2011         while (--level > minlevel)
2012                 nilfs_btree_abort_update_v(btree, path, level, dat);
2013         if (!buffer_nilfs_volatile(path[level].bp_bh))
2014                 nilfs_btree_abort_update_v(btree, path, level, dat);
2015         return ret;
2016 }
2017
2018 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
2019                                            struct nilfs_btree_path *path,
2020                                            int minlevel, int maxlevel,
2021                                            struct buffer_head *bh,
2022                                            struct inode *dat)
2023 {
2024         int level;
2025
2026         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2027                 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2028
2029         for (level = minlevel + 1; level <= maxlevel; level++)
2030                 nilfs_btree_commit_update_v(btree, path, level, dat);
2031 }
2032
2033 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2034                                    struct nilfs_btree_path *path,
2035                                    int level, struct buffer_head *bh)
2036 {
2037         int maxlevel = 0, ret;
2038         struct nilfs_btree_node *parent;
2039         struct inode *dat = nilfs_bmap_get_dat(btree);
2040         __u64 ptr;
2041         int ncmax;
2042
2043         get_bh(bh);
2044         path[level].bp_bh = bh;
2045         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2046                                               dat);
2047         if (ret < 0)
2048                 goto out;
2049
2050         if (buffer_nilfs_volatile(path[level].bp_bh)) {
2051                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2052                 ptr = nilfs_btree_node_get_ptr(parent,
2053                                                path[level + 1].bp_index,
2054                                                ncmax);
2055                 ret = nilfs_dat_mark_dirty(dat, ptr);
2056                 if (ret < 0)
2057                         goto out;
2058         }
2059
2060         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2061
2062  out:
2063         brelse(path[level].bp_bh);
2064         path[level].bp_bh = NULL;
2065         return ret;
2066 }
2067
2068 static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2069                                  struct buffer_head *bh)
2070 {
2071         struct nilfs_btree_path *path;
2072         struct nilfs_btree_node *node;
2073         __u64 key;
2074         int level, ret;
2075
2076         WARN_ON(!buffer_dirty(bh));
2077
2078         path = nilfs_btree_alloc_path();
2079         if (path == NULL)
2080                 return -ENOMEM;
2081
2082         if (buffer_nilfs_node(bh)) {
2083                 node = (struct nilfs_btree_node *)bh->b_data;
2084                 key = nilfs_btree_node_get_key(node, 0);
2085                 level = nilfs_btree_node_get_level(node);
2086         } else {
2087                 key = nilfs_bmap_data_get_key(btree, bh);
2088                 level = NILFS_BTREE_LEVEL_DATA;
2089         }
2090
2091         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2092         if (ret < 0) {
2093                 if (unlikely(ret == -ENOENT))
2094                         nilfs_crit(btree->b_inode->i_sb,
2095                                    "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
2096                                    btree->b_inode->i_ino,
2097                                    (unsigned long long)key, level);
2098                 goto out;
2099         }
2100
2101         ret = NILFS_BMAP_USE_VBN(btree) ?
2102                 nilfs_btree_propagate_v(btree, path, level, bh) :
2103                 nilfs_btree_propagate_p(btree, path, level, bh);
2104
2105  out:
2106         nilfs_btree_free_path(path);
2107
2108         return ret;
2109 }
2110
2111 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2112                                     struct buffer_head *bh)
2113 {
2114         return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2115 }
2116
2117 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2118                                          struct list_head *lists,
2119                                          struct buffer_head *bh)
2120 {
2121         struct list_head *head;
2122         struct buffer_head *cbh;
2123         struct nilfs_btree_node *node, *cnode;
2124         __u64 key, ckey;
2125         int level;
2126
2127         get_bh(bh);
2128         node = (struct nilfs_btree_node *)bh->b_data;
2129         key = nilfs_btree_node_get_key(node, 0);
2130         level = nilfs_btree_node_get_level(node);
2131         if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2132             level >= NILFS_BTREE_LEVEL_MAX) {
2133                 dump_stack();
2134                 nilfs_warn(btree->b_inode->i_sb,
2135                            "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
2136                            level, (unsigned long long)key,
2137                            btree->b_inode->i_ino,
2138                            (unsigned long long)bh->b_blocknr);
2139                 return;
2140         }
2141
2142         list_for_each(head, &lists[level]) {
2143                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2144                 cnode = (struct nilfs_btree_node *)cbh->b_data;
2145                 ckey = nilfs_btree_node_get_key(cnode, 0);
2146                 if (key < ckey)
2147                         break;
2148         }
2149         list_add_tail(&bh->b_assoc_buffers, head);
2150 }
2151
2152 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2153                                              struct list_head *listp)
2154 {
2155         struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
2156         struct address_space *btcache = btnc_inode->i_mapping;
2157         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2158         struct folio_batch fbatch;
2159         struct buffer_head *bh, *head;
2160         pgoff_t index = 0;
2161         int level, i;
2162
2163         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2164              level < NILFS_BTREE_LEVEL_MAX;
2165              level++)
2166                 INIT_LIST_HEAD(&lists[level]);
2167
2168         folio_batch_init(&fbatch);
2169
2170         while (filemap_get_folios_tag(btcache, &index, (pgoff_t)-1,
2171                                 PAGECACHE_TAG_DIRTY, &fbatch)) {
2172                 for (i = 0; i < folio_batch_count(&fbatch); i++) {
2173                         bh = head = folio_buffers(fbatch.folios[i]);
2174                         do {
2175                                 if (buffer_dirty(bh))
2176                                         nilfs_btree_add_dirty_buffer(btree,
2177                                                                      lists, bh);
2178                         } while ((bh = bh->b_this_page) != head);
2179                 }
2180                 folio_batch_release(&fbatch);
2181                 cond_resched();
2182         }
2183
2184         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2185              level < NILFS_BTREE_LEVEL_MAX;
2186              level++)
2187                 list_splice_tail(&lists[level], listp);
2188 }
2189
2190 static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2191                                 struct nilfs_btree_path *path,
2192                                 int level,
2193                                 struct buffer_head **bh,
2194                                 sector_t blocknr,
2195                                 union nilfs_binfo *binfo)
2196 {
2197         struct nilfs_btree_node *parent;
2198         __u64 key;
2199         __u64 ptr;
2200         int ncmax, ret;
2201
2202         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2203         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2204                                        ncmax);
2205         if (buffer_nilfs_node(*bh)) {
2206                 path[level].bp_ctxt.oldkey = ptr;
2207                 path[level].bp_ctxt.newkey = blocknr;
2208                 path[level].bp_ctxt.bh = *bh;
2209                 ret = nilfs_btnode_prepare_change_key(
2210                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2211                         &path[level].bp_ctxt);
2212                 if (ret < 0)
2213                         return ret;
2214                 nilfs_btnode_commit_change_key(
2215                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2216                         &path[level].bp_ctxt);
2217                 *bh = path[level].bp_ctxt.bh;
2218         }
2219
2220         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2221                                  ncmax);
2222
2223         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2224         /* on-disk format */
2225         binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2226         binfo->bi_dat.bi_level = level;
2227         memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad));
2228
2229         return 0;
2230 }
2231
2232 static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2233                                 struct nilfs_btree_path *path,
2234                                 int level,
2235                                 struct buffer_head **bh,
2236                                 sector_t blocknr,
2237                                 union nilfs_binfo *binfo)
2238 {
2239         struct nilfs_btree_node *parent;
2240         struct inode *dat = nilfs_bmap_get_dat(btree);
2241         __u64 key;
2242         __u64 ptr;
2243         union nilfs_bmap_ptr_req req;
2244         int ncmax, ret;
2245
2246         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2247         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2248                                        ncmax);
2249         req.bpr_ptr = ptr;
2250         ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2251         if (ret < 0)
2252                 return ret;
2253         nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2254
2255         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2256         /* on-disk format */
2257         binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2258         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2259
2260         return 0;
2261 }
2262
2263 static int nilfs_btree_assign(struct nilfs_bmap *btree,
2264                               struct buffer_head **bh,
2265                               sector_t blocknr,
2266                               union nilfs_binfo *binfo)
2267 {
2268         struct nilfs_btree_path *path;
2269         struct nilfs_btree_node *node;
2270         __u64 key;
2271         int level, ret;
2272
2273         path = nilfs_btree_alloc_path();
2274         if (path == NULL)
2275                 return -ENOMEM;
2276
2277         if (buffer_nilfs_node(*bh)) {
2278                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2279                 key = nilfs_btree_node_get_key(node, 0);
2280                 level = nilfs_btree_node_get_level(node);
2281         } else {
2282                 key = nilfs_bmap_data_get_key(btree, *bh);
2283                 level = NILFS_BTREE_LEVEL_DATA;
2284         }
2285
2286         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2287         if (ret < 0) {
2288                 WARN_ON(ret == -ENOENT);
2289                 goto out;
2290         }
2291
2292         ret = NILFS_BMAP_USE_VBN(btree) ?
2293                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2294                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2295
2296  out:
2297         nilfs_btree_free_path(path);
2298
2299         return ret;
2300 }
2301
2302 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2303                                  struct buffer_head **bh,
2304                                  sector_t blocknr,
2305                                  union nilfs_binfo *binfo)
2306 {
2307         struct nilfs_btree_node *node;
2308         __u64 key;
2309         int ret;
2310
2311         ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2312                              blocknr);
2313         if (ret < 0)
2314                 return ret;
2315
2316         if (buffer_nilfs_node(*bh)) {
2317                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2318                 key = nilfs_btree_node_get_key(node, 0);
2319         } else
2320                 key = nilfs_bmap_data_get_key(btree, *bh);
2321
2322         /* on-disk format */
2323         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2324         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2325
2326         return 0;
2327 }
2328
2329 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2330 {
2331         struct buffer_head *bh;
2332         struct nilfs_btree_path *path;
2333         __u64 ptr;
2334         int ret;
2335
2336         path = nilfs_btree_alloc_path();
2337         if (path == NULL)
2338                 return -ENOMEM;
2339
2340         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2341         if (ret < 0) {
2342                 WARN_ON(ret == -ENOENT);
2343                 goto out;
2344         }
2345         ret = nilfs_btree_get_block(btree, ptr, &bh);
2346         if (ret < 0) {
2347                 WARN_ON(ret == -ENOENT);
2348                 goto out;
2349         }
2350
2351         if (!buffer_dirty(bh))
2352                 mark_buffer_dirty(bh);
2353         brelse(bh);
2354         if (!nilfs_bmap_dirty(btree))
2355                 nilfs_bmap_set_dirty(btree);
2356
2357  out:
2358         nilfs_btree_free_path(path);
2359         return ret;
2360 }
2361
2362 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2363         .bop_lookup             =       nilfs_btree_lookup,
2364         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2365         .bop_insert             =       nilfs_btree_insert,
2366         .bop_delete             =       nilfs_btree_delete,
2367         .bop_clear              =       NULL,
2368
2369         .bop_propagate          =       nilfs_btree_propagate,
2370
2371         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2372
2373         .bop_assign             =       nilfs_btree_assign,
2374         .bop_mark               =       nilfs_btree_mark,
2375
2376         .bop_seek_key           =       nilfs_btree_seek_key,
2377         .bop_last_key           =       nilfs_btree_last_key,
2378
2379         .bop_check_insert       =       NULL,
2380         .bop_check_delete       =       nilfs_btree_check_delete,
2381         .bop_gather_data        =       nilfs_btree_gather_data,
2382 };
2383
2384 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2385         .bop_lookup             =       NULL,
2386         .bop_lookup_contig      =       NULL,
2387         .bop_insert             =       NULL,
2388         .bop_delete             =       NULL,
2389         .bop_clear              =       NULL,
2390
2391         .bop_propagate          =       nilfs_btree_propagate_gc,
2392
2393         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2394
2395         .bop_assign             =       nilfs_btree_assign_gc,
2396         .bop_mark               =       NULL,
2397
2398         .bop_seek_key           =       NULL,
2399         .bop_last_key           =       NULL,
2400
2401         .bop_check_insert       =       NULL,
2402         .bop_check_delete       =       NULL,
2403         .bop_gather_data        =       NULL,
2404 };
2405
2406 static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2407 {
2408         bmap->b_ops = &nilfs_btree_ops;
2409         bmap->b_nchildren_per_block =
2410                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2411 }
2412
2413 int nilfs_btree_init(struct nilfs_bmap *bmap)
2414 {
2415         int ret = 0;
2416
2417         __nilfs_btree_init(bmap);
2418
2419         if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
2420                 ret = -EIO;
2421         else
2422                 ret = nilfs_attach_btree_node_cache(
2423                         &NILFS_BMAP_I(bmap)->vfs_inode);
2424
2425         return ret;
2426 }
2427
2428 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2429 {
2430         bmap->b_ops = &nilfs_btree_ops_gc;
2431         bmap->b_nchildren_per_block =
2432                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2433 }