GNU Linux-libre 6.0.2-gnu
[releases.git] / fs / nilfs2 / btree.c
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * NILFS B-tree.
4  *
5  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
6  *
7  * Written by Koji Sato.
8  */
9
10 #include <linux/slab.h>
11 #include <linux/string.h>
12 #include <linux/errno.h>
13 #include <linux/pagevec.h>
14 #include "nilfs.h"
15 #include "page.h"
16 #include "btnode.h"
17 #include "btree.h"
18 #include "alloc.h"
19 #include "dat.h"
20
21 static void __nilfs_btree_init(struct nilfs_bmap *bmap);
22
23 static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
24 {
25         struct nilfs_btree_path *path;
26         int level = NILFS_BTREE_LEVEL_DATA;
27
28         path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
29         if (path == NULL)
30                 goto out;
31
32         for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
33                 path[level].bp_bh = NULL;
34                 path[level].bp_sib_bh = NULL;
35                 path[level].bp_index = 0;
36                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
37                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
38                 path[level].bp_op = NULL;
39         }
40
41 out:
42         return path;
43 }
44
45 static void nilfs_btree_free_path(struct nilfs_btree_path *path)
46 {
47         int level = NILFS_BTREE_LEVEL_DATA;
48
49         for (; level < NILFS_BTREE_LEVEL_MAX; level++)
50                 brelse(path[level].bp_bh);
51
52         kmem_cache_free(nilfs_btree_path_cache, path);
53 }
54
55 /*
56  * B-tree node operations
57  */
58 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
59                                      __u64 ptr, struct buffer_head **bhp)
60 {
61         struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
62         struct address_space *btnc = btnc_inode->i_mapping;
63         struct buffer_head *bh;
64
65         bh = nilfs_btnode_create_block(btnc, ptr);
66         if (!bh)
67                 return -ENOMEM;
68
69         set_buffer_nilfs_volatile(bh);
70         *bhp = bh;
71         return 0;
72 }
73
74 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
75 {
76         return node->bn_flags;
77 }
78
79 static void
80 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
81 {
82         node->bn_flags = flags;
83 }
84
85 static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
86 {
87         return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
88 }
89
90 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
91 {
92         return node->bn_level;
93 }
94
95 static void
96 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
97 {
98         node->bn_level = level;
99 }
100
101 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
102 {
103         return le16_to_cpu(node->bn_nchildren);
104 }
105
106 static void
107 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
108 {
109         node->bn_nchildren = cpu_to_le16(nchildren);
110 }
111
112 static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
113 {
114         return i_blocksize(btree->b_inode);
115 }
116
117 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
118 {
119         return btree->b_nchildren_per_block;
120 }
121
122 static __le64 *
123 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
124 {
125         return (__le64 *)((char *)(node + 1) +
126                           (nilfs_btree_node_root(node) ?
127                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
128 }
129
130 static __le64 *
131 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
132 {
133         return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
134 }
135
136 static __u64
137 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
138 {
139         return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
140 }
141
142 static void
143 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
144 {
145         *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
146 }
147
148 static __u64
149 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
150                          int ncmax)
151 {
152         return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
153 }
154
155 static void
156 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
157                          int ncmax)
158 {
159         *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
160 }
161
162 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
163                                   int level, int nchildren, int ncmax,
164                                   const __u64 *keys, const __u64 *ptrs)
165 {
166         __le64 *dkeys;
167         __le64 *dptrs;
168         int i;
169
170         nilfs_btree_node_set_flags(node, flags);
171         nilfs_btree_node_set_level(node, level);
172         nilfs_btree_node_set_nchildren(node, nchildren);
173
174         dkeys = nilfs_btree_node_dkeys(node);
175         dptrs = nilfs_btree_node_dptrs(node, ncmax);
176         for (i = 0; i < nchildren; i++) {
177                 dkeys[i] = cpu_to_le64(keys[i]);
178                 dptrs[i] = cpu_to_le64(ptrs[i]);
179         }
180 }
181
182 /* Assume the buffer heads corresponding to left and right are locked. */
183 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
184                                        struct nilfs_btree_node *right,
185                                        int n, int lncmax, int rncmax)
186 {
187         __le64 *ldkeys, *rdkeys;
188         __le64 *ldptrs, *rdptrs;
189         int lnchildren, rnchildren;
190
191         ldkeys = nilfs_btree_node_dkeys(left);
192         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
193         lnchildren = nilfs_btree_node_get_nchildren(left);
194
195         rdkeys = nilfs_btree_node_dkeys(right);
196         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
197         rnchildren = nilfs_btree_node_get_nchildren(right);
198
199         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
200         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
201         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
202         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
203
204         lnchildren += n;
205         rnchildren -= n;
206         nilfs_btree_node_set_nchildren(left, lnchildren);
207         nilfs_btree_node_set_nchildren(right, rnchildren);
208 }
209
210 /* Assume that the buffer heads corresponding to left and right are locked. */
211 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
212                                         struct nilfs_btree_node *right,
213                                         int n, int lncmax, int rncmax)
214 {
215         __le64 *ldkeys, *rdkeys;
216         __le64 *ldptrs, *rdptrs;
217         int lnchildren, rnchildren;
218
219         ldkeys = nilfs_btree_node_dkeys(left);
220         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
221         lnchildren = nilfs_btree_node_get_nchildren(left);
222
223         rdkeys = nilfs_btree_node_dkeys(right);
224         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
225         rnchildren = nilfs_btree_node_get_nchildren(right);
226
227         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
228         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
229         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
230         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
231
232         lnchildren -= n;
233         rnchildren += n;
234         nilfs_btree_node_set_nchildren(left, lnchildren);
235         nilfs_btree_node_set_nchildren(right, rnchildren);
236 }
237
238 /* Assume that the buffer head corresponding to node is locked. */
239 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
240                                     __u64 key, __u64 ptr, int ncmax)
241 {
242         __le64 *dkeys;
243         __le64 *dptrs;
244         int nchildren;
245
246         dkeys = nilfs_btree_node_dkeys(node);
247         dptrs = nilfs_btree_node_dptrs(node, ncmax);
248         nchildren = nilfs_btree_node_get_nchildren(node);
249         if (index < nchildren) {
250                 memmove(dkeys + index + 1, dkeys + index,
251                         (nchildren - index) * sizeof(*dkeys));
252                 memmove(dptrs + index + 1, dptrs + index,
253                         (nchildren - index) * sizeof(*dptrs));
254         }
255         dkeys[index] = cpu_to_le64(key);
256         dptrs[index] = cpu_to_le64(ptr);
257         nchildren++;
258         nilfs_btree_node_set_nchildren(node, nchildren);
259 }
260
261 /* Assume that the buffer head corresponding to node is locked. */
262 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
263                                     __u64 *keyp, __u64 *ptrp, int ncmax)
264 {
265         __u64 key;
266         __u64 ptr;
267         __le64 *dkeys;
268         __le64 *dptrs;
269         int nchildren;
270
271         dkeys = nilfs_btree_node_dkeys(node);
272         dptrs = nilfs_btree_node_dptrs(node, ncmax);
273         key = le64_to_cpu(dkeys[index]);
274         ptr = le64_to_cpu(dptrs[index]);
275         nchildren = nilfs_btree_node_get_nchildren(node);
276         if (keyp != NULL)
277                 *keyp = key;
278         if (ptrp != NULL)
279                 *ptrp = ptr;
280
281         if (index < nchildren - 1) {
282                 memmove(dkeys + index, dkeys + index + 1,
283                         (nchildren - index - 1) * sizeof(*dkeys));
284                 memmove(dptrs + index, dptrs + index + 1,
285                         (nchildren - index - 1) * sizeof(*dptrs));
286         }
287         nchildren--;
288         nilfs_btree_node_set_nchildren(node, nchildren);
289 }
290
291 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
292                                    __u64 key, int *indexp)
293 {
294         __u64 nkey;
295         int index, low, high, s;
296
297         /* binary search */
298         low = 0;
299         high = nilfs_btree_node_get_nchildren(node) - 1;
300         index = 0;
301         s = 0;
302         while (low <= high) {
303                 index = (low + high) / 2;
304                 nkey = nilfs_btree_node_get_key(node, index);
305                 if (nkey == key) {
306                         s = 0;
307                         goto out;
308                 } else if (nkey < key) {
309                         low = index + 1;
310                         s = -1;
311                 } else {
312                         high = index - 1;
313                         s = 1;
314                 }
315         }
316
317         /* adjust index */
318         if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
319                 if (s > 0 && index > 0)
320                         index--;
321         } else if (s < 0)
322                 index++;
323
324  out:
325         *indexp = index;
326
327         return s == 0;
328 }
329
330 /**
331  * nilfs_btree_node_broken - verify consistency of btree node
332  * @node: btree node block to be examined
333  * @size: node size (in bytes)
334  * @inode: host inode of btree
335  * @blocknr: block number
336  *
337  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
338  */
339 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
340                                    size_t size, struct inode *inode,
341                                    sector_t blocknr)
342 {
343         int level, flags, nchildren;
344         int ret = 0;
345
346         level = nilfs_btree_node_get_level(node);
347         flags = nilfs_btree_node_get_flags(node);
348         nchildren = nilfs_btree_node_get_nchildren(node);
349
350         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
351                      level >= NILFS_BTREE_LEVEL_MAX ||
352                      (flags & NILFS_BTREE_NODE_ROOT) ||
353                      nchildren < 0 ||
354                      nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
355                 nilfs_crit(inode->i_sb,
356                            "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
357                            inode->i_ino, (unsigned long long)blocknr, level,
358                            flags, nchildren);
359                 ret = 1;
360         }
361         return ret;
362 }
363
364 /**
365  * nilfs_btree_root_broken - verify consistency of btree root node
366  * @node: btree root node to be examined
367  * @inode: host inode of btree
368  *
369  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
370  */
371 static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
372                                    struct inode *inode)
373 {
374         int level, flags, nchildren;
375         int ret = 0;
376
377         level = nilfs_btree_node_get_level(node);
378         flags = nilfs_btree_node_get_flags(node);
379         nchildren = nilfs_btree_node_get_nchildren(node);
380
381         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
382                      level >= NILFS_BTREE_LEVEL_MAX ||
383                      nchildren < 0 ||
384                      nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
385                 nilfs_crit(inode->i_sb,
386                            "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
387                            inode->i_ino, level, flags, nchildren);
388                 ret = 1;
389         }
390         return ret;
391 }
392
393 int nilfs_btree_broken_node_block(struct buffer_head *bh)
394 {
395         struct inode *inode;
396         int ret;
397
398         if (buffer_nilfs_checked(bh))
399                 return 0;
400
401         inode = bh->b_page->mapping->host;
402         ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
403                                       bh->b_size, inode, bh->b_blocknr);
404         if (likely(!ret))
405                 set_buffer_nilfs_checked(bh);
406         return ret;
407 }
408
409 static struct nilfs_btree_node *
410 nilfs_btree_get_root(const struct nilfs_bmap *btree)
411 {
412         return (struct nilfs_btree_node *)btree->b_u.u_data;
413 }
414
415 static struct nilfs_btree_node *
416 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
417 {
418         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
419 }
420
421 static struct nilfs_btree_node *
422 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
423 {
424         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
425 }
426
427 static int nilfs_btree_height(const struct nilfs_bmap *btree)
428 {
429         return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
430 }
431
432 static struct nilfs_btree_node *
433 nilfs_btree_get_node(const struct nilfs_bmap *btree,
434                      const struct nilfs_btree_path *path,
435                      int level, int *ncmaxp)
436 {
437         struct nilfs_btree_node *node;
438
439         if (level == nilfs_btree_height(btree) - 1) {
440                 node = nilfs_btree_get_root(btree);
441                 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
442         } else {
443                 node = nilfs_btree_get_nonroot_node(path, level);
444                 *ncmaxp = nilfs_btree_nchildren_per_block(btree);
445         }
446         return node;
447 }
448
449 static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
450                                 struct nilfs_btree_node *node, int level)
451 {
452         if (unlikely(nilfs_btree_node_get_level(node) != level)) {
453                 dump_stack();
454                 nilfs_crit(btree->b_inode->i_sb,
455                            "btree level mismatch (ino=%lu): %d != %d",
456                            btree->b_inode->i_ino,
457                            nilfs_btree_node_get_level(node), level);
458                 return 1;
459         }
460         return 0;
461 }
462
463 struct nilfs_btree_readahead_info {
464         struct nilfs_btree_node *node;  /* parent node */
465         int max_ra_blocks;              /* max nof blocks to read ahead */
466         int index;                      /* current index on the parent node */
467         int ncmax;                      /* nof children in the parent node */
468 };
469
470 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
471                                    struct buffer_head **bhp,
472                                    const struct nilfs_btree_readahead_info *ra)
473 {
474         struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
475         struct address_space *btnc = btnc_inode->i_mapping;
476         struct buffer_head *bh, *ra_bh;
477         sector_t submit_ptr = 0;
478         int ret;
479
480         ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh,
481                                         &submit_ptr);
482         if (ret) {
483                 if (ret != -EEXIST)
484                         return ret;
485                 goto out_check;
486         }
487
488         if (ra) {
489                 int i, n;
490                 __u64 ptr2;
491
492                 /* read ahead sibling nodes */
493                 for (n = ra->max_ra_blocks, i = ra->index + 1;
494                      n > 0 && i < ra->ncmax; n--, i++) {
495                         ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
496
497                         ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
498                                                 REQ_OP_READ | REQ_RAHEAD,
499                                                 &ra_bh, &submit_ptr);
500                         if (likely(!ret || ret == -EEXIST))
501                                 brelse(ra_bh);
502                         else if (ret != -EBUSY)
503                                 break;
504                         if (!buffer_locked(bh))
505                                 goto out_no_wait;
506                 }
507         }
508
509         wait_on_buffer(bh);
510
511  out_no_wait:
512         if (!buffer_uptodate(bh)) {
513                 nilfs_err(btree->b_inode->i_sb,
514                           "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
515                           btree->b_inode->i_ino, (unsigned long long)ptr);
516                 brelse(bh);
517                 return -EIO;
518         }
519
520  out_check:
521         if (nilfs_btree_broken_node_block(bh)) {
522                 clear_buffer_uptodate(bh);
523                 brelse(bh);
524                 return -EINVAL;
525         }
526
527         *bhp = bh;
528         return 0;
529 }
530
531 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
532                                    struct buffer_head **bhp)
533 {
534         return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
535 }
536
537 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
538                                  struct nilfs_btree_path *path,
539                                  __u64 key, __u64 *ptrp, int minlevel,
540                                  int readahead)
541 {
542         struct nilfs_btree_node *node;
543         struct nilfs_btree_readahead_info p, *ra;
544         __u64 ptr;
545         int level, index, found, ncmax, ret;
546
547         node = nilfs_btree_get_root(btree);
548         level = nilfs_btree_node_get_level(node);
549         if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
550                 return -ENOENT;
551
552         found = nilfs_btree_node_lookup(node, key, &index);
553         ptr = nilfs_btree_node_get_ptr(node, index,
554                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
555         path[level].bp_bh = NULL;
556         path[level].bp_index = index;
557
558         ncmax = nilfs_btree_nchildren_per_block(btree);
559
560         while (--level >= minlevel) {
561                 ra = NULL;
562                 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
563                         p.node = nilfs_btree_get_node(btree, path, level + 1,
564                                                       &p.ncmax);
565                         p.index = index;
566                         p.max_ra_blocks = 7;
567                         ra = &p;
568                 }
569                 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
570                                               ra);
571                 if (ret < 0)
572                         return ret;
573
574                 node = nilfs_btree_get_nonroot_node(path, level);
575                 if (nilfs_btree_bad_node(btree, node, level))
576                         return -EINVAL;
577                 if (!found)
578                         found = nilfs_btree_node_lookup(node, key, &index);
579                 else
580                         index = 0;
581                 if (index < ncmax) {
582                         ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
583                 } else {
584                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
585                         /* insert */
586                         ptr = NILFS_BMAP_INVALID_PTR;
587                 }
588                 path[level].bp_index = index;
589         }
590         if (!found)
591                 return -ENOENT;
592
593         if (ptrp != NULL)
594                 *ptrp = ptr;
595
596         return 0;
597 }
598
599 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
600                                       struct nilfs_btree_path *path,
601                                       __u64 *keyp, __u64 *ptrp)
602 {
603         struct nilfs_btree_node *node;
604         __u64 ptr;
605         int index, level, ncmax, ret;
606
607         node = nilfs_btree_get_root(btree);
608         index = nilfs_btree_node_get_nchildren(node) - 1;
609         if (index < 0)
610                 return -ENOENT;
611         level = nilfs_btree_node_get_level(node);
612         ptr = nilfs_btree_node_get_ptr(node, index,
613                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
614         path[level].bp_bh = NULL;
615         path[level].bp_index = index;
616         ncmax = nilfs_btree_nchildren_per_block(btree);
617
618         for (level--; level > 0; level--) {
619                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
620                 if (ret < 0)
621                         return ret;
622                 node = nilfs_btree_get_nonroot_node(path, level);
623                 if (nilfs_btree_bad_node(btree, node, level))
624                         return -EINVAL;
625                 index = nilfs_btree_node_get_nchildren(node) - 1;
626                 ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
627                 path[level].bp_index = index;
628         }
629
630         if (keyp != NULL)
631                 *keyp = nilfs_btree_node_get_key(node, index);
632         if (ptrp != NULL)
633                 *ptrp = ptr;
634
635         return 0;
636 }
637
638 /**
639  * nilfs_btree_get_next_key - get next valid key from btree path array
640  * @btree: bmap struct of btree
641  * @path: array of nilfs_btree_path struct
642  * @minlevel: start level
643  * @nextkey: place to store the next valid key
644  *
645  * Return Value: If a next key was found, 0 is returned. Otherwise,
646  * -ENOENT is returned.
647  */
648 static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
649                                     const struct nilfs_btree_path *path,
650                                     int minlevel, __u64 *nextkey)
651 {
652         struct nilfs_btree_node *node;
653         int maxlevel = nilfs_btree_height(btree) - 1;
654         int index, next_adj, level;
655
656         /* Next index is already set to bp_index for leaf nodes. */
657         next_adj = 0;
658         for (level = minlevel; level <= maxlevel; level++) {
659                 if (level == maxlevel)
660                         node = nilfs_btree_get_root(btree);
661                 else
662                         node = nilfs_btree_get_nonroot_node(path, level);
663
664                 index = path[level].bp_index + next_adj;
665                 if (index < nilfs_btree_node_get_nchildren(node)) {
666                         /* Next key is in this node */
667                         *nextkey = nilfs_btree_node_get_key(node, index);
668                         return 0;
669                 }
670                 /* For non-leaf nodes, next index is stored at bp_index + 1. */
671                 next_adj = 1;
672         }
673         return -ENOENT;
674 }
675
676 static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
677                               __u64 key, int level, __u64 *ptrp)
678 {
679         struct nilfs_btree_path *path;
680         int ret;
681
682         path = nilfs_btree_alloc_path();
683         if (path == NULL)
684                 return -ENOMEM;
685
686         ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
687
688         nilfs_btree_free_path(path);
689
690         return ret;
691 }
692
693 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
694                                      __u64 key, __u64 *ptrp,
695                                      unsigned int maxblocks)
696 {
697         struct nilfs_btree_path *path;
698         struct nilfs_btree_node *node;
699         struct inode *dat = NULL;
700         __u64 ptr, ptr2;
701         sector_t blocknr;
702         int level = NILFS_BTREE_LEVEL_NODE_MIN;
703         int ret, cnt, index, maxlevel, ncmax;
704         struct nilfs_btree_readahead_info p;
705
706         path = nilfs_btree_alloc_path();
707         if (path == NULL)
708                 return -ENOMEM;
709
710         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
711         if (ret < 0)
712                 goto out;
713
714         if (NILFS_BMAP_USE_VBN(btree)) {
715                 dat = nilfs_bmap_get_dat(btree);
716                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
717                 if (ret < 0)
718                         goto out;
719                 ptr = blocknr;
720         }
721         cnt = 1;
722         if (cnt == maxblocks)
723                 goto end;
724
725         maxlevel = nilfs_btree_height(btree) - 1;
726         node = nilfs_btree_get_node(btree, path, level, &ncmax);
727         index = path[level].bp_index + 1;
728         for (;;) {
729                 while (index < nilfs_btree_node_get_nchildren(node)) {
730                         if (nilfs_btree_node_get_key(node, index) !=
731                             key + cnt)
732                                 goto end;
733                         ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
734                         if (dat) {
735                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
736                                 if (ret < 0)
737                                         goto out;
738                                 ptr2 = blocknr;
739                         }
740                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
741                                 goto end;
742                         index++;
743                 }
744                 if (level == maxlevel)
745                         break;
746
747                 /* look-up right sibling node */
748                 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
749                 p.index = path[level + 1].bp_index + 1;
750                 p.max_ra_blocks = 7;
751                 if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
752                     nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
753                         break;
754                 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
755                 path[level + 1].bp_index = p.index;
756
757                 brelse(path[level].bp_bh);
758                 path[level].bp_bh = NULL;
759
760                 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
761                                               &p);
762                 if (ret < 0)
763                         goto out;
764                 node = nilfs_btree_get_nonroot_node(path, level);
765                 ncmax = nilfs_btree_nchildren_per_block(btree);
766                 index = 0;
767                 path[level].bp_index = index;
768         }
769  end:
770         *ptrp = ptr;
771         ret = cnt;
772  out:
773         nilfs_btree_free_path(path);
774         return ret;
775 }
776
777 static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
778                                     struct nilfs_btree_path *path,
779                                     int level, __u64 key)
780 {
781         if (level < nilfs_btree_height(btree) - 1) {
782                 do {
783                         nilfs_btree_node_set_key(
784                                 nilfs_btree_get_nonroot_node(path, level),
785                                 path[level].bp_index, key);
786                         if (!buffer_dirty(path[level].bp_bh))
787                                 mark_buffer_dirty(path[level].bp_bh);
788                 } while ((path[level].bp_index == 0) &&
789                          (++level < nilfs_btree_height(btree) - 1));
790         }
791
792         /* root */
793         if (level == nilfs_btree_height(btree) - 1) {
794                 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
795                                          path[level].bp_index, key);
796         }
797 }
798
799 static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
800                                   struct nilfs_btree_path *path,
801                                   int level, __u64 *keyp, __u64 *ptrp)
802 {
803         struct nilfs_btree_node *node;
804         int ncblk;
805
806         if (level < nilfs_btree_height(btree) - 1) {
807                 node = nilfs_btree_get_nonroot_node(path, level);
808                 ncblk = nilfs_btree_nchildren_per_block(btree);
809                 nilfs_btree_node_insert(node, path[level].bp_index,
810                                         *keyp, *ptrp, ncblk);
811                 if (!buffer_dirty(path[level].bp_bh))
812                         mark_buffer_dirty(path[level].bp_bh);
813
814                 if (path[level].bp_index == 0)
815                         nilfs_btree_promote_key(btree, path, level + 1,
816                                                 nilfs_btree_node_get_key(node,
817                                                                          0));
818         } else {
819                 node = nilfs_btree_get_root(btree);
820                 nilfs_btree_node_insert(node, path[level].bp_index,
821                                         *keyp, *ptrp,
822                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
823         }
824 }
825
826 static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
827                                    struct nilfs_btree_path *path,
828                                    int level, __u64 *keyp, __u64 *ptrp)
829 {
830         struct nilfs_btree_node *node, *left;
831         int nchildren, lnchildren, n, move, ncblk;
832
833         node = nilfs_btree_get_nonroot_node(path, level);
834         left = nilfs_btree_get_sib_node(path, level);
835         nchildren = nilfs_btree_node_get_nchildren(node);
836         lnchildren = nilfs_btree_node_get_nchildren(left);
837         ncblk = nilfs_btree_nchildren_per_block(btree);
838         move = 0;
839
840         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
841         if (n > path[level].bp_index) {
842                 /* move insert point */
843                 n--;
844                 move = 1;
845         }
846
847         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
848
849         if (!buffer_dirty(path[level].bp_bh))
850                 mark_buffer_dirty(path[level].bp_bh);
851         if (!buffer_dirty(path[level].bp_sib_bh))
852                 mark_buffer_dirty(path[level].bp_sib_bh);
853
854         nilfs_btree_promote_key(btree, path, level + 1,
855                                 nilfs_btree_node_get_key(node, 0));
856
857         if (move) {
858                 brelse(path[level].bp_bh);
859                 path[level].bp_bh = path[level].bp_sib_bh;
860                 path[level].bp_sib_bh = NULL;
861                 path[level].bp_index += lnchildren;
862                 path[level + 1].bp_index--;
863         } else {
864                 brelse(path[level].bp_sib_bh);
865                 path[level].bp_sib_bh = NULL;
866                 path[level].bp_index -= n;
867         }
868
869         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
870 }
871
872 static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
873                                     struct nilfs_btree_path *path,
874                                     int level, __u64 *keyp, __u64 *ptrp)
875 {
876         struct nilfs_btree_node *node, *right;
877         int nchildren, rnchildren, n, move, ncblk;
878
879         node = nilfs_btree_get_nonroot_node(path, level);
880         right = nilfs_btree_get_sib_node(path, level);
881         nchildren = nilfs_btree_node_get_nchildren(node);
882         rnchildren = nilfs_btree_node_get_nchildren(right);
883         ncblk = nilfs_btree_nchildren_per_block(btree);
884         move = 0;
885
886         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
887         if (n > nchildren - path[level].bp_index) {
888                 /* move insert point */
889                 n--;
890                 move = 1;
891         }
892
893         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
894
895         if (!buffer_dirty(path[level].bp_bh))
896                 mark_buffer_dirty(path[level].bp_bh);
897         if (!buffer_dirty(path[level].bp_sib_bh))
898                 mark_buffer_dirty(path[level].bp_sib_bh);
899
900         path[level + 1].bp_index++;
901         nilfs_btree_promote_key(btree, path, level + 1,
902                                 nilfs_btree_node_get_key(right, 0));
903         path[level + 1].bp_index--;
904
905         if (move) {
906                 brelse(path[level].bp_bh);
907                 path[level].bp_bh = path[level].bp_sib_bh;
908                 path[level].bp_sib_bh = NULL;
909                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
910                 path[level + 1].bp_index++;
911         } else {
912                 brelse(path[level].bp_sib_bh);
913                 path[level].bp_sib_bh = NULL;
914         }
915
916         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
917 }
918
919 static void nilfs_btree_split(struct nilfs_bmap *btree,
920                               struct nilfs_btree_path *path,
921                               int level, __u64 *keyp, __u64 *ptrp)
922 {
923         struct nilfs_btree_node *node, *right;
924         int nchildren, n, move, ncblk;
925
926         node = nilfs_btree_get_nonroot_node(path, level);
927         right = nilfs_btree_get_sib_node(path, level);
928         nchildren = nilfs_btree_node_get_nchildren(node);
929         ncblk = nilfs_btree_nchildren_per_block(btree);
930         move = 0;
931
932         n = (nchildren + 1) / 2;
933         if (n > nchildren - path[level].bp_index) {
934                 n--;
935                 move = 1;
936         }
937
938         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
939
940         if (!buffer_dirty(path[level].bp_bh))
941                 mark_buffer_dirty(path[level].bp_bh);
942         if (!buffer_dirty(path[level].bp_sib_bh))
943                 mark_buffer_dirty(path[level].bp_sib_bh);
944
945         if (move) {
946                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
947                 nilfs_btree_node_insert(right, path[level].bp_index,
948                                         *keyp, *ptrp, ncblk);
949
950                 *keyp = nilfs_btree_node_get_key(right, 0);
951                 *ptrp = path[level].bp_newreq.bpr_ptr;
952
953                 brelse(path[level].bp_bh);
954                 path[level].bp_bh = path[level].bp_sib_bh;
955                 path[level].bp_sib_bh = NULL;
956         } else {
957                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
958
959                 *keyp = nilfs_btree_node_get_key(right, 0);
960                 *ptrp = path[level].bp_newreq.bpr_ptr;
961
962                 brelse(path[level].bp_sib_bh);
963                 path[level].bp_sib_bh = NULL;
964         }
965
966         path[level + 1].bp_index++;
967 }
968
969 static void nilfs_btree_grow(struct nilfs_bmap *btree,
970                              struct nilfs_btree_path *path,
971                              int level, __u64 *keyp, __u64 *ptrp)
972 {
973         struct nilfs_btree_node *root, *child;
974         int n, ncblk;
975
976         root = nilfs_btree_get_root(btree);
977         child = nilfs_btree_get_sib_node(path, level);
978         ncblk = nilfs_btree_nchildren_per_block(btree);
979
980         n = nilfs_btree_node_get_nchildren(root);
981
982         nilfs_btree_node_move_right(root, child, n,
983                                     NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
984         nilfs_btree_node_set_level(root, level + 1);
985
986         if (!buffer_dirty(path[level].bp_sib_bh))
987                 mark_buffer_dirty(path[level].bp_sib_bh);
988
989         path[level].bp_bh = path[level].bp_sib_bh;
990         path[level].bp_sib_bh = NULL;
991
992         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
993
994         *keyp = nilfs_btree_node_get_key(child, 0);
995         *ptrp = path[level].bp_newreq.bpr_ptr;
996 }
997
998 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
999                                    const struct nilfs_btree_path *path)
1000 {
1001         struct nilfs_btree_node *node;
1002         int level, ncmax;
1003
1004         if (path == NULL)
1005                 return NILFS_BMAP_INVALID_PTR;
1006
1007         /* left sibling */
1008         level = NILFS_BTREE_LEVEL_NODE_MIN;
1009         if (path[level].bp_index > 0) {
1010                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
1011                 return nilfs_btree_node_get_ptr(node,
1012                                                 path[level].bp_index - 1,
1013                                                 ncmax);
1014         }
1015
1016         /* parent */
1017         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1018         if (level <= nilfs_btree_height(btree) - 1) {
1019                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
1020                 return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1021                                                 ncmax);
1022         }
1023
1024         return NILFS_BMAP_INVALID_PTR;
1025 }
1026
1027 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1028                                        const struct nilfs_btree_path *path,
1029                                        __u64 key)
1030 {
1031         __u64 ptr;
1032
1033         ptr = nilfs_bmap_find_target_seq(btree, key);
1034         if (ptr != NILFS_BMAP_INVALID_PTR)
1035                 /* sequential access */
1036                 return ptr;
1037
1038         ptr = nilfs_btree_find_near(btree, path);
1039         if (ptr != NILFS_BMAP_INVALID_PTR)
1040                 /* near */
1041                 return ptr;
1042
1043         /* block group */
1044         return nilfs_bmap_find_target_in_group(btree);
1045 }
1046
1047 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1048                                       struct nilfs_btree_path *path,
1049                                       int *levelp, __u64 key, __u64 ptr,
1050                                       struct nilfs_bmap_stats *stats)
1051 {
1052         struct buffer_head *bh;
1053         struct nilfs_btree_node *node, *parent, *sib;
1054         __u64 sibptr;
1055         int pindex, level, ncmax, ncblk, ret;
1056         struct inode *dat = NULL;
1057
1058         stats->bs_nblocks = 0;
1059         level = NILFS_BTREE_LEVEL_DATA;
1060
1061         /* allocate a new ptr for data block */
1062         if (NILFS_BMAP_USE_VBN(btree)) {
1063                 path[level].bp_newreq.bpr_ptr =
1064                         nilfs_btree_find_target_v(btree, path, key);
1065                 dat = nilfs_bmap_get_dat(btree);
1066         }
1067
1068         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1069         if (ret < 0)
1070                 goto err_out_data;
1071
1072         ncblk = nilfs_btree_nchildren_per_block(btree);
1073
1074         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1075              level < nilfs_btree_height(btree) - 1;
1076              level++) {
1077                 node = nilfs_btree_get_nonroot_node(path, level);
1078                 if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1079                         path[level].bp_op = nilfs_btree_do_insert;
1080                         stats->bs_nblocks++;
1081                         goto out;
1082                 }
1083
1084                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1085                 pindex = path[level + 1].bp_index;
1086
1087                 /* left sibling */
1088                 if (pindex > 0) {
1089                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1090                                                           ncmax);
1091                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1092                         if (ret < 0)
1093                                 goto err_out_child_node;
1094                         sib = (struct nilfs_btree_node *)bh->b_data;
1095                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1096                                 path[level].bp_sib_bh = bh;
1097                                 path[level].bp_op = nilfs_btree_carry_left;
1098                                 stats->bs_nblocks++;
1099                                 goto out;
1100                         } else {
1101                                 brelse(bh);
1102                         }
1103                 }
1104
1105                 /* right sibling */
1106                 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1107                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1108                                                           ncmax);
1109                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1110                         if (ret < 0)
1111                                 goto err_out_child_node;
1112                         sib = (struct nilfs_btree_node *)bh->b_data;
1113                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1114                                 path[level].bp_sib_bh = bh;
1115                                 path[level].bp_op = nilfs_btree_carry_right;
1116                                 stats->bs_nblocks++;
1117                                 goto out;
1118                         } else {
1119                                 brelse(bh);
1120                         }
1121                 }
1122
1123                 /* split */
1124                 path[level].bp_newreq.bpr_ptr =
1125                         path[level - 1].bp_newreq.bpr_ptr + 1;
1126                 ret = nilfs_bmap_prepare_alloc_ptr(btree,
1127                                                    &path[level].bp_newreq, dat);
1128                 if (ret < 0)
1129                         goto err_out_child_node;
1130                 ret = nilfs_btree_get_new_block(btree,
1131                                                 path[level].bp_newreq.bpr_ptr,
1132                                                 &bh);
1133                 if (ret < 0)
1134                         goto err_out_curr_node;
1135
1136                 stats->bs_nblocks++;
1137
1138                 sib = (struct nilfs_btree_node *)bh->b_data;
1139                 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1140                 path[level].bp_sib_bh = bh;
1141                 path[level].bp_op = nilfs_btree_split;
1142         }
1143
1144         /* root */
1145         node = nilfs_btree_get_root(btree);
1146         if (nilfs_btree_node_get_nchildren(node) <
1147             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1148                 path[level].bp_op = nilfs_btree_do_insert;
1149                 stats->bs_nblocks++;
1150                 goto out;
1151         }
1152
1153         /* grow */
1154         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1155         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1156         if (ret < 0)
1157                 goto err_out_child_node;
1158         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1159                                         &bh);
1160         if (ret < 0)
1161                 goto err_out_curr_node;
1162
1163         nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1164                               0, level, 0, ncblk, NULL, NULL);
1165         path[level].bp_sib_bh = bh;
1166         path[level].bp_op = nilfs_btree_grow;
1167
1168         level++;
1169         path[level].bp_op = nilfs_btree_do_insert;
1170
1171         /* a newly-created node block and a data block are added */
1172         stats->bs_nblocks += 2;
1173
1174         /* success */
1175  out:
1176         *levelp = level;
1177         return ret;
1178
1179         /* error */
1180  err_out_curr_node:
1181         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1182  err_out_child_node:
1183         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1184                 nilfs_btnode_delete(path[level].bp_sib_bh);
1185                 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1186
1187         }
1188
1189         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1190  err_out_data:
1191         *levelp = level;
1192         stats->bs_nblocks = 0;
1193         return ret;
1194 }
1195
1196 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1197                                       struct nilfs_btree_path *path,
1198                                       int maxlevel, __u64 key, __u64 ptr)
1199 {
1200         struct inode *dat = NULL;
1201         int level;
1202
1203         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1204         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1205         if (NILFS_BMAP_USE_VBN(btree)) {
1206                 nilfs_bmap_set_target_v(btree, key, ptr);
1207                 dat = nilfs_bmap_get_dat(btree);
1208         }
1209
1210         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1211                 nilfs_bmap_commit_alloc_ptr(btree,
1212                                             &path[level - 1].bp_newreq, dat);
1213                 path[level].bp_op(btree, path, level, &key, &ptr);
1214         }
1215
1216         if (!nilfs_bmap_dirty(btree))
1217                 nilfs_bmap_set_dirty(btree);
1218 }
1219
1220 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1221 {
1222         struct nilfs_btree_path *path;
1223         struct nilfs_bmap_stats stats;
1224         int level, ret;
1225
1226         path = nilfs_btree_alloc_path();
1227         if (path == NULL)
1228                 return -ENOMEM;
1229
1230         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1231                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1232         if (ret != -ENOENT) {
1233                 if (ret == 0)
1234                         ret = -EEXIST;
1235                 goto out;
1236         }
1237
1238         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1239         if (ret < 0)
1240                 goto out;
1241         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1242         nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1243
1244  out:
1245         nilfs_btree_free_path(path);
1246         return ret;
1247 }
1248
1249 static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1250                                   struct nilfs_btree_path *path,
1251                                   int level, __u64 *keyp, __u64 *ptrp)
1252 {
1253         struct nilfs_btree_node *node;
1254         int ncblk;
1255
1256         if (level < nilfs_btree_height(btree) - 1) {
1257                 node = nilfs_btree_get_nonroot_node(path, level);
1258                 ncblk = nilfs_btree_nchildren_per_block(btree);
1259                 nilfs_btree_node_delete(node, path[level].bp_index,
1260                                         keyp, ptrp, ncblk);
1261                 if (!buffer_dirty(path[level].bp_bh))
1262                         mark_buffer_dirty(path[level].bp_bh);
1263                 if (path[level].bp_index == 0)
1264                         nilfs_btree_promote_key(btree, path, level + 1,
1265                                 nilfs_btree_node_get_key(node, 0));
1266         } else {
1267                 node = nilfs_btree_get_root(btree);
1268                 nilfs_btree_node_delete(node, path[level].bp_index,
1269                                         keyp, ptrp,
1270                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1271         }
1272 }
1273
1274 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1275                                     struct nilfs_btree_path *path,
1276                                     int level, __u64 *keyp, __u64 *ptrp)
1277 {
1278         struct nilfs_btree_node *node, *left;
1279         int nchildren, lnchildren, n, ncblk;
1280
1281         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1282
1283         node = nilfs_btree_get_nonroot_node(path, level);
1284         left = nilfs_btree_get_sib_node(path, level);
1285         nchildren = nilfs_btree_node_get_nchildren(node);
1286         lnchildren = nilfs_btree_node_get_nchildren(left);
1287         ncblk = nilfs_btree_nchildren_per_block(btree);
1288
1289         n = (nchildren + lnchildren) / 2 - nchildren;
1290
1291         nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1292
1293         if (!buffer_dirty(path[level].bp_bh))
1294                 mark_buffer_dirty(path[level].bp_bh);
1295         if (!buffer_dirty(path[level].bp_sib_bh))
1296                 mark_buffer_dirty(path[level].bp_sib_bh);
1297
1298         nilfs_btree_promote_key(btree, path, level + 1,
1299                                 nilfs_btree_node_get_key(node, 0));
1300
1301         brelse(path[level].bp_sib_bh);
1302         path[level].bp_sib_bh = NULL;
1303         path[level].bp_index += n;
1304 }
1305
1306 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1307                                      struct nilfs_btree_path *path,
1308                                      int level, __u64 *keyp, __u64 *ptrp)
1309 {
1310         struct nilfs_btree_node *node, *right;
1311         int nchildren, rnchildren, n, ncblk;
1312
1313         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1314
1315         node = nilfs_btree_get_nonroot_node(path, level);
1316         right = nilfs_btree_get_sib_node(path, level);
1317         nchildren = nilfs_btree_node_get_nchildren(node);
1318         rnchildren = nilfs_btree_node_get_nchildren(right);
1319         ncblk = nilfs_btree_nchildren_per_block(btree);
1320
1321         n = (nchildren + rnchildren) / 2 - nchildren;
1322
1323         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1324
1325         if (!buffer_dirty(path[level].bp_bh))
1326                 mark_buffer_dirty(path[level].bp_bh);
1327         if (!buffer_dirty(path[level].bp_sib_bh))
1328                 mark_buffer_dirty(path[level].bp_sib_bh);
1329
1330         path[level + 1].bp_index++;
1331         nilfs_btree_promote_key(btree, path, level + 1,
1332                                 nilfs_btree_node_get_key(right, 0));
1333         path[level + 1].bp_index--;
1334
1335         brelse(path[level].bp_sib_bh);
1336         path[level].bp_sib_bh = NULL;
1337 }
1338
1339 static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1340                                     struct nilfs_btree_path *path,
1341                                     int level, __u64 *keyp, __u64 *ptrp)
1342 {
1343         struct nilfs_btree_node *node, *left;
1344         int n, ncblk;
1345
1346         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1347
1348         node = nilfs_btree_get_nonroot_node(path, level);
1349         left = nilfs_btree_get_sib_node(path, level);
1350         ncblk = nilfs_btree_nchildren_per_block(btree);
1351
1352         n = nilfs_btree_node_get_nchildren(node);
1353
1354         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1355
1356         if (!buffer_dirty(path[level].bp_sib_bh))
1357                 mark_buffer_dirty(path[level].bp_sib_bh);
1358
1359         nilfs_btnode_delete(path[level].bp_bh);
1360         path[level].bp_bh = path[level].bp_sib_bh;
1361         path[level].bp_sib_bh = NULL;
1362         path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1363 }
1364
1365 static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1366                                      struct nilfs_btree_path *path,
1367                                      int level, __u64 *keyp, __u64 *ptrp)
1368 {
1369         struct nilfs_btree_node *node, *right;
1370         int n, ncblk;
1371
1372         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1373
1374         node = nilfs_btree_get_nonroot_node(path, level);
1375         right = nilfs_btree_get_sib_node(path, level);
1376         ncblk = nilfs_btree_nchildren_per_block(btree);
1377
1378         n = nilfs_btree_node_get_nchildren(right);
1379
1380         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1381
1382         if (!buffer_dirty(path[level].bp_bh))
1383                 mark_buffer_dirty(path[level].bp_bh);
1384
1385         nilfs_btnode_delete(path[level].bp_sib_bh);
1386         path[level].bp_sib_bh = NULL;
1387         path[level + 1].bp_index++;
1388 }
1389
1390 static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1391                                struct nilfs_btree_path *path,
1392                                int level, __u64 *keyp, __u64 *ptrp)
1393 {
1394         struct nilfs_btree_node *root, *child;
1395         int n, ncblk;
1396
1397         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1398
1399         root = nilfs_btree_get_root(btree);
1400         child = nilfs_btree_get_nonroot_node(path, level);
1401         ncblk = nilfs_btree_nchildren_per_block(btree);
1402
1403         nilfs_btree_node_delete(root, 0, NULL, NULL,
1404                                 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1405         nilfs_btree_node_set_level(root, level);
1406         n = nilfs_btree_node_get_nchildren(child);
1407         nilfs_btree_node_move_left(root, child, n,
1408                                    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1409
1410         nilfs_btnode_delete(path[level].bp_bh);
1411         path[level].bp_bh = NULL;
1412 }
1413
1414 static void nilfs_btree_nop(struct nilfs_bmap *btree,
1415                             struct nilfs_btree_path *path,
1416                             int level, __u64 *keyp, __u64 *ptrp)
1417 {
1418 }
1419
1420 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1421                                       struct nilfs_btree_path *path,
1422                                       int *levelp,
1423                                       struct nilfs_bmap_stats *stats,
1424                                       struct inode *dat)
1425 {
1426         struct buffer_head *bh;
1427         struct nilfs_btree_node *node, *parent, *sib;
1428         __u64 sibptr;
1429         int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1430
1431         ret = 0;
1432         stats->bs_nblocks = 0;
1433         ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1434         ncblk = nilfs_btree_nchildren_per_block(btree);
1435
1436         for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1437              level < nilfs_btree_height(btree) - 1;
1438              level++) {
1439                 node = nilfs_btree_get_nonroot_node(path, level);
1440                 path[level].bp_oldreq.bpr_ptr =
1441                         nilfs_btree_node_get_ptr(node, dindex, ncblk);
1442                 ret = nilfs_bmap_prepare_end_ptr(btree,
1443                                                  &path[level].bp_oldreq, dat);
1444                 if (ret < 0)
1445                         goto err_out_child_node;
1446
1447                 if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1448                         path[level].bp_op = nilfs_btree_do_delete;
1449                         stats->bs_nblocks++;
1450                         goto out;
1451                 }
1452
1453                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1454                 pindex = path[level + 1].bp_index;
1455                 dindex = pindex;
1456
1457                 if (pindex > 0) {
1458                         /* left sibling */
1459                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1460                                                           ncmax);
1461                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1462                         if (ret < 0)
1463                                 goto err_out_curr_node;
1464                         sib = (struct nilfs_btree_node *)bh->b_data;
1465                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1466                                 path[level].bp_sib_bh = bh;
1467                                 path[level].bp_op = nilfs_btree_borrow_left;
1468                                 stats->bs_nblocks++;
1469                                 goto out;
1470                         } else {
1471                                 path[level].bp_sib_bh = bh;
1472                                 path[level].bp_op = nilfs_btree_concat_left;
1473                                 stats->bs_nblocks++;
1474                                 /* continue; */
1475                         }
1476                 } else if (pindex <
1477                            nilfs_btree_node_get_nchildren(parent) - 1) {
1478                         /* right sibling */
1479                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1480                                                           ncmax);
1481                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1482                         if (ret < 0)
1483                                 goto err_out_curr_node;
1484                         sib = (struct nilfs_btree_node *)bh->b_data;
1485                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1486                                 path[level].bp_sib_bh = bh;
1487                                 path[level].bp_op = nilfs_btree_borrow_right;
1488                                 stats->bs_nblocks++;
1489                                 goto out;
1490                         } else {
1491                                 path[level].bp_sib_bh = bh;
1492                                 path[level].bp_op = nilfs_btree_concat_right;
1493                                 stats->bs_nblocks++;
1494                                 /*
1495                                  * When merging right sibling node
1496                                  * into the current node, pointer to
1497                                  * the right sibling node must be
1498                                  * terminated instead.  The adjustment
1499                                  * below is required for that.
1500                                  */
1501                                 dindex = pindex + 1;
1502                                 /* continue; */
1503                         }
1504                 } else {
1505                         /* no siblings */
1506                         /* the only child of the root node */
1507                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1508                         if (nilfs_btree_node_get_nchildren(node) - 1 <=
1509                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1510                                 path[level].bp_op = nilfs_btree_shrink;
1511                                 stats->bs_nblocks += 2;
1512                                 level++;
1513                                 path[level].bp_op = nilfs_btree_nop;
1514                                 goto shrink_root_child;
1515                         } else {
1516                                 path[level].bp_op = nilfs_btree_do_delete;
1517                                 stats->bs_nblocks++;
1518                                 goto out;
1519                         }
1520                 }
1521         }
1522
1523         /* child of the root node is deleted */
1524         path[level].bp_op = nilfs_btree_do_delete;
1525         stats->bs_nblocks++;
1526
1527 shrink_root_child:
1528         node = nilfs_btree_get_root(btree);
1529         path[level].bp_oldreq.bpr_ptr =
1530                 nilfs_btree_node_get_ptr(node, dindex,
1531                                          NILFS_BTREE_ROOT_NCHILDREN_MAX);
1532
1533         ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1534         if (ret < 0)
1535                 goto err_out_child_node;
1536
1537         /* success */
1538  out:
1539         *levelp = level;
1540         return ret;
1541
1542         /* error */
1543  err_out_curr_node:
1544         nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1545  err_out_child_node:
1546         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1547                 brelse(path[level].bp_sib_bh);
1548                 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1549         }
1550         *levelp = level;
1551         stats->bs_nblocks = 0;
1552         return ret;
1553 }
1554
1555 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1556                                       struct nilfs_btree_path *path,
1557                                       int maxlevel, struct inode *dat)
1558 {
1559         int level;
1560
1561         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1562                 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1563                 path[level].bp_op(btree, path, level, NULL, NULL);
1564         }
1565
1566         if (!nilfs_bmap_dirty(btree))
1567                 nilfs_bmap_set_dirty(btree);
1568 }
1569
1570 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1571
1572 {
1573         struct nilfs_btree_path *path;
1574         struct nilfs_bmap_stats stats;
1575         struct inode *dat;
1576         int level, ret;
1577
1578         path = nilfs_btree_alloc_path();
1579         if (path == NULL)
1580                 return -ENOMEM;
1581
1582         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1583                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1584         if (ret < 0)
1585                 goto out;
1586
1587
1588         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1589
1590         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1591         if (ret < 0)
1592                 goto out;
1593         nilfs_btree_commit_delete(btree, path, level, dat);
1594         nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1595
1596 out:
1597         nilfs_btree_free_path(path);
1598         return ret;
1599 }
1600
1601 static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1602                                 __u64 *keyp)
1603 {
1604         struct nilfs_btree_path *path;
1605         const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1606         int ret;
1607
1608         path = nilfs_btree_alloc_path();
1609         if (!path)
1610                 return -ENOMEM;
1611
1612         ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1613         if (!ret)
1614                 *keyp = start;
1615         else if (ret == -ENOENT)
1616                 ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1617
1618         nilfs_btree_free_path(path);
1619         return ret;
1620 }
1621
1622 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1623 {
1624         struct nilfs_btree_path *path;
1625         int ret;
1626
1627         path = nilfs_btree_alloc_path();
1628         if (path == NULL)
1629                 return -ENOMEM;
1630
1631         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1632
1633         nilfs_btree_free_path(path);
1634
1635         return ret;
1636 }
1637
1638 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1639 {
1640         struct buffer_head *bh;
1641         struct nilfs_btree_node *root, *node;
1642         __u64 maxkey, nextmaxkey;
1643         __u64 ptr;
1644         int nchildren, ret;
1645
1646         root = nilfs_btree_get_root(btree);
1647         switch (nilfs_btree_height(btree)) {
1648         case 2:
1649                 bh = NULL;
1650                 node = root;
1651                 break;
1652         case 3:
1653                 nchildren = nilfs_btree_node_get_nchildren(root);
1654                 if (nchildren > 1)
1655                         return 0;
1656                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1657                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1658                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1659                 if (ret < 0)
1660                         return ret;
1661                 node = (struct nilfs_btree_node *)bh->b_data;
1662                 break;
1663         default:
1664                 return 0;
1665         }
1666
1667         nchildren = nilfs_btree_node_get_nchildren(node);
1668         maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1669         nextmaxkey = (nchildren > 1) ?
1670                 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1671         if (bh != NULL)
1672                 brelse(bh);
1673
1674         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1675 }
1676
1677 static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1678                                    __u64 *keys, __u64 *ptrs, int nitems)
1679 {
1680         struct buffer_head *bh;
1681         struct nilfs_btree_node *node, *root;
1682         __le64 *dkeys;
1683         __le64 *dptrs;
1684         __u64 ptr;
1685         int nchildren, ncmax, i, ret;
1686
1687         root = nilfs_btree_get_root(btree);
1688         switch (nilfs_btree_height(btree)) {
1689         case 2:
1690                 bh = NULL;
1691                 node = root;
1692                 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1693                 break;
1694         case 3:
1695                 nchildren = nilfs_btree_node_get_nchildren(root);
1696                 WARN_ON(nchildren > 1);
1697                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1698                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1699                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1700                 if (ret < 0)
1701                         return ret;
1702                 node = (struct nilfs_btree_node *)bh->b_data;
1703                 ncmax = nilfs_btree_nchildren_per_block(btree);
1704                 break;
1705         default:
1706                 node = NULL;
1707                 return -EINVAL;
1708         }
1709
1710         nchildren = nilfs_btree_node_get_nchildren(node);
1711         if (nchildren < nitems)
1712                 nitems = nchildren;
1713         dkeys = nilfs_btree_node_dkeys(node);
1714         dptrs = nilfs_btree_node_dptrs(node, ncmax);
1715         for (i = 0; i < nitems; i++) {
1716                 keys[i] = le64_to_cpu(dkeys[i]);
1717                 ptrs[i] = le64_to_cpu(dptrs[i]);
1718         }
1719
1720         if (bh != NULL)
1721                 brelse(bh);
1722
1723         return nitems;
1724 }
1725
1726 static int
1727 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1728                                        union nilfs_bmap_ptr_req *dreq,
1729                                        union nilfs_bmap_ptr_req *nreq,
1730                                        struct buffer_head **bhp,
1731                                        struct nilfs_bmap_stats *stats)
1732 {
1733         struct buffer_head *bh;
1734         struct inode *dat = NULL;
1735         int ret;
1736
1737         stats->bs_nblocks = 0;
1738
1739         /* for data */
1740         /* cannot find near ptr */
1741         if (NILFS_BMAP_USE_VBN(btree)) {
1742                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1743                 dat = nilfs_bmap_get_dat(btree);
1744         }
1745
1746         ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode);
1747         if (ret < 0)
1748                 return ret;
1749
1750         ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1751         if (ret < 0)
1752                 return ret;
1753
1754         *bhp = NULL;
1755         stats->bs_nblocks++;
1756         if (nreq != NULL) {
1757                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1758                 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1759                 if (ret < 0)
1760                         goto err_out_dreq;
1761
1762                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1763                 if (ret < 0)
1764                         goto err_out_nreq;
1765
1766                 *bhp = bh;
1767                 stats->bs_nblocks++;
1768         }
1769
1770         /* success */
1771         return 0;
1772
1773         /* error */
1774  err_out_nreq:
1775         nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1776  err_out_dreq:
1777         nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1778         stats->bs_nblocks = 0;
1779         return ret;
1780
1781 }
1782
1783 static void
1784 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1785                                       __u64 key, __u64 ptr,
1786                                       const __u64 *keys, const __u64 *ptrs,
1787                                       int n,
1788                                       union nilfs_bmap_ptr_req *dreq,
1789                                       union nilfs_bmap_ptr_req *nreq,
1790                                       struct buffer_head *bh)
1791 {
1792         struct nilfs_btree_node *node;
1793         struct inode *dat;
1794         __u64 tmpptr;
1795         int ncblk;
1796
1797         /* free resources */
1798         if (btree->b_ops->bop_clear != NULL)
1799                 btree->b_ops->bop_clear(btree);
1800
1801         /* ptr must be a pointer to a buffer head. */
1802         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1803
1804         /* convert and insert */
1805         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1806         __nilfs_btree_init(btree);
1807         if (nreq != NULL) {
1808                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1809                 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1810
1811                 /* create child node at level 1 */
1812                 node = (struct nilfs_btree_node *)bh->b_data;
1813                 ncblk = nilfs_btree_nchildren_per_block(btree);
1814                 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1815                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1816                 if (!buffer_dirty(bh))
1817                         mark_buffer_dirty(bh);
1818                 if (!nilfs_bmap_dirty(btree))
1819                         nilfs_bmap_set_dirty(btree);
1820
1821                 brelse(bh);
1822
1823                 /* create root node at level 2 */
1824                 node = nilfs_btree_get_root(btree);
1825                 tmpptr = nreq->bpr_ptr;
1826                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1827                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1828                                       &keys[0], &tmpptr);
1829         } else {
1830                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1831
1832                 /* create root node at level 1 */
1833                 node = nilfs_btree_get_root(btree);
1834                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1835                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1836                                       keys, ptrs);
1837                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1838                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1839                 if (!nilfs_bmap_dirty(btree))
1840                         nilfs_bmap_set_dirty(btree);
1841         }
1842
1843         if (NILFS_BMAP_USE_VBN(btree))
1844                 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1845 }
1846
1847 /**
1848  * nilfs_btree_convert_and_insert -
1849  * @bmap:
1850  * @key:
1851  * @ptr:
1852  * @keys:
1853  * @ptrs:
1854  * @n:
1855  */
1856 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1857                                    __u64 key, __u64 ptr,
1858                                    const __u64 *keys, const __u64 *ptrs, int n)
1859 {
1860         struct buffer_head *bh = NULL;
1861         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1862         struct nilfs_bmap_stats stats;
1863         int ret;
1864
1865         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1866                 di = &dreq;
1867                 ni = NULL;
1868         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1869                            nilfs_btree_node_size(btree))) {
1870                 di = &dreq;
1871                 ni = &nreq;
1872         } else {
1873                 di = NULL;
1874                 ni = NULL;
1875                 BUG();
1876         }
1877
1878         ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1879                                                      &stats);
1880         if (ret < 0)
1881                 return ret;
1882         nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1883                                               di, ni, bh);
1884         nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1885         return 0;
1886 }
1887
1888 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1889                                    struct nilfs_btree_path *path,
1890                                    int level,
1891                                    struct buffer_head *bh)
1892 {
1893         while ((++level < nilfs_btree_height(btree) - 1) &&
1894                !buffer_dirty(path[level].bp_bh))
1895                 mark_buffer_dirty(path[level].bp_bh);
1896
1897         return 0;
1898 }
1899
1900 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1901                                         struct nilfs_btree_path *path,
1902                                         int level, struct inode *dat)
1903 {
1904         struct nilfs_btree_node *parent;
1905         int ncmax, ret;
1906
1907         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1908         path[level].bp_oldreq.bpr_ptr =
1909                 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1910                                          ncmax);
1911         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1912         ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1913                                        &path[level].bp_newreq.bpr_req);
1914         if (ret < 0)
1915                 return ret;
1916
1917         if (buffer_nilfs_node(path[level].bp_bh)) {
1918                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1919                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1920                 path[level].bp_ctxt.bh = path[level].bp_bh;
1921                 ret = nilfs_btnode_prepare_change_key(
1922                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1923                         &path[level].bp_ctxt);
1924                 if (ret < 0) {
1925                         nilfs_dat_abort_update(dat,
1926                                                &path[level].bp_oldreq.bpr_req,
1927                                                &path[level].bp_newreq.bpr_req);
1928                         return ret;
1929                 }
1930         }
1931
1932         return 0;
1933 }
1934
1935 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1936                                         struct nilfs_btree_path *path,
1937                                         int level, struct inode *dat)
1938 {
1939         struct nilfs_btree_node *parent;
1940         int ncmax;
1941
1942         nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1943                                 &path[level].bp_newreq.bpr_req,
1944                                 btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1945
1946         if (buffer_nilfs_node(path[level].bp_bh)) {
1947                 nilfs_btnode_commit_change_key(
1948                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1949                         &path[level].bp_ctxt);
1950                 path[level].bp_bh = path[level].bp_ctxt.bh;
1951         }
1952         set_buffer_nilfs_volatile(path[level].bp_bh);
1953
1954         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1955         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1956                                  path[level].bp_newreq.bpr_ptr, ncmax);
1957 }
1958
1959 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1960                                        struct nilfs_btree_path *path,
1961                                        int level, struct inode *dat)
1962 {
1963         nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1964                                &path[level].bp_newreq.bpr_req);
1965         if (buffer_nilfs_node(path[level].bp_bh))
1966                 nilfs_btnode_abort_change_key(
1967                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1968                         &path[level].bp_ctxt);
1969 }
1970
1971 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1972                                            struct nilfs_btree_path *path,
1973                                            int minlevel, int *maxlevelp,
1974                                            struct inode *dat)
1975 {
1976         int level, ret;
1977
1978         level = minlevel;
1979         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1980                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1981                 if (ret < 0)
1982                         return ret;
1983         }
1984         while ((++level < nilfs_btree_height(btree) - 1) &&
1985                !buffer_dirty(path[level].bp_bh)) {
1986
1987                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1988                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1989                 if (ret < 0)
1990                         goto out;
1991         }
1992
1993         /* success */
1994         *maxlevelp = level - 1;
1995         return 0;
1996
1997         /* error */
1998  out:
1999         while (--level > minlevel)
2000                 nilfs_btree_abort_update_v(btree, path, level, dat);
2001         if (!buffer_nilfs_volatile(path[level].bp_bh))
2002                 nilfs_btree_abort_update_v(btree, path, level, dat);
2003         return ret;
2004 }
2005
2006 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
2007                                            struct nilfs_btree_path *path,
2008                                            int minlevel, int maxlevel,
2009                                            struct buffer_head *bh,
2010                                            struct inode *dat)
2011 {
2012         int level;
2013
2014         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2015                 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2016
2017         for (level = minlevel + 1; level <= maxlevel; level++)
2018                 nilfs_btree_commit_update_v(btree, path, level, dat);
2019 }
2020
2021 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2022                                    struct nilfs_btree_path *path,
2023                                    int level, struct buffer_head *bh)
2024 {
2025         int maxlevel = 0, ret;
2026         struct nilfs_btree_node *parent;
2027         struct inode *dat = nilfs_bmap_get_dat(btree);
2028         __u64 ptr;
2029         int ncmax;
2030
2031         get_bh(bh);
2032         path[level].bp_bh = bh;
2033         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2034                                               dat);
2035         if (ret < 0)
2036                 goto out;
2037
2038         if (buffer_nilfs_volatile(path[level].bp_bh)) {
2039                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2040                 ptr = nilfs_btree_node_get_ptr(parent,
2041                                                path[level + 1].bp_index,
2042                                                ncmax);
2043                 ret = nilfs_dat_mark_dirty(dat, ptr);
2044                 if (ret < 0)
2045                         goto out;
2046         }
2047
2048         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2049
2050  out:
2051         brelse(path[level].bp_bh);
2052         path[level].bp_bh = NULL;
2053         return ret;
2054 }
2055
2056 static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2057                                  struct buffer_head *bh)
2058 {
2059         struct nilfs_btree_path *path;
2060         struct nilfs_btree_node *node;
2061         __u64 key;
2062         int level, ret;
2063
2064         WARN_ON(!buffer_dirty(bh));
2065
2066         path = nilfs_btree_alloc_path();
2067         if (path == NULL)
2068                 return -ENOMEM;
2069
2070         if (buffer_nilfs_node(bh)) {
2071                 node = (struct nilfs_btree_node *)bh->b_data;
2072                 key = nilfs_btree_node_get_key(node, 0);
2073                 level = nilfs_btree_node_get_level(node);
2074         } else {
2075                 key = nilfs_bmap_data_get_key(btree, bh);
2076                 level = NILFS_BTREE_LEVEL_DATA;
2077         }
2078
2079         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2080         if (ret < 0) {
2081                 if (unlikely(ret == -ENOENT))
2082                         nilfs_crit(btree->b_inode->i_sb,
2083                                    "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
2084                                    btree->b_inode->i_ino,
2085                                    (unsigned long long)key, level);
2086                 goto out;
2087         }
2088
2089         ret = NILFS_BMAP_USE_VBN(btree) ?
2090                 nilfs_btree_propagate_v(btree, path, level, bh) :
2091                 nilfs_btree_propagate_p(btree, path, level, bh);
2092
2093  out:
2094         nilfs_btree_free_path(path);
2095
2096         return ret;
2097 }
2098
2099 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2100                                     struct buffer_head *bh)
2101 {
2102         return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2103 }
2104
2105 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2106                                          struct list_head *lists,
2107                                          struct buffer_head *bh)
2108 {
2109         struct list_head *head;
2110         struct buffer_head *cbh;
2111         struct nilfs_btree_node *node, *cnode;
2112         __u64 key, ckey;
2113         int level;
2114
2115         get_bh(bh);
2116         node = (struct nilfs_btree_node *)bh->b_data;
2117         key = nilfs_btree_node_get_key(node, 0);
2118         level = nilfs_btree_node_get_level(node);
2119         if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2120             level >= NILFS_BTREE_LEVEL_MAX) {
2121                 dump_stack();
2122                 nilfs_warn(btree->b_inode->i_sb,
2123                            "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
2124                            level, (unsigned long long)key,
2125                            btree->b_inode->i_ino,
2126                            (unsigned long long)bh->b_blocknr);
2127                 return;
2128         }
2129
2130         list_for_each(head, &lists[level]) {
2131                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2132                 cnode = (struct nilfs_btree_node *)cbh->b_data;
2133                 ckey = nilfs_btree_node_get_key(cnode, 0);
2134                 if (key < ckey)
2135                         break;
2136         }
2137         list_add_tail(&bh->b_assoc_buffers, head);
2138 }
2139
2140 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2141                                              struct list_head *listp)
2142 {
2143         struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
2144         struct address_space *btcache = btnc_inode->i_mapping;
2145         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2146         struct pagevec pvec;
2147         struct buffer_head *bh, *head;
2148         pgoff_t index = 0;
2149         int level, i;
2150
2151         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2152              level < NILFS_BTREE_LEVEL_MAX;
2153              level++)
2154                 INIT_LIST_HEAD(&lists[level]);
2155
2156         pagevec_init(&pvec);
2157
2158         while (pagevec_lookup_tag(&pvec, btcache, &index,
2159                                         PAGECACHE_TAG_DIRTY)) {
2160                 for (i = 0; i < pagevec_count(&pvec); i++) {
2161                         bh = head = page_buffers(pvec.pages[i]);
2162                         do {
2163                                 if (buffer_dirty(bh))
2164                                         nilfs_btree_add_dirty_buffer(btree,
2165                                                                      lists, bh);
2166                         } while ((bh = bh->b_this_page) != head);
2167                 }
2168                 pagevec_release(&pvec);
2169                 cond_resched();
2170         }
2171
2172         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2173              level < NILFS_BTREE_LEVEL_MAX;
2174              level++)
2175                 list_splice_tail(&lists[level], listp);
2176 }
2177
2178 static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2179                                 struct nilfs_btree_path *path,
2180                                 int level,
2181                                 struct buffer_head **bh,
2182                                 sector_t blocknr,
2183                                 union nilfs_binfo *binfo)
2184 {
2185         struct nilfs_btree_node *parent;
2186         __u64 key;
2187         __u64 ptr;
2188         int ncmax, ret;
2189
2190         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2191         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2192                                        ncmax);
2193         if (buffer_nilfs_node(*bh)) {
2194                 path[level].bp_ctxt.oldkey = ptr;
2195                 path[level].bp_ctxt.newkey = blocknr;
2196                 path[level].bp_ctxt.bh = *bh;
2197                 ret = nilfs_btnode_prepare_change_key(
2198                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2199                         &path[level].bp_ctxt);
2200                 if (ret < 0)
2201                         return ret;
2202                 nilfs_btnode_commit_change_key(
2203                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2204                         &path[level].bp_ctxt);
2205                 *bh = path[level].bp_ctxt.bh;
2206         }
2207
2208         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2209                                  ncmax);
2210
2211         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2212         /* on-disk format */
2213         binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2214         binfo->bi_dat.bi_level = level;
2215
2216         return 0;
2217 }
2218
2219 static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2220                                 struct nilfs_btree_path *path,
2221                                 int level,
2222                                 struct buffer_head **bh,
2223                                 sector_t blocknr,
2224                                 union nilfs_binfo *binfo)
2225 {
2226         struct nilfs_btree_node *parent;
2227         struct inode *dat = nilfs_bmap_get_dat(btree);
2228         __u64 key;
2229         __u64 ptr;
2230         union nilfs_bmap_ptr_req req;
2231         int ncmax, ret;
2232
2233         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2234         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2235                                        ncmax);
2236         req.bpr_ptr = ptr;
2237         ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2238         if (ret < 0)
2239                 return ret;
2240         nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2241
2242         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2243         /* on-disk format */
2244         binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2245         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2246
2247         return 0;
2248 }
2249
2250 static int nilfs_btree_assign(struct nilfs_bmap *btree,
2251                               struct buffer_head **bh,
2252                               sector_t blocknr,
2253                               union nilfs_binfo *binfo)
2254 {
2255         struct nilfs_btree_path *path;
2256         struct nilfs_btree_node *node;
2257         __u64 key;
2258         int level, ret;
2259
2260         path = nilfs_btree_alloc_path();
2261         if (path == NULL)
2262                 return -ENOMEM;
2263
2264         if (buffer_nilfs_node(*bh)) {
2265                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2266                 key = nilfs_btree_node_get_key(node, 0);
2267                 level = nilfs_btree_node_get_level(node);
2268         } else {
2269                 key = nilfs_bmap_data_get_key(btree, *bh);
2270                 level = NILFS_BTREE_LEVEL_DATA;
2271         }
2272
2273         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2274         if (ret < 0) {
2275                 WARN_ON(ret == -ENOENT);
2276                 goto out;
2277         }
2278
2279         ret = NILFS_BMAP_USE_VBN(btree) ?
2280                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2281                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2282
2283  out:
2284         nilfs_btree_free_path(path);
2285
2286         return ret;
2287 }
2288
2289 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2290                                  struct buffer_head **bh,
2291                                  sector_t blocknr,
2292                                  union nilfs_binfo *binfo)
2293 {
2294         struct nilfs_btree_node *node;
2295         __u64 key;
2296         int ret;
2297
2298         ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2299                              blocknr);
2300         if (ret < 0)
2301                 return ret;
2302
2303         if (buffer_nilfs_node(*bh)) {
2304                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2305                 key = nilfs_btree_node_get_key(node, 0);
2306         } else
2307                 key = nilfs_bmap_data_get_key(btree, *bh);
2308
2309         /* on-disk format */
2310         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2311         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2312
2313         return 0;
2314 }
2315
2316 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2317 {
2318         struct buffer_head *bh;
2319         struct nilfs_btree_path *path;
2320         __u64 ptr;
2321         int ret;
2322
2323         path = nilfs_btree_alloc_path();
2324         if (path == NULL)
2325                 return -ENOMEM;
2326
2327         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2328         if (ret < 0) {
2329                 WARN_ON(ret == -ENOENT);
2330                 goto out;
2331         }
2332         ret = nilfs_btree_get_block(btree, ptr, &bh);
2333         if (ret < 0) {
2334                 WARN_ON(ret == -ENOENT);
2335                 goto out;
2336         }
2337
2338         if (!buffer_dirty(bh))
2339                 mark_buffer_dirty(bh);
2340         brelse(bh);
2341         if (!nilfs_bmap_dirty(btree))
2342                 nilfs_bmap_set_dirty(btree);
2343
2344  out:
2345         nilfs_btree_free_path(path);
2346         return ret;
2347 }
2348
2349 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2350         .bop_lookup             =       nilfs_btree_lookup,
2351         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2352         .bop_insert             =       nilfs_btree_insert,
2353         .bop_delete             =       nilfs_btree_delete,
2354         .bop_clear              =       NULL,
2355
2356         .bop_propagate          =       nilfs_btree_propagate,
2357
2358         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2359
2360         .bop_assign             =       nilfs_btree_assign,
2361         .bop_mark               =       nilfs_btree_mark,
2362
2363         .bop_seek_key           =       nilfs_btree_seek_key,
2364         .bop_last_key           =       nilfs_btree_last_key,
2365
2366         .bop_check_insert       =       NULL,
2367         .bop_check_delete       =       nilfs_btree_check_delete,
2368         .bop_gather_data        =       nilfs_btree_gather_data,
2369 };
2370
2371 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2372         .bop_lookup             =       NULL,
2373         .bop_lookup_contig      =       NULL,
2374         .bop_insert             =       NULL,
2375         .bop_delete             =       NULL,
2376         .bop_clear              =       NULL,
2377
2378         .bop_propagate          =       nilfs_btree_propagate_gc,
2379
2380         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2381
2382         .bop_assign             =       nilfs_btree_assign_gc,
2383         .bop_mark               =       NULL,
2384
2385         .bop_seek_key           =       NULL,
2386         .bop_last_key           =       NULL,
2387
2388         .bop_check_insert       =       NULL,
2389         .bop_check_delete       =       NULL,
2390         .bop_gather_data        =       NULL,
2391 };
2392
2393 static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2394 {
2395         bmap->b_ops = &nilfs_btree_ops;
2396         bmap->b_nchildren_per_block =
2397                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2398 }
2399
2400 int nilfs_btree_init(struct nilfs_bmap *bmap)
2401 {
2402         int ret = 0;
2403
2404         __nilfs_btree_init(bmap);
2405
2406         if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
2407                 ret = -EIO;
2408         else
2409                 ret = nilfs_attach_btree_node_cache(
2410                         &NILFS_BMAP_I(bmap)->vfs_inode);
2411
2412         return ret;
2413 }
2414
2415 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2416 {
2417         bmap->b_ops = &nilfs_btree_ops_gc;
2418         bmap->b_nchildren_per_block =
2419                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2420 }