GNU Linux-libre 4.14.332-gnu1
[releases.git] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * Written by Koji Sato.
17  */
18
19 #include <linux/slab.h>
20 #include <linux/string.h>
21 #include <linux/errno.h>
22 #include <linux/pagevec.h>
23 #include "nilfs.h"
24 #include "page.h"
25 #include "btnode.h"
26 #include "btree.h"
27 #include "alloc.h"
28 #include "dat.h"
29
30 static void __nilfs_btree_init(struct nilfs_bmap *bmap);
31
32 static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
33 {
34         struct nilfs_btree_path *path;
35         int level = NILFS_BTREE_LEVEL_DATA;
36
37         path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
38         if (path == NULL)
39                 goto out;
40
41         for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
42                 path[level].bp_bh = NULL;
43                 path[level].bp_sib_bh = NULL;
44                 path[level].bp_index = 0;
45                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
46                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
47                 path[level].bp_op = NULL;
48         }
49
50 out:
51         return path;
52 }
53
54 static void nilfs_btree_free_path(struct nilfs_btree_path *path)
55 {
56         int level = NILFS_BTREE_LEVEL_DATA;
57
58         for (; level < NILFS_BTREE_LEVEL_MAX; level++)
59                 brelse(path[level].bp_bh);
60
61         kmem_cache_free(nilfs_btree_path_cache, path);
62 }
63
64 /*
65  * B-tree node operations
66  */
67 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
68                                      __u64 ptr, struct buffer_head **bhp)
69 {
70         struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
71         struct address_space *btnc = btnc_inode->i_mapping;
72         struct buffer_head *bh;
73
74         bh = nilfs_btnode_create_block(btnc, ptr);
75         if (!bh)
76                 return -ENOMEM;
77
78         set_buffer_nilfs_volatile(bh);
79         *bhp = bh;
80         return 0;
81 }
82
83 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
84 {
85         return node->bn_flags;
86 }
87
88 static void
89 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
90 {
91         node->bn_flags = flags;
92 }
93
94 static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
95 {
96         return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
97 }
98
99 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
100 {
101         return node->bn_level;
102 }
103
104 static void
105 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
106 {
107         node->bn_level = level;
108 }
109
110 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
111 {
112         return le16_to_cpu(node->bn_nchildren);
113 }
114
115 static void
116 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
117 {
118         node->bn_nchildren = cpu_to_le16(nchildren);
119 }
120
121 static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
122 {
123         return i_blocksize(btree->b_inode);
124 }
125
126 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
127 {
128         return btree->b_nchildren_per_block;
129 }
130
131 static __le64 *
132 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
133 {
134         return (__le64 *)((char *)(node + 1) +
135                           (nilfs_btree_node_root(node) ?
136                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
137 }
138
139 static __le64 *
140 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
141 {
142         return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
143 }
144
145 static __u64
146 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
147 {
148         return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
149 }
150
151 static void
152 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
153 {
154         *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
155 }
156
157 static __u64
158 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
159                          int ncmax)
160 {
161         return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
162 }
163
164 static void
165 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
166                          int ncmax)
167 {
168         *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
169 }
170
171 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
172                                   int level, int nchildren, int ncmax,
173                                   const __u64 *keys, const __u64 *ptrs)
174 {
175         __le64 *dkeys;
176         __le64 *dptrs;
177         int i;
178
179         nilfs_btree_node_set_flags(node, flags);
180         nilfs_btree_node_set_level(node, level);
181         nilfs_btree_node_set_nchildren(node, nchildren);
182
183         dkeys = nilfs_btree_node_dkeys(node);
184         dptrs = nilfs_btree_node_dptrs(node, ncmax);
185         for (i = 0; i < nchildren; i++) {
186                 dkeys[i] = cpu_to_le64(keys[i]);
187                 dptrs[i] = cpu_to_le64(ptrs[i]);
188         }
189 }
190
191 /* Assume the buffer heads corresponding to left and right are locked. */
192 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
193                                        struct nilfs_btree_node *right,
194                                        int n, int lncmax, int rncmax)
195 {
196         __le64 *ldkeys, *rdkeys;
197         __le64 *ldptrs, *rdptrs;
198         int lnchildren, rnchildren;
199
200         ldkeys = nilfs_btree_node_dkeys(left);
201         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
202         lnchildren = nilfs_btree_node_get_nchildren(left);
203
204         rdkeys = nilfs_btree_node_dkeys(right);
205         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
206         rnchildren = nilfs_btree_node_get_nchildren(right);
207
208         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
209         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
210         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
211         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
212
213         lnchildren += n;
214         rnchildren -= n;
215         nilfs_btree_node_set_nchildren(left, lnchildren);
216         nilfs_btree_node_set_nchildren(right, rnchildren);
217 }
218
219 /* Assume that the buffer heads corresponding to left and right are locked. */
220 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
221                                         struct nilfs_btree_node *right,
222                                         int n, int lncmax, int rncmax)
223 {
224         __le64 *ldkeys, *rdkeys;
225         __le64 *ldptrs, *rdptrs;
226         int lnchildren, rnchildren;
227
228         ldkeys = nilfs_btree_node_dkeys(left);
229         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
230         lnchildren = nilfs_btree_node_get_nchildren(left);
231
232         rdkeys = nilfs_btree_node_dkeys(right);
233         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
234         rnchildren = nilfs_btree_node_get_nchildren(right);
235
236         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
237         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
238         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
239         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
240
241         lnchildren -= n;
242         rnchildren += n;
243         nilfs_btree_node_set_nchildren(left, lnchildren);
244         nilfs_btree_node_set_nchildren(right, rnchildren);
245 }
246
247 /* Assume that the buffer head corresponding to node is locked. */
248 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
249                                     __u64 key, __u64 ptr, int ncmax)
250 {
251         __le64 *dkeys;
252         __le64 *dptrs;
253         int nchildren;
254
255         dkeys = nilfs_btree_node_dkeys(node);
256         dptrs = nilfs_btree_node_dptrs(node, ncmax);
257         nchildren = nilfs_btree_node_get_nchildren(node);
258         if (index < nchildren) {
259                 memmove(dkeys + index + 1, dkeys + index,
260                         (nchildren - index) * sizeof(*dkeys));
261                 memmove(dptrs + index + 1, dptrs + index,
262                         (nchildren - index) * sizeof(*dptrs));
263         }
264         dkeys[index] = cpu_to_le64(key);
265         dptrs[index] = cpu_to_le64(ptr);
266         nchildren++;
267         nilfs_btree_node_set_nchildren(node, nchildren);
268 }
269
270 /* Assume that the buffer head corresponding to node is locked. */
271 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
272                                     __u64 *keyp, __u64 *ptrp, int ncmax)
273 {
274         __u64 key;
275         __u64 ptr;
276         __le64 *dkeys;
277         __le64 *dptrs;
278         int nchildren;
279
280         dkeys = nilfs_btree_node_dkeys(node);
281         dptrs = nilfs_btree_node_dptrs(node, ncmax);
282         key = le64_to_cpu(dkeys[index]);
283         ptr = le64_to_cpu(dptrs[index]);
284         nchildren = nilfs_btree_node_get_nchildren(node);
285         if (keyp != NULL)
286                 *keyp = key;
287         if (ptrp != NULL)
288                 *ptrp = ptr;
289
290         if (index < nchildren - 1) {
291                 memmove(dkeys + index, dkeys + index + 1,
292                         (nchildren - index - 1) * sizeof(*dkeys));
293                 memmove(dptrs + index, dptrs + index + 1,
294                         (nchildren - index - 1) * sizeof(*dptrs));
295         }
296         nchildren--;
297         nilfs_btree_node_set_nchildren(node, nchildren);
298 }
299
300 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
301                                    __u64 key, int *indexp)
302 {
303         __u64 nkey;
304         int index, low, high, s;
305
306         /* binary search */
307         low = 0;
308         high = nilfs_btree_node_get_nchildren(node) - 1;
309         index = 0;
310         s = 0;
311         while (low <= high) {
312                 index = (low + high) / 2;
313                 nkey = nilfs_btree_node_get_key(node, index);
314                 if (nkey == key) {
315                         s = 0;
316                         goto out;
317                 } else if (nkey < key) {
318                         low = index + 1;
319                         s = -1;
320                 } else {
321                         high = index - 1;
322                         s = 1;
323                 }
324         }
325
326         /* adjust index */
327         if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
328                 if (s > 0 && index > 0)
329                         index--;
330         } else if (s < 0)
331                 index++;
332
333  out:
334         *indexp = index;
335
336         return s == 0;
337 }
338
339 /**
340  * nilfs_btree_node_broken - verify consistency of btree node
341  * @node: btree node block to be examined
342  * @size: node size (in bytes)
343  * @inode: host inode of btree
344  * @blocknr: block number
345  *
346  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
347  */
348 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
349                                    size_t size, struct inode *inode,
350                                    sector_t blocknr)
351 {
352         int level, flags, nchildren;
353         int ret = 0;
354
355         level = nilfs_btree_node_get_level(node);
356         flags = nilfs_btree_node_get_flags(node);
357         nchildren = nilfs_btree_node_get_nchildren(node);
358
359         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
360                      level >= NILFS_BTREE_LEVEL_MAX ||
361                      (flags & NILFS_BTREE_NODE_ROOT) ||
362                      nchildren < 0 ||
363                      nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
364                 nilfs_msg(inode->i_sb, KERN_CRIT,
365                           "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
366                           inode->i_ino, (unsigned long long)blocknr, level,
367                           flags, nchildren);
368                 ret = 1;
369         }
370         return ret;
371 }
372
373 /**
374  * nilfs_btree_root_broken - verify consistency of btree root node
375  * @node: btree root node to be examined
376  * @inode: host inode of btree
377  *
378  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
379  */
380 static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
381                                    struct inode *inode)
382 {
383         int level, flags, nchildren;
384         int ret = 0;
385
386         level = nilfs_btree_node_get_level(node);
387         flags = nilfs_btree_node_get_flags(node);
388         nchildren = nilfs_btree_node_get_nchildren(node);
389
390         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
391                      level >= NILFS_BTREE_LEVEL_MAX ||
392                      nchildren < 0 ||
393                      nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
394                 nilfs_msg(inode->i_sb, KERN_CRIT,
395                           "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
396                           inode->i_ino, level, flags, nchildren);
397                 ret = 1;
398         }
399         return ret;
400 }
401
402 int nilfs_btree_broken_node_block(struct buffer_head *bh)
403 {
404         struct inode *inode;
405         int ret;
406
407         if (buffer_nilfs_checked(bh))
408                 return 0;
409
410         inode = bh->b_page->mapping->host;
411         ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
412                                       bh->b_size, inode, bh->b_blocknr);
413         if (likely(!ret))
414                 set_buffer_nilfs_checked(bh);
415         return ret;
416 }
417
418 static struct nilfs_btree_node *
419 nilfs_btree_get_root(const struct nilfs_bmap *btree)
420 {
421         return (struct nilfs_btree_node *)btree->b_u.u_data;
422 }
423
424 static struct nilfs_btree_node *
425 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
426 {
427         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
428 }
429
430 static struct nilfs_btree_node *
431 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
432 {
433         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
434 }
435
436 static int nilfs_btree_height(const struct nilfs_bmap *btree)
437 {
438         return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
439 }
440
441 static struct nilfs_btree_node *
442 nilfs_btree_get_node(const struct nilfs_bmap *btree,
443                      const struct nilfs_btree_path *path,
444                      int level, int *ncmaxp)
445 {
446         struct nilfs_btree_node *node;
447
448         if (level == nilfs_btree_height(btree) - 1) {
449                 node = nilfs_btree_get_root(btree);
450                 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
451         } else {
452                 node = nilfs_btree_get_nonroot_node(path, level);
453                 *ncmaxp = nilfs_btree_nchildren_per_block(btree);
454         }
455         return node;
456 }
457
458 static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
459                                 struct nilfs_btree_node *node, int level)
460 {
461         if (unlikely(nilfs_btree_node_get_level(node) != level)) {
462                 dump_stack();
463                 nilfs_msg(btree->b_inode->i_sb, KERN_CRIT,
464                           "btree level mismatch (ino=%lu): %d != %d",
465                           btree->b_inode->i_ino,
466                           nilfs_btree_node_get_level(node), level);
467                 return 1;
468         }
469         return 0;
470 }
471
472 struct nilfs_btree_readahead_info {
473         struct nilfs_btree_node *node;  /* parent node */
474         int max_ra_blocks;              /* max nof blocks to read ahead */
475         int index;                      /* current index on the parent node */
476         int ncmax;                      /* nof children in the parent node */
477 };
478
479 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
480                                    struct buffer_head **bhp,
481                                    const struct nilfs_btree_readahead_info *ra)
482 {
483         struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
484         struct address_space *btnc = btnc_inode->i_mapping;
485         struct buffer_head *bh, *ra_bh;
486         sector_t submit_ptr = 0;
487         int ret;
488
489         ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh,
490                                         &submit_ptr);
491         if (ret) {
492                 if (likely(ret == -EEXIST))
493                         goto out_check;
494                 if (ret == -ENOENT) {
495                         /*
496                          * Block address translation failed due to invalid
497                          * value of 'ptr'.  In this case, return internal code
498                          * -EINVAL (broken bmap) to notify bmap layer of fatal
499                          * metadata corruption.
500                          */
501                         ret = -EINVAL;
502                 }
503                 return ret;
504         }
505
506         if (ra) {
507                 int i, n;
508                 __u64 ptr2;
509
510                 /* read ahead sibling nodes */
511                 for (n = ra->max_ra_blocks, i = ra->index + 1;
512                      n > 0 && i < ra->ncmax; n--, i++) {
513                         ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
514
515                         ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
516                                                         REQ_OP_READ, REQ_RAHEAD,
517                                                         &ra_bh, &submit_ptr);
518                         if (likely(!ret || ret == -EEXIST))
519                                 brelse(ra_bh);
520                         else if (ret != -EBUSY)
521                                 break;
522                         if (!buffer_locked(bh))
523                                 goto out_no_wait;
524                 }
525         }
526
527         wait_on_buffer(bh);
528
529  out_no_wait:
530         if (!buffer_uptodate(bh)) {
531                 nilfs_msg(btree->b_inode->i_sb, KERN_ERR,
532                           "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
533                           btree->b_inode->i_ino, (unsigned long long)ptr);
534                 brelse(bh);
535                 return -EIO;
536         }
537
538  out_check:
539         if (nilfs_btree_broken_node_block(bh)) {
540                 clear_buffer_uptodate(bh);
541                 brelse(bh);
542                 return -EINVAL;
543         }
544
545         *bhp = bh;
546         return 0;
547 }
548
549 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
550                                    struct buffer_head **bhp)
551 {
552         return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
553 }
554
555 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
556                                  struct nilfs_btree_path *path,
557                                  __u64 key, __u64 *ptrp, int minlevel,
558                                  int readahead)
559 {
560         struct nilfs_btree_node *node;
561         struct nilfs_btree_readahead_info p, *ra;
562         __u64 ptr;
563         int level, index, found, ncmax, ret;
564
565         node = nilfs_btree_get_root(btree);
566         level = nilfs_btree_node_get_level(node);
567         if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
568                 return -ENOENT;
569
570         found = nilfs_btree_node_lookup(node, key, &index);
571         ptr = nilfs_btree_node_get_ptr(node, index,
572                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
573         path[level].bp_bh = NULL;
574         path[level].bp_index = index;
575
576         ncmax = nilfs_btree_nchildren_per_block(btree);
577
578         while (--level >= minlevel) {
579                 ra = NULL;
580                 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
581                         p.node = nilfs_btree_get_node(btree, path, level + 1,
582                                                       &p.ncmax);
583                         p.index = index;
584                         p.max_ra_blocks = 7;
585                         ra = &p;
586                 }
587                 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
588                                               ra);
589                 if (ret < 0)
590                         return ret;
591
592                 node = nilfs_btree_get_nonroot_node(path, level);
593                 if (nilfs_btree_bad_node(btree, node, level))
594                         return -EINVAL;
595                 if (!found)
596                         found = nilfs_btree_node_lookup(node, key, &index);
597                 else
598                         index = 0;
599                 if (index < ncmax) {
600                         ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
601                 } else {
602                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
603                         /* insert */
604                         ptr = NILFS_BMAP_INVALID_PTR;
605                 }
606                 path[level].bp_index = index;
607         }
608         if (!found)
609                 return -ENOENT;
610
611         if (ptrp != NULL)
612                 *ptrp = ptr;
613
614         return 0;
615 }
616
617 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
618                                       struct nilfs_btree_path *path,
619                                       __u64 *keyp, __u64 *ptrp)
620 {
621         struct nilfs_btree_node *node;
622         __u64 ptr;
623         int index, level, ncmax, ret;
624
625         node = nilfs_btree_get_root(btree);
626         index = nilfs_btree_node_get_nchildren(node) - 1;
627         if (index < 0)
628                 return -ENOENT;
629         level = nilfs_btree_node_get_level(node);
630         ptr = nilfs_btree_node_get_ptr(node, index,
631                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
632         path[level].bp_bh = NULL;
633         path[level].bp_index = index;
634         ncmax = nilfs_btree_nchildren_per_block(btree);
635
636         for (level--; level > 0; level--) {
637                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
638                 if (ret < 0)
639                         return ret;
640                 node = nilfs_btree_get_nonroot_node(path, level);
641                 if (nilfs_btree_bad_node(btree, node, level))
642                         return -EINVAL;
643                 index = nilfs_btree_node_get_nchildren(node) - 1;
644                 ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
645                 path[level].bp_index = index;
646         }
647
648         if (keyp != NULL)
649                 *keyp = nilfs_btree_node_get_key(node, index);
650         if (ptrp != NULL)
651                 *ptrp = ptr;
652
653         return 0;
654 }
655
656 /**
657  * nilfs_btree_get_next_key - get next valid key from btree path array
658  * @btree: bmap struct of btree
659  * @path: array of nilfs_btree_path struct
660  * @minlevel: start level
661  * @nextkey: place to store the next valid key
662  *
663  * Return Value: If a next key was found, 0 is returned. Otherwise,
664  * -ENOENT is returned.
665  */
666 static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
667                                     const struct nilfs_btree_path *path,
668                                     int minlevel, __u64 *nextkey)
669 {
670         struct nilfs_btree_node *node;
671         int maxlevel = nilfs_btree_height(btree) - 1;
672         int index, next_adj, level;
673
674         /* Next index is already set to bp_index for leaf nodes. */
675         next_adj = 0;
676         for (level = minlevel; level <= maxlevel; level++) {
677                 if (level == maxlevel)
678                         node = nilfs_btree_get_root(btree);
679                 else
680                         node = nilfs_btree_get_nonroot_node(path, level);
681
682                 index = path[level].bp_index + next_adj;
683                 if (index < nilfs_btree_node_get_nchildren(node)) {
684                         /* Next key is in this node */
685                         *nextkey = nilfs_btree_node_get_key(node, index);
686                         return 0;
687                 }
688                 /* For non-leaf nodes, next index is stored at bp_index + 1. */
689                 next_adj = 1;
690         }
691         return -ENOENT;
692 }
693
694 static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
695                               __u64 key, int level, __u64 *ptrp)
696 {
697         struct nilfs_btree_path *path;
698         int ret;
699
700         path = nilfs_btree_alloc_path();
701         if (path == NULL)
702                 return -ENOMEM;
703
704         ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
705
706         nilfs_btree_free_path(path);
707
708         return ret;
709 }
710
711 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
712                                      __u64 key, __u64 *ptrp,
713                                      unsigned int maxblocks)
714 {
715         struct nilfs_btree_path *path;
716         struct nilfs_btree_node *node;
717         struct inode *dat = NULL;
718         __u64 ptr, ptr2;
719         sector_t blocknr;
720         int level = NILFS_BTREE_LEVEL_NODE_MIN;
721         int ret, cnt, index, maxlevel, ncmax;
722         struct nilfs_btree_readahead_info p;
723
724         path = nilfs_btree_alloc_path();
725         if (path == NULL)
726                 return -ENOMEM;
727
728         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
729         if (ret < 0)
730                 goto out;
731
732         if (NILFS_BMAP_USE_VBN(btree)) {
733                 dat = nilfs_bmap_get_dat(btree);
734                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
735                 if (ret < 0)
736                         goto out;
737                 ptr = blocknr;
738         }
739         cnt = 1;
740         if (cnt == maxblocks)
741                 goto end;
742
743         maxlevel = nilfs_btree_height(btree) - 1;
744         node = nilfs_btree_get_node(btree, path, level, &ncmax);
745         index = path[level].bp_index + 1;
746         for (;;) {
747                 while (index < nilfs_btree_node_get_nchildren(node)) {
748                         if (nilfs_btree_node_get_key(node, index) !=
749                             key + cnt)
750                                 goto end;
751                         ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
752                         if (dat) {
753                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
754                                 if (ret < 0)
755                                         goto out;
756                                 ptr2 = blocknr;
757                         }
758                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
759                                 goto end;
760                         index++;
761                         continue;
762                 }
763                 if (level == maxlevel)
764                         break;
765
766                 /* look-up right sibling node */
767                 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
768                 p.index = path[level + 1].bp_index + 1;
769                 p.max_ra_blocks = 7;
770                 if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
771                     nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
772                         break;
773                 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
774                 path[level + 1].bp_index = p.index;
775
776                 brelse(path[level].bp_bh);
777                 path[level].bp_bh = NULL;
778
779                 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
780                                               &p);
781                 if (ret < 0)
782                         goto out;
783                 node = nilfs_btree_get_nonroot_node(path, level);
784                 ncmax = nilfs_btree_nchildren_per_block(btree);
785                 index = 0;
786                 path[level].bp_index = index;
787         }
788  end:
789         *ptrp = ptr;
790         ret = cnt;
791  out:
792         nilfs_btree_free_path(path);
793         return ret;
794 }
795
796 static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
797                                     struct nilfs_btree_path *path,
798                                     int level, __u64 key)
799 {
800         if (level < nilfs_btree_height(btree) - 1) {
801                 do {
802                         nilfs_btree_node_set_key(
803                                 nilfs_btree_get_nonroot_node(path, level),
804                                 path[level].bp_index, key);
805                         if (!buffer_dirty(path[level].bp_bh))
806                                 mark_buffer_dirty(path[level].bp_bh);
807                 } while ((path[level].bp_index == 0) &&
808                          (++level < nilfs_btree_height(btree) - 1));
809         }
810
811         /* root */
812         if (level == nilfs_btree_height(btree) - 1) {
813                 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
814                                          path[level].bp_index, key);
815         }
816 }
817
818 static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
819                                   struct nilfs_btree_path *path,
820                                   int level, __u64 *keyp, __u64 *ptrp)
821 {
822         struct nilfs_btree_node *node;
823         int ncblk;
824
825         if (level < nilfs_btree_height(btree) - 1) {
826                 node = nilfs_btree_get_nonroot_node(path, level);
827                 ncblk = nilfs_btree_nchildren_per_block(btree);
828                 nilfs_btree_node_insert(node, path[level].bp_index,
829                                         *keyp, *ptrp, ncblk);
830                 if (!buffer_dirty(path[level].bp_bh))
831                         mark_buffer_dirty(path[level].bp_bh);
832
833                 if (path[level].bp_index == 0)
834                         nilfs_btree_promote_key(btree, path, level + 1,
835                                                 nilfs_btree_node_get_key(node,
836                                                                          0));
837         } else {
838                 node = nilfs_btree_get_root(btree);
839                 nilfs_btree_node_insert(node, path[level].bp_index,
840                                         *keyp, *ptrp,
841                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
842         }
843 }
844
845 static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
846                                    struct nilfs_btree_path *path,
847                                    int level, __u64 *keyp, __u64 *ptrp)
848 {
849         struct nilfs_btree_node *node, *left;
850         int nchildren, lnchildren, n, move, ncblk;
851
852         node = nilfs_btree_get_nonroot_node(path, level);
853         left = nilfs_btree_get_sib_node(path, level);
854         nchildren = nilfs_btree_node_get_nchildren(node);
855         lnchildren = nilfs_btree_node_get_nchildren(left);
856         ncblk = nilfs_btree_nchildren_per_block(btree);
857         move = 0;
858
859         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
860         if (n > path[level].bp_index) {
861                 /* move insert point */
862                 n--;
863                 move = 1;
864         }
865
866         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
867
868         if (!buffer_dirty(path[level].bp_bh))
869                 mark_buffer_dirty(path[level].bp_bh);
870         if (!buffer_dirty(path[level].bp_sib_bh))
871                 mark_buffer_dirty(path[level].bp_sib_bh);
872
873         nilfs_btree_promote_key(btree, path, level + 1,
874                                 nilfs_btree_node_get_key(node, 0));
875
876         if (move) {
877                 brelse(path[level].bp_bh);
878                 path[level].bp_bh = path[level].bp_sib_bh;
879                 path[level].bp_sib_bh = NULL;
880                 path[level].bp_index += lnchildren;
881                 path[level + 1].bp_index--;
882         } else {
883                 brelse(path[level].bp_sib_bh);
884                 path[level].bp_sib_bh = NULL;
885                 path[level].bp_index -= n;
886         }
887
888         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
889 }
890
891 static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
892                                     struct nilfs_btree_path *path,
893                                     int level, __u64 *keyp, __u64 *ptrp)
894 {
895         struct nilfs_btree_node *node, *right;
896         int nchildren, rnchildren, n, move, ncblk;
897
898         node = nilfs_btree_get_nonroot_node(path, level);
899         right = nilfs_btree_get_sib_node(path, level);
900         nchildren = nilfs_btree_node_get_nchildren(node);
901         rnchildren = nilfs_btree_node_get_nchildren(right);
902         ncblk = nilfs_btree_nchildren_per_block(btree);
903         move = 0;
904
905         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
906         if (n > nchildren - path[level].bp_index) {
907                 /* move insert point */
908                 n--;
909                 move = 1;
910         }
911
912         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
913
914         if (!buffer_dirty(path[level].bp_bh))
915                 mark_buffer_dirty(path[level].bp_bh);
916         if (!buffer_dirty(path[level].bp_sib_bh))
917                 mark_buffer_dirty(path[level].bp_sib_bh);
918
919         path[level + 1].bp_index++;
920         nilfs_btree_promote_key(btree, path, level + 1,
921                                 nilfs_btree_node_get_key(right, 0));
922         path[level + 1].bp_index--;
923
924         if (move) {
925                 brelse(path[level].bp_bh);
926                 path[level].bp_bh = path[level].bp_sib_bh;
927                 path[level].bp_sib_bh = NULL;
928                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
929                 path[level + 1].bp_index++;
930         } else {
931                 brelse(path[level].bp_sib_bh);
932                 path[level].bp_sib_bh = NULL;
933         }
934
935         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
936 }
937
938 static void nilfs_btree_split(struct nilfs_bmap *btree,
939                               struct nilfs_btree_path *path,
940                               int level, __u64 *keyp, __u64 *ptrp)
941 {
942         struct nilfs_btree_node *node, *right;
943         int nchildren, n, move, ncblk;
944
945         node = nilfs_btree_get_nonroot_node(path, level);
946         right = nilfs_btree_get_sib_node(path, level);
947         nchildren = nilfs_btree_node_get_nchildren(node);
948         ncblk = nilfs_btree_nchildren_per_block(btree);
949         move = 0;
950
951         n = (nchildren + 1) / 2;
952         if (n > nchildren - path[level].bp_index) {
953                 n--;
954                 move = 1;
955         }
956
957         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
958
959         if (!buffer_dirty(path[level].bp_bh))
960                 mark_buffer_dirty(path[level].bp_bh);
961         if (!buffer_dirty(path[level].bp_sib_bh))
962                 mark_buffer_dirty(path[level].bp_sib_bh);
963
964         if (move) {
965                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
966                 nilfs_btree_node_insert(right, path[level].bp_index,
967                                         *keyp, *ptrp, ncblk);
968
969                 *keyp = nilfs_btree_node_get_key(right, 0);
970                 *ptrp = path[level].bp_newreq.bpr_ptr;
971
972                 brelse(path[level].bp_bh);
973                 path[level].bp_bh = path[level].bp_sib_bh;
974                 path[level].bp_sib_bh = NULL;
975         } else {
976                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
977
978                 *keyp = nilfs_btree_node_get_key(right, 0);
979                 *ptrp = path[level].bp_newreq.bpr_ptr;
980
981                 brelse(path[level].bp_sib_bh);
982                 path[level].bp_sib_bh = NULL;
983         }
984
985         path[level + 1].bp_index++;
986 }
987
988 static void nilfs_btree_grow(struct nilfs_bmap *btree,
989                              struct nilfs_btree_path *path,
990                              int level, __u64 *keyp, __u64 *ptrp)
991 {
992         struct nilfs_btree_node *root, *child;
993         int n, ncblk;
994
995         root = nilfs_btree_get_root(btree);
996         child = nilfs_btree_get_sib_node(path, level);
997         ncblk = nilfs_btree_nchildren_per_block(btree);
998
999         n = nilfs_btree_node_get_nchildren(root);
1000
1001         nilfs_btree_node_move_right(root, child, n,
1002                                     NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1003         nilfs_btree_node_set_level(root, level + 1);
1004
1005         if (!buffer_dirty(path[level].bp_sib_bh))
1006                 mark_buffer_dirty(path[level].bp_sib_bh);
1007
1008         path[level].bp_bh = path[level].bp_sib_bh;
1009         path[level].bp_sib_bh = NULL;
1010
1011         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
1012
1013         *keyp = nilfs_btree_node_get_key(child, 0);
1014         *ptrp = path[level].bp_newreq.bpr_ptr;
1015 }
1016
1017 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
1018                                    const struct nilfs_btree_path *path)
1019 {
1020         struct nilfs_btree_node *node;
1021         int level, ncmax;
1022
1023         if (path == NULL)
1024                 return NILFS_BMAP_INVALID_PTR;
1025
1026         /* left sibling */
1027         level = NILFS_BTREE_LEVEL_NODE_MIN;
1028         if (path[level].bp_index > 0) {
1029                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
1030                 return nilfs_btree_node_get_ptr(node,
1031                                                 path[level].bp_index - 1,
1032                                                 ncmax);
1033         }
1034
1035         /* parent */
1036         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1037         if (level <= nilfs_btree_height(btree) - 1) {
1038                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
1039                 return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1040                                                 ncmax);
1041         }
1042
1043         return NILFS_BMAP_INVALID_PTR;
1044 }
1045
1046 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1047                                        const struct nilfs_btree_path *path,
1048                                        __u64 key)
1049 {
1050         __u64 ptr;
1051
1052         ptr = nilfs_bmap_find_target_seq(btree, key);
1053         if (ptr != NILFS_BMAP_INVALID_PTR)
1054                 /* sequential access */
1055                 return ptr;
1056
1057         ptr = nilfs_btree_find_near(btree, path);
1058         if (ptr != NILFS_BMAP_INVALID_PTR)
1059                 /* near */
1060                 return ptr;
1061
1062         /* block group */
1063         return nilfs_bmap_find_target_in_group(btree);
1064 }
1065
1066 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1067                                       struct nilfs_btree_path *path,
1068                                       int *levelp, __u64 key, __u64 ptr,
1069                                       struct nilfs_bmap_stats *stats)
1070 {
1071         struct buffer_head *bh;
1072         struct nilfs_btree_node *node, *parent, *sib;
1073         __u64 sibptr;
1074         int pindex, level, ncmax, ncblk, ret;
1075         struct inode *dat = NULL;
1076
1077         stats->bs_nblocks = 0;
1078         level = NILFS_BTREE_LEVEL_DATA;
1079
1080         /* allocate a new ptr for data block */
1081         if (NILFS_BMAP_USE_VBN(btree)) {
1082                 path[level].bp_newreq.bpr_ptr =
1083                         nilfs_btree_find_target_v(btree, path, key);
1084                 dat = nilfs_bmap_get_dat(btree);
1085         }
1086
1087         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1088         if (ret < 0)
1089                 goto err_out_data;
1090
1091         ncblk = nilfs_btree_nchildren_per_block(btree);
1092
1093         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1094              level < nilfs_btree_height(btree) - 1;
1095              level++) {
1096                 node = nilfs_btree_get_nonroot_node(path, level);
1097                 if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1098                         path[level].bp_op = nilfs_btree_do_insert;
1099                         stats->bs_nblocks++;
1100                         goto out;
1101                 }
1102
1103                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1104                 pindex = path[level + 1].bp_index;
1105
1106                 /* left sibling */
1107                 if (pindex > 0) {
1108                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1109                                                           ncmax);
1110                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1111                         if (ret < 0)
1112                                 goto err_out_child_node;
1113                         sib = (struct nilfs_btree_node *)bh->b_data;
1114                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1115                                 path[level].bp_sib_bh = bh;
1116                                 path[level].bp_op = nilfs_btree_carry_left;
1117                                 stats->bs_nblocks++;
1118                                 goto out;
1119                         } else {
1120                                 brelse(bh);
1121                         }
1122                 }
1123
1124                 /* right sibling */
1125                 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1126                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1127                                                           ncmax);
1128                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1129                         if (ret < 0)
1130                                 goto err_out_child_node;
1131                         sib = (struct nilfs_btree_node *)bh->b_data;
1132                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1133                                 path[level].bp_sib_bh = bh;
1134                                 path[level].bp_op = nilfs_btree_carry_right;
1135                                 stats->bs_nblocks++;
1136                                 goto out;
1137                         } else {
1138                                 brelse(bh);
1139                         }
1140                 }
1141
1142                 /* split */
1143                 path[level].bp_newreq.bpr_ptr =
1144                         path[level - 1].bp_newreq.bpr_ptr + 1;
1145                 ret = nilfs_bmap_prepare_alloc_ptr(btree,
1146                                                    &path[level].bp_newreq, dat);
1147                 if (ret < 0)
1148                         goto err_out_child_node;
1149                 ret = nilfs_btree_get_new_block(btree,
1150                                                 path[level].bp_newreq.bpr_ptr,
1151                                                 &bh);
1152                 if (ret < 0)
1153                         goto err_out_curr_node;
1154
1155                 stats->bs_nblocks++;
1156
1157                 sib = (struct nilfs_btree_node *)bh->b_data;
1158                 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1159                 path[level].bp_sib_bh = bh;
1160                 path[level].bp_op = nilfs_btree_split;
1161         }
1162
1163         /* root */
1164         node = nilfs_btree_get_root(btree);
1165         if (nilfs_btree_node_get_nchildren(node) <
1166             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1167                 path[level].bp_op = nilfs_btree_do_insert;
1168                 stats->bs_nblocks++;
1169                 goto out;
1170         }
1171
1172         /* grow */
1173         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1174         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1175         if (ret < 0)
1176                 goto err_out_child_node;
1177         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1178                                         &bh);
1179         if (ret < 0)
1180                 goto err_out_curr_node;
1181
1182         nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1183                               0, level, 0, ncblk, NULL, NULL);
1184         path[level].bp_sib_bh = bh;
1185         path[level].bp_op = nilfs_btree_grow;
1186
1187         level++;
1188         path[level].bp_op = nilfs_btree_do_insert;
1189
1190         /* a newly-created node block and a data block are added */
1191         stats->bs_nblocks += 2;
1192
1193         /* success */
1194  out:
1195         *levelp = level;
1196         return ret;
1197
1198         /* error */
1199  err_out_curr_node:
1200         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1201  err_out_child_node:
1202         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1203                 nilfs_btnode_delete(path[level].bp_sib_bh);
1204                 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1205
1206         }
1207
1208         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1209  err_out_data:
1210         *levelp = level;
1211         stats->bs_nblocks = 0;
1212         return ret;
1213 }
1214
1215 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1216                                       struct nilfs_btree_path *path,
1217                                       int maxlevel, __u64 key, __u64 ptr)
1218 {
1219         struct inode *dat = NULL;
1220         int level;
1221
1222         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1223         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1224         if (NILFS_BMAP_USE_VBN(btree)) {
1225                 nilfs_bmap_set_target_v(btree, key, ptr);
1226                 dat = nilfs_bmap_get_dat(btree);
1227         }
1228
1229         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1230                 nilfs_bmap_commit_alloc_ptr(btree,
1231                                             &path[level - 1].bp_newreq, dat);
1232                 path[level].bp_op(btree, path, level, &key, &ptr);
1233         }
1234
1235         if (!nilfs_bmap_dirty(btree))
1236                 nilfs_bmap_set_dirty(btree);
1237 }
1238
1239 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1240 {
1241         struct nilfs_btree_path *path;
1242         struct nilfs_bmap_stats stats;
1243         int level, ret;
1244
1245         path = nilfs_btree_alloc_path();
1246         if (path == NULL)
1247                 return -ENOMEM;
1248
1249         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1250                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1251         if (ret != -ENOENT) {
1252                 if (ret == 0)
1253                         ret = -EEXIST;
1254                 goto out;
1255         }
1256
1257         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1258         if (ret < 0)
1259                 goto out;
1260         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1261         nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1262
1263  out:
1264         nilfs_btree_free_path(path);
1265         return ret;
1266 }
1267
1268 static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1269                                   struct nilfs_btree_path *path,
1270                                   int level, __u64 *keyp, __u64 *ptrp)
1271 {
1272         struct nilfs_btree_node *node;
1273         int ncblk;
1274
1275         if (level < nilfs_btree_height(btree) - 1) {
1276                 node = nilfs_btree_get_nonroot_node(path, level);
1277                 ncblk = nilfs_btree_nchildren_per_block(btree);
1278                 nilfs_btree_node_delete(node, path[level].bp_index,
1279                                         keyp, ptrp, ncblk);
1280                 if (!buffer_dirty(path[level].bp_bh))
1281                         mark_buffer_dirty(path[level].bp_bh);
1282                 if (path[level].bp_index == 0)
1283                         nilfs_btree_promote_key(btree, path, level + 1,
1284                                 nilfs_btree_node_get_key(node, 0));
1285         } else {
1286                 node = nilfs_btree_get_root(btree);
1287                 nilfs_btree_node_delete(node, path[level].bp_index,
1288                                         keyp, ptrp,
1289                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1290         }
1291 }
1292
1293 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1294                                     struct nilfs_btree_path *path,
1295                                     int level, __u64 *keyp, __u64 *ptrp)
1296 {
1297         struct nilfs_btree_node *node, *left;
1298         int nchildren, lnchildren, n, ncblk;
1299
1300         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1301
1302         node = nilfs_btree_get_nonroot_node(path, level);
1303         left = nilfs_btree_get_sib_node(path, level);
1304         nchildren = nilfs_btree_node_get_nchildren(node);
1305         lnchildren = nilfs_btree_node_get_nchildren(left);
1306         ncblk = nilfs_btree_nchildren_per_block(btree);
1307
1308         n = (nchildren + lnchildren) / 2 - nchildren;
1309
1310         nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1311
1312         if (!buffer_dirty(path[level].bp_bh))
1313                 mark_buffer_dirty(path[level].bp_bh);
1314         if (!buffer_dirty(path[level].bp_sib_bh))
1315                 mark_buffer_dirty(path[level].bp_sib_bh);
1316
1317         nilfs_btree_promote_key(btree, path, level + 1,
1318                                 nilfs_btree_node_get_key(node, 0));
1319
1320         brelse(path[level].bp_sib_bh);
1321         path[level].bp_sib_bh = NULL;
1322         path[level].bp_index += n;
1323 }
1324
1325 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1326                                      struct nilfs_btree_path *path,
1327                                      int level, __u64 *keyp, __u64 *ptrp)
1328 {
1329         struct nilfs_btree_node *node, *right;
1330         int nchildren, rnchildren, n, ncblk;
1331
1332         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1333
1334         node = nilfs_btree_get_nonroot_node(path, level);
1335         right = nilfs_btree_get_sib_node(path, level);
1336         nchildren = nilfs_btree_node_get_nchildren(node);
1337         rnchildren = nilfs_btree_node_get_nchildren(right);
1338         ncblk = nilfs_btree_nchildren_per_block(btree);
1339
1340         n = (nchildren + rnchildren) / 2 - nchildren;
1341
1342         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1343
1344         if (!buffer_dirty(path[level].bp_bh))
1345                 mark_buffer_dirty(path[level].bp_bh);
1346         if (!buffer_dirty(path[level].bp_sib_bh))
1347                 mark_buffer_dirty(path[level].bp_sib_bh);
1348
1349         path[level + 1].bp_index++;
1350         nilfs_btree_promote_key(btree, path, level + 1,
1351                                 nilfs_btree_node_get_key(right, 0));
1352         path[level + 1].bp_index--;
1353
1354         brelse(path[level].bp_sib_bh);
1355         path[level].bp_sib_bh = NULL;
1356 }
1357
1358 static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1359                                     struct nilfs_btree_path *path,
1360                                     int level, __u64 *keyp, __u64 *ptrp)
1361 {
1362         struct nilfs_btree_node *node, *left;
1363         int n, ncblk;
1364
1365         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1366
1367         node = nilfs_btree_get_nonroot_node(path, level);
1368         left = nilfs_btree_get_sib_node(path, level);
1369         ncblk = nilfs_btree_nchildren_per_block(btree);
1370
1371         n = nilfs_btree_node_get_nchildren(node);
1372
1373         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1374
1375         if (!buffer_dirty(path[level].bp_sib_bh))
1376                 mark_buffer_dirty(path[level].bp_sib_bh);
1377
1378         nilfs_btnode_delete(path[level].bp_bh);
1379         path[level].bp_bh = path[level].bp_sib_bh;
1380         path[level].bp_sib_bh = NULL;
1381         path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1382 }
1383
1384 static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1385                                      struct nilfs_btree_path *path,
1386                                      int level, __u64 *keyp, __u64 *ptrp)
1387 {
1388         struct nilfs_btree_node *node, *right;
1389         int n, ncblk;
1390
1391         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1392
1393         node = nilfs_btree_get_nonroot_node(path, level);
1394         right = nilfs_btree_get_sib_node(path, level);
1395         ncblk = nilfs_btree_nchildren_per_block(btree);
1396
1397         n = nilfs_btree_node_get_nchildren(right);
1398
1399         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1400
1401         if (!buffer_dirty(path[level].bp_bh))
1402                 mark_buffer_dirty(path[level].bp_bh);
1403
1404         nilfs_btnode_delete(path[level].bp_sib_bh);
1405         path[level].bp_sib_bh = NULL;
1406         path[level + 1].bp_index++;
1407 }
1408
1409 static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1410                                struct nilfs_btree_path *path,
1411                                int level, __u64 *keyp, __u64 *ptrp)
1412 {
1413         struct nilfs_btree_node *root, *child;
1414         int n, ncblk;
1415
1416         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1417
1418         root = nilfs_btree_get_root(btree);
1419         child = nilfs_btree_get_nonroot_node(path, level);
1420         ncblk = nilfs_btree_nchildren_per_block(btree);
1421
1422         nilfs_btree_node_delete(root, 0, NULL, NULL,
1423                                 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1424         nilfs_btree_node_set_level(root, level);
1425         n = nilfs_btree_node_get_nchildren(child);
1426         nilfs_btree_node_move_left(root, child, n,
1427                                    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1428
1429         nilfs_btnode_delete(path[level].bp_bh);
1430         path[level].bp_bh = NULL;
1431 }
1432
1433 static void nilfs_btree_nop(struct nilfs_bmap *btree,
1434                             struct nilfs_btree_path *path,
1435                             int level, __u64 *keyp, __u64 *ptrp)
1436 {
1437 }
1438
1439 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1440                                       struct nilfs_btree_path *path,
1441                                       int *levelp,
1442                                       struct nilfs_bmap_stats *stats,
1443                                       struct inode *dat)
1444 {
1445         struct buffer_head *bh;
1446         struct nilfs_btree_node *node, *parent, *sib;
1447         __u64 sibptr;
1448         int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1449
1450         ret = 0;
1451         stats->bs_nblocks = 0;
1452         ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1453         ncblk = nilfs_btree_nchildren_per_block(btree);
1454
1455         for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1456              level < nilfs_btree_height(btree) - 1;
1457              level++) {
1458                 node = nilfs_btree_get_nonroot_node(path, level);
1459                 path[level].bp_oldreq.bpr_ptr =
1460                         nilfs_btree_node_get_ptr(node, dindex, ncblk);
1461                 ret = nilfs_bmap_prepare_end_ptr(btree,
1462                                                  &path[level].bp_oldreq, dat);
1463                 if (ret < 0)
1464                         goto err_out_child_node;
1465
1466                 if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1467                         path[level].bp_op = nilfs_btree_do_delete;
1468                         stats->bs_nblocks++;
1469                         goto out;
1470                 }
1471
1472                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1473                 pindex = path[level + 1].bp_index;
1474                 dindex = pindex;
1475
1476                 if (pindex > 0) {
1477                         /* left sibling */
1478                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1479                                                           ncmax);
1480                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1481                         if (ret < 0)
1482                                 goto err_out_curr_node;
1483                         sib = (struct nilfs_btree_node *)bh->b_data;
1484                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1485                                 path[level].bp_sib_bh = bh;
1486                                 path[level].bp_op = nilfs_btree_borrow_left;
1487                                 stats->bs_nblocks++;
1488                                 goto out;
1489                         } else {
1490                                 path[level].bp_sib_bh = bh;
1491                                 path[level].bp_op = nilfs_btree_concat_left;
1492                                 stats->bs_nblocks++;
1493                                 /* continue; */
1494                         }
1495                 } else if (pindex <
1496                            nilfs_btree_node_get_nchildren(parent) - 1) {
1497                         /* right sibling */
1498                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1499                                                           ncmax);
1500                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1501                         if (ret < 0)
1502                                 goto err_out_curr_node;
1503                         sib = (struct nilfs_btree_node *)bh->b_data;
1504                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1505                                 path[level].bp_sib_bh = bh;
1506                                 path[level].bp_op = nilfs_btree_borrow_right;
1507                                 stats->bs_nblocks++;
1508                                 goto out;
1509                         } else {
1510                                 path[level].bp_sib_bh = bh;
1511                                 path[level].bp_op = nilfs_btree_concat_right;
1512                                 stats->bs_nblocks++;
1513                                 /*
1514                                  * When merging right sibling node
1515                                  * into the current node, pointer to
1516                                  * the right sibling node must be
1517                                  * terminated instead.  The adjustment
1518                                  * below is required for that.
1519                                  */
1520                                 dindex = pindex + 1;
1521                                 /* continue; */
1522                         }
1523                 } else {
1524                         /* no siblings */
1525                         /* the only child of the root node */
1526                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1527                         if (nilfs_btree_node_get_nchildren(node) - 1 <=
1528                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1529                                 path[level].bp_op = nilfs_btree_shrink;
1530                                 stats->bs_nblocks += 2;
1531                                 level++;
1532                                 path[level].bp_op = nilfs_btree_nop;
1533                                 goto shrink_root_child;
1534                         } else {
1535                                 path[level].bp_op = nilfs_btree_do_delete;
1536                                 stats->bs_nblocks++;
1537                                 goto out;
1538                         }
1539                 }
1540         }
1541
1542         /* child of the root node is deleted */
1543         path[level].bp_op = nilfs_btree_do_delete;
1544         stats->bs_nblocks++;
1545
1546 shrink_root_child:
1547         node = nilfs_btree_get_root(btree);
1548         path[level].bp_oldreq.bpr_ptr =
1549                 nilfs_btree_node_get_ptr(node, dindex,
1550                                          NILFS_BTREE_ROOT_NCHILDREN_MAX);
1551
1552         ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1553         if (ret < 0)
1554                 goto err_out_child_node;
1555
1556         /* success */
1557  out:
1558         *levelp = level;
1559         return ret;
1560
1561         /* error */
1562  err_out_curr_node:
1563         nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1564  err_out_child_node:
1565         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1566                 brelse(path[level].bp_sib_bh);
1567                 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1568         }
1569         *levelp = level;
1570         stats->bs_nblocks = 0;
1571         return ret;
1572 }
1573
1574 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1575                                       struct nilfs_btree_path *path,
1576                                       int maxlevel, struct inode *dat)
1577 {
1578         int level;
1579
1580         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1581                 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1582                 path[level].bp_op(btree, path, level, NULL, NULL);
1583         }
1584
1585         if (!nilfs_bmap_dirty(btree))
1586                 nilfs_bmap_set_dirty(btree);
1587 }
1588
1589 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1590
1591 {
1592         struct nilfs_btree_path *path;
1593         struct nilfs_bmap_stats stats;
1594         struct inode *dat;
1595         int level, ret;
1596
1597         path = nilfs_btree_alloc_path();
1598         if (path == NULL)
1599                 return -ENOMEM;
1600
1601         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1602                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1603         if (ret < 0)
1604                 goto out;
1605
1606
1607         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1608
1609         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1610         if (ret < 0)
1611                 goto out;
1612         nilfs_btree_commit_delete(btree, path, level, dat);
1613         nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1614
1615 out:
1616         nilfs_btree_free_path(path);
1617         return ret;
1618 }
1619
1620 static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1621                                 __u64 *keyp)
1622 {
1623         struct nilfs_btree_path *path;
1624         const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1625         int ret;
1626
1627         path = nilfs_btree_alloc_path();
1628         if (!path)
1629                 return -ENOMEM;
1630
1631         ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1632         if (!ret)
1633                 *keyp = start;
1634         else if (ret == -ENOENT)
1635                 ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1636
1637         nilfs_btree_free_path(path);
1638         return ret;
1639 }
1640
1641 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1642 {
1643         struct nilfs_btree_path *path;
1644         int ret;
1645
1646         path = nilfs_btree_alloc_path();
1647         if (path == NULL)
1648                 return -ENOMEM;
1649
1650         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1651
1652         nilfs_btree_free_path(path);
1653
1654         return ret;
1655 }
1656
1657 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1658 {
1659         struct buffer_head *bh;
1660         struct nilfs_btree_node *root, *node;
1661         __u64 maxkey, nextmaxkey;
1662         __u64 ptr;
1663         int nchildren, ret;
1664
1665         root = nilfs_btree_get_root(btree);
1666         switch (nilfs_btree_height(btree)) {
1667         case 2:
1668                 bh = NULL;
1669                 node = root;
1670                 break;
1671         case 3:
1672                 nchildren = nilfs_btree_node_get_nchildren(root);
1673                 if (nchildren > 1)
1674                         return 0;
1675                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1676                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1677                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1678                 if (ret < 0)
1679                         return ret;
1680                 node = (struct nilfs_btree_node *)bh->b_data;
1681                 break;
1682         default:
1683                 return 0;
1684         }
1685
1686         nchildren = nilfs_btree_node_get_nchildren(node);
1687         maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1688         nextmaxkey = (nchildren > 1) ?
1689                 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1690         if (bh != NULL)
1691                 brelse(bh);
1692
1693         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1694 }
1695
1696 static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1697                                    __u64 *keys, __u64 *ptrs, int nitems)
1698 {
1699         struct buffer_head *bh;
1700         struct nilfs_btree_node *node, *root;
1701         __le64 *dkeys;
1702         __le64 *dptrs;
1703         __u64 ptr;
1704         int nchildren, ncmax, i, ret;
1705
1706         root = nilfs_btree_get_root(btree);
1707         switch (nilfs_btree_height(btree)) {
1708         case 2:
1709                 bh = NULL;
1710                 node = root;
1711                 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1712                 break;
1713         case 3:
1714                 nchildren = nilfs_btree_node_get_nchildren(root);
1715                 WARN_ON(nchildren > 1);
1716                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1717                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1718                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1719                 if (ret < 0)
1720                         return ret;
1721                 node = (struct nilfs_btree_node *)bh->b_data;
1722                 ncmax = nilfs_btree_nchildren_per_block(btree);
1723                 break;
1724         default:
1725                 node = NULL;
1726                 return -EINVAL;
1727         }
1728
1729         nchildren = nilfs_btree_node_get_nchildren(node);
1730         if (nchildren < nitems)
1731                 nitems = nchildren;
1732         dkeys = nilfs_btree_node_dkeys(node);
1733         dptrs = nilfs_btree_node_dptrs(node, ncmax);
1734         for (i = 0; i < nitems; i++) {
1735                 keys[i] = le64_to_cpu(dkeys[i]);
1736                 ptrs[i] = le64_to_cpu(dptrs[i]);
1737         }
1738
1739         if (bh != NULL)
1740                 brelse(bh);
1741
1742         return nitems;
1743 }
1744
1745 static int
1746 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1747                                        union nilfs_bmap_ptr_req *dreq,
1748                                        union nilfs_bmap_ptr_req *nreq,
1749                                        struct buffer_head **bhp,
1750                                        struct nilfs_bmap_stats *stats)
1751 {
1752         struct buffer_head *bh;
1753         struct inode *dat = NULL;
1754         int ret;
1755
1756         stats->bs_nblocks = 0;
1757
1758         /* for data */
1759         /* cannot find near ptr */
1760         if (NILFS_BMAP_USE_VBN(btree)) {
1761                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1762                 dat = nilfs_bmap_get_dat(btree);
1763         }
1764
1765         ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode);
1766         if (ret < 0)
1767                 return ret;
1768
1769         ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1770         if (ret < 0)
1771                 return ret;
1772
1773         *bhp = NULL;
1774         stats->bs_nblocks++;
1775         if (nreq != NULL) {
1776                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1777                 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1778                 if (ret < 0)
1779                         goto err_out_dreq;
1780
1781                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1782                 if (ret < 0)
1783                         goto err_out_nreq;
1784
1785                 *bhp = bh;
1786                 stats->bs_nblocks++;
1787         }
1788
1789         /* success */
1790         return 0;
1791
1792         /* error */
1793  err_out_nreq:
1794         nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1795  err_out_dreq:
1796         nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1797         stats->bs_nblocks = 0;
1798         return ret;
1799
1800 }
1801
1802 static void
1803 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1804                                       __u64 key, __u64 ptr,
1805                                       const __u64 *keys, const __u64 *ptrs,
1806                                       int n,
1807                                       union nilfs_bmap_ptr_req *dreq,
1808                                       union nilfs_bmap_ptr_req *nreq,
1809                                       struct buffer_head *bh)
1810 {
1811         struct nilfs_btree_node *node;
1812         struct inode *dat;
1813         __u64 tmpptr;
1814         int ncblk;
1815
1816         /* free resources */
1817         if (btree->b_ops->bop_clear != NULL)
1818                 btree->b_ops->bop_clear(btree);
1819
1820         /* ptr must be a pointer to a buffer head. */
1821         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1822
1823         /* convert and insert */
1824         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1825         __nilfs_btree_init(btree);
1826         if (nreq != NULL) {
1827                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1828                 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1829
1830                 /* create child node at level 1 */
1831                 node = (struct nilfs_btree_node *)bh->b_data;
1832                 ncblk = nilfs_btree_nchildren_per_block(btree);
1833                 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1834                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1835                 if (!buffer_dirty(bh))
1836                         mark_buffer_dirty(bh);
1837                 if (!nilfs_bmap_dirty(btree))
1838                         nilfs_bmap_set_dirty(btree);
1839
1840                 brelse(bh);
1841
1842                 /* create root node at level 2 */
1843                 node = nilfs_btree_get_root(btree);
1844                 tmpptr = nreq->bpr_ptr;
1845                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1846                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1847                                       &keys[0], &tmpptr);
1848         } else {
1849                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1850
1851                 /* create root node at level 1 */
1852                 node = nilfs_btree_get_root(btree);
1853                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1854                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1855                                       keys, ptrs);
1856                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1857                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1858                 if (!nilfs_bmap_dirty(btree))
1859                         nilfs_bmap_set_dirty(btree);
1860         }
1861
1862         if (NILFS_BMAP_USE_VBN(btree))
1863                 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1864 }
1865
1866 /**
1867  * nilfs_btree_convert_and_insert -
1868  * @bmap:
1869  * @key:
1870  * @ptr:
1871  * @keys:
1872  * @ptrs:
1873  * @n:
1874  */
1875 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1876                                    __u64 key, __u64 ptr,
1877                                    const __u64 *keys, const __u64 *ptrs, int n)
1878 {
1879         struct buffer_head *bh = NULL;
1880         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1881         struct nilfs_bmap_stats stats;
1882         int ret;
1883
1884         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1885                 di = &dreq;
1886                 ni = NULL;
1887         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1888                            nilfs_btree_node_size(btree))) {
1889                 di = &dreq;
1890                 ni = &nreq;
1891         } else {
1892                 di = NULL;
1893                 ni = NULL;
1894                 BUG();
1895         }
1896
1897         ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1898                                                      &stats);
1899         if (ret < 0)
1900                 return ret;
1901         nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1902                                               di, ni, bh);
1903         nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1904         return 0;
1905 }
1906
1907 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1908                                    struct nilfs_btree_path *path,
1909                                    int level,
1910                                    struct buffer_head *bh)
1911 {
1912         while ((++level < nilfs_btree_height(btree) - 1) &&
1913                !buffer_dirty(path[level].bp_bh))
1914                 mark_buffer_dirty(path[level].bp_bh);
1915
1916         return 0;
1917 }
1918
1919 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1920                                         struct nilfs_btree_path *path,
1921                                         int level, struct inode *dat)
1922 {
1923         struct nilfs_btree_node *parent;
1924         int ncmax, ret;
1925
1926         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1927         path[level].bp_oldreq.bpr_ptr =
1928                 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1929                                          ncmax);
1930         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1931         ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1932                                        &path[level].bp_newreq.bpr_req);
1933         if (ret < 0)
1934                 return ret;
1935
1936         if (buffer_nilfs_node(path[level].bp_bh)) {
1937                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1938                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1939                 path[level].bp_ctxt.bh = path[level].bp_bh;
1940                 ret = nilfs_btnode_prepare_change_key(
1941                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1942                         &path[level].bp_ctxt);
1943                 if (ret < 0) {
1944                         nilfs_dat_abort_update(dat,
1945                                                &path[level].bp_oldreq.bpr_req,
1946                                                &path[level].bp_newreq.bpr_req);
1947                         return ret;
1948                 }
1949         }
1950
1951         return 0;
1952 }
1953
1954 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1955                                         struct nilfs_btree_path *path,
1956                                         int level, struct inode *dat)
1957 {
1958         struct nilfs_btree_node *parent;
1959         int ncmax;
1960
1961         nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1962                                 &path[level].bp_newreq.bpr_req,
1963                                 btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1964
1965         if (buffer_nilfs_node(path[level].bp_bh)) {
1966                 nilfs_btnode_commit_change_key(
1967                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1968                         &path[level].bp_ctxt);
1969                 path[level].bp_bh = path[level].bp_ctxt.bh;
1970         }
1971         set_buffer_nilfs_volatile(path[level].bp_bh);
1972
1973         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1974         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1975                                  path[level].bp_newreq.bpr_ptr, ncmax);
1976 }
1977
1978 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1979                                        struct nilfs_btree_path *path,
1980                                        int level, struct inode *dat)
1981 {
1982         nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1983                                &path[level].bp_newreq.bpr_req);
1984         if (buffer_nilfs_node(path[level].bp_bh))
1985                 nilfs_btnode_abort_change_key(
1986                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1987                         &path[level].bp_ctxt);
1988 }
1989
1990 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1991                                            struct nilfs_btree_path *path,
1992                                            int minlevel, int *maxlevelp,
1993                                            struct inode *dat)
1994 {
1995         int level, ret;
1996
1997         level = minlevel;
1998         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1999                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
2000                 if (ret < 0)
2001                         return ret;
2002         }
2003         while ((++level < nilfs_btree_height(btree) - 1) &&
2004                !buffer_dirty(path[level].bp_bh)) {
2005
2006                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
2007                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
2008                 if (ret < 0)
2009                         goto out;
2010         }
2011
2012         /* success */
2013         *maxlevelp = level - 1;
2014         return 0;
2015
2016         /* error */
2017  out:
2018         while (--level > minlevel)
2019                 nilfs_btree_abort_update_v(btree, path, level, dat);
2020         if (!buffer_nilfs_volatile(path[level].bp_bh))
2021                 nilfs_btree_abort_update_v(btree, path, level, dat);
2022         return ret;
2023 }
2024
2025 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
2026                                            struct nilfs_btree_path *path,
2027                                            int minlevel, int maxlevel,
2028                                            struct buffer_head *bh,
2029                                            struct inode *dat)
2030 {
2031         int level;
2032
2033         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2034                 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2035
2036         for (level = minlevel + 1; level <= maxlevel; level++)
2037                 nilfs_btree_commit_update_v(btree, path, level, dat);
2038 }
2039
2040 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2041                                    struct nilfs_btree_path *path,
2042                                    int level, struct buffer_head *bh)
2043 {
2044         int maxlevel = 0, ret;
2045         struct nilfs_btree_node *parent;
2046         struct inode *dat = nilfs_bmap_get_dat(btree);
2047         __u64 ptr;
2048         int ncmax;
2049
2050         get_bh(bh);
2051         path[level].bp_bh = bh;
2052         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2053                                               dat);
2054         if (ret < 0)
2055                 goto out;
2056
2057         if (buffer_nilfs_volatile(path[level].bp_bh)) {
2058                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2059                 ptr = nilfs_btree_node_get_ptr(parent,
2060                                                path[level + 1].bp_index,
2061                                                ncmax);
2062                 ret = nilfs_dat_mark_dirty(dat, ptr);
2063                 if (ret < 0)
2064                         goto out;
2065         }
2066
2067         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2068
2069  out:
2070         brelse(path[level].bp_bh);
2071         path[level].bp_bh = NULL;
2072         return ret;
2073 }
2074
2075 static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2076                                  struct buffer_head *bh)
2077 {
2078         struct nilfs_btree_path *path;
2079         struct nilfs_btree_node *node;
2080         __u64 key;
2081         int level, ret;
2082
2083         WARN_ON(!buffer_dirty(bh));
2084
2085         path = nilfs_btree_alloc_path();
2086         if (path == NULL)
2087                 return -ENOMEM;
2088
2089         if (buffer_nilfs_node(bh)) {
2090                 node = (struct nilfs_btree_node *)bh->b_data;
2091                 key = nilfs_btree_node_get_key(node, 0);
2092                 level = nilfs_btree_node_get_level(node);
2093         } else {
2094                 key = nilfs_bmap_data_get_key(btree, bh);
2095                 level = NILFS_BTREE_LEVEL_DATA;
2096         }
2097
2098         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2099         if (ret < 0) {
2100                 if (unlikely(ret == -ENOENT))
2101                         nilfs_msg(btree->b_inode->i_sb, KERN_CRIT,
2102                                   "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
2103                                   btree->b_inode->i_ino,
2104                                   (unsigned long long)key, level);
2105                 goto out;
2106         }
2107
2108         ret = NILFS_BMAP_USE_VBN(btree) ?
2109                 nilfs_btree_propagate_v(btree, path, level, bh) :
2110                 nilfs_btree_propagate_p(btree, path, level, bh);
2111
2112  out:
2113         nilfs_btree_free_path(path);
2114
2115         return ret;
2116 }
2117
2118 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2119                                     struct buffer_head *bh)
2120 {
2121         return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2122 }
2123
2124 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2125                                          struct list_head *lists,
2126                                          struct buffer_head *bh)
2127 {
2128         struct list_head *head;
2129         struct buffer_head *cbh;
2130         struct nilfs_btree_node *node, *cnode;
2131         __u64 key, ckey;
2132         int level;
2133
2134         get_bh(bh);
2135         node = (struct nilfs_btree_node *)bh->b_data;
2136         key = nilfs_btree_node_get_key(node, 0);
2137         level = nilfs_btree_node_get_level(node);
2138         if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2139             level >= NILFS_BTREE_LEVEL_MAX) {
2140                 dump_stack();
2141                 nilfs_msg(btree->b_inode->i_sb, KERN_WARNING,
2142                           "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
2143                           level, (unsigned long long)key,
2144                           btree->b_inode->i_ino,
2145                           (unsigned long long)bh->b_blocknr);
2146                 return;
2147         }
2148
2149         list_for_each(head, &lists[level]) {
2150                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2151                 cnode = (struct nilfs_btree_node *)cbh->b_data;
2152                 ckey = nilfs_btree_node_get_key(cnode, 0);
2153                 if (key < ckey)
2154                         break;
2155         }
2156         list_add_tail(&bh->b_assoc_buffers, head);
2157 }
2158
2159 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2160                                              struct list_head *listp)
2161 {
2162         struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
2163         struct address_space *btcache = btnc_inode->i_mapping;
2164         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2165         struct pagevec pvec;
2166         struct buffer_head *bh, *head;
2167         pgoff_t index = 0;
2168         int level, i;
2169
2170         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2171              level < NILFS_BTREE_LEVEL_MAX;
2172              level++)
2173                 INIT_LIST_HEAD(&lists[level]);
2174
2175         pagevec_init(&pvec, 0);
2176
2177         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2178                                   PAGEVEC_SIZE)) {
2179                 for (i = 0; i < pagevec_count(&pvec); i++) {
2180                         bh = head = page_buffers(pvec.pages[i]);
2181                         do {
2182                                 if (buffer_dirty(bh))
2183                                         nilfs_btree_add_dirty_buffer(btree,
2184                                                                      lists, bh);
2185                         } while ((bh = bh->b_this_page) != head);
2186                 }
2187                 pagevec_release(&pvec);
2188                 cond_resched();
2189         }
2190
2191         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2192              level < NILFS_BTREE_LEVEL_MAX;
2193              level++)
2194                 list_splice_tail(&lists[level], listp);
2195 }
2196
2197 static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2198                                 struct nilfs_btree_path *path,
2199                                 int level,
2200                                 struct buffer_head **bh,
2201                                 sector_t blocknr,
2202                                 union nilfs_binfo *binfo)
2203 {
2204         struct nilfs_btree_node *parent;
2205         __u64 key;
2206         __u64 ptr;
2207         int ncmax, ret;
2208
2209         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2210         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2211                                        ncmax);
2212         if (buffer_nilfs_node(*bh)) {
2213                 path[level].bp_ctxt.oldkey = ptr;
2214                 path[level].bp_ctxt.newkey = blocknr;
2215                 path[level].bp_ctxt.bh = *bh;
2216                 ret = nilfs_btnode_prepare_change_key(
2217                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2218                         &path[level].bp_ctxt);
2219                 if (ret < 0)
2220                         return ret;
2221                 nilfs_btnode_commit_change_key(
2222                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2223                         &path[level].bp_ctxt);
2224                 *bh = path[level].bp_ctxt.bh;
2225         }
2226
2227         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2228                                  ncmax);
2229
2230         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2231         /* on-disk format */
2232         binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2233         binfo->bi_dat.bi_level = level;
2234
2235         return 0;
2236 }
2237
2238 static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2239                                 struct nilfs_btree_path *path,
2240                                 int level,
2241                                 struct buffer_head **bh,
2242                                 sector_t blocknr,
2243                                 union nilfs_binfo *binfo)
2244 {
2245         struct nilfs_btree_node *parent;
2246         struct inode *dat = nilfs_bmap_get_dat(btree);
2247         __u64 key;
2248         __u64 ptr;
2249         union nilfs_bmap_ptr_req req;
2250         int ncmax, ret;
2251
2252         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2253         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2254                                        ncmax);
2255         req.bpr_ptr = ptr;
2256         ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2257         if (ret < 0)
2258                 return ret;
2259         nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2260
2261         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2262         /* on-disk format */
2263         binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2264         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2265
2266         return 0;
2267 }
2268
2269 static int nilfs_btree_assign(struct nilfs_bmap *btree,
2270                               struct buffer_head **bh,
2271                               sector_t blocknr,
2272                               union nilfs_binfo *binfo)
2273 {
2274         struct nilfs_btree_path *path;
2275         struct nilfs_btree_node *node;
2276         __u64 key;
2277         int level, ret;
2278
2279         path = nilfs_btree_alloc_path();
2280         if (path == NULL)
2281                 return -ENOMEM;
2282
2283         if (buffer_nilfs_node(*bh)) {
2284                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2285                 key = nilfs_btree_node_get_key(node, 0);
2286                 level = nilfs_btree_node_get_level(node);
2287         } else {
2288                 key = nilfs_bmap_data_get_key(btree, *bh);
2289                 level = NILFS_BTREE_LEVEL_DATA;
2290         }
2291
2292         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2293         if (ret < 0) {
2294                 WARN_ON(ret == -ENOENT);
2295                 goto out;
2296         }
2297
2298         ret = NILFS_BMAP_USE_VBN(btree) ?
2299                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2300                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2301
2302  out:
2303         nilfs_btree_free_path(path);
2304
2305         return ret;
2306 }
2307
2308 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2309                                  struct buffer_head **bh,
2310                                  sector_t blocknr,
2311                                  union nilfs_binfo *binfo)
2312 {
2313         struct nilfs_btree_node *node;
2314         __u64 key;
2315         int ret;
2316
2317         ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2318                              blocknr);
2319         if (ret < 0)
2320                 return ret;
2321
2322         if (buffer_nilfs_node(*bh)) {
2323                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2324                 key = nilfs_btree_node_get_key(node, 0);
2325         } else
2326                 key = nilfs_bmap_data_get_key(btree, *bh);
2327
2328         /* on-disk format */
2329         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2330         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2331
2332         return 0;
2333 }
2334
2335 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2336 {
2337         struct buffer_head *bh;
2338         struct nilfs_btree_path *path;
2339         __u64 ptr;
2340         int ret;
2341
2342         path = nilfs_btree_alloc_path();
2343         if (path == NULL)
2344                 return -ENOMEM;
2345
2346         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2347         if (ret < 0) {
2348                 WARN_ON(ret == -ENOENT);
2349                 goto out;
2350         }
2351         ret = nilfs_btree_get_block(btree, ptr, &bh);
2352         if (ret < 0) {
2353                 WARN_ON(ret == -ENOENT);
2354                 goto out;
2355         }
2356
2357         if (!buffer_dirty(bh))
2358                 mark_buffer_dirty(bh);
2359         brelse(bh);
2360         if (!nilfs_bmap_dirty(btree))
2361                 nilfs_bmap_set_dirty(btree);
2362
2363  out:
2364         nilfs_btree_free_path(path);
2365         return ret;
2366 }
2367
2368 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2369         .bop_lookup             =       nilfs_btree_lookup,
2370         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2371         .bop_insert             =       nilfs_btree_insert,
2372         .bop_delete             =       nilfs_btree_delete,
2373         .bop_clear              =       NULL,
2374
2375         .bop_propagate          =       nilfs_btree_propagate,
2376
2377         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2378
2379         .bop_assign             =       nilfs_btree_assign,
2380         .bop_mark               =       nilfs_btree_mark,
2381
2382         .bop_seek_key           =       nilfs_btree_seek_key,
2383         .bop_last_key           =       nilfs_btree_last_key,
2384
2385         .bop_check_insert       =       NULL,
2386         .bop_check_delete       =       nilfs_btree_check_delete,
2387         .bop_gather_data        =       nilfs_btree_gather_data,
2388 };
2389
2390 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2391         .bop_lookup             =       NULL,
2392         .bop_lookup_contig      =       NULL,
2393         .bop_insert             =       NULL,
2394         .bop_delete             =       NULL,
2395         .bop_clear              =       NULL,
2396
2397         .bop_propagate          =       nilfs_btree_propagate_gc,
2398
2399         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2400
2401         .bop_assign             =       nilfs_btree_assign_gc,
2402         .bop_mark               =       NULL,
2403
2404         .bop_seek_key           =       NULL,
2405         .bop_last_key           =       NULL,
2406
2407         .bop_check_insert       =       NULL,
2408         .bop_check_delete       =       NULL,
2409         .bop_gather_data        =       NULL,
2410 };
2411
2412 static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2413 {
2414         bmap->b_ops = &nilfs_btree_ops;
2415         bmap->b_nchildren_per_block =
2416                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2417 }
2418
2419 int nilfs_btree_init(struct nilfs_bmap *bmap)
2420 {
2421         int ret = 0;
2422
2423         __nilfs_btree_init(bmap);
2424
2425         if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
2426                 ret = -EIO;
2427         else
2428                 ret = nilfs_attach_btree_node_cache(
2429                         &NILFS_BMAP_I(bmap)->vfs_inode);
2430
2431         return ret;
2432 }
2433
2434 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2435 {
2436         bmap->b_ops = &nilfs_btree_ops_gc;
2437         bmap->b_nchildren_per_block =
2438                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2439 }