GNU Linux-libre 4.14.251-gnu1
[releases.git] / fs / xfs / libxfs / xfs_btree.c
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_shared.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_bit.h"
25 #include "xfs_mount.h"
26 #include "xfs_defer.h"
27 #include "xfs_inode.h"
28 #include "xfs_trans.h"
29 #include "xfs_inode_item.h"
30 #include "xfs_buf_item.h"
31 #include "xfs_btree.h"
32 #include "xfs_error.h"
33 #include "xfs_trace.h"
34 #include "xfs_cksum.h"
35 #include "xfs_alloc.h"
36 #include "xfs_log.h"
37
38 /*
39  * Cursor allocation zone.
40  */
41 kmem_zone_t     *xfs_btree_cur_zone;
42
43 /*
44  * Btree magic numbers.
45  */
46 static const uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
47         { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
48           XFS_FIBT_MAGIC, 0 },
49         { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
50           XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC,
51           XFS_REFC_CRC_MAGIC }
52 };
53
54 uint32_t
55 xfs_btree_magic(
56         int                     crc,
57         xfs_btnum_t             btnum)
58 {
59         uint32_t                magic = xfs_magics[crc][btnum];
60
61         /* Ensure we asked for crc for crc-only magics. */
62         ASSERT(magic != 0);
63         return magic;
64 }
65
66 STATIC int                              /* error (0 or EFSCORRUPTED) */
67 xfs_btree_check_lblock(
68         struct xfs_btree_cur    *cur,   /* btree cursor */
69         struct xfs_btree_block  *block, /* btree long form block pointer */
70         int                     level,  /* level of the btree block */
71         struct xfs_buf          *bp)    /* buffer for block, if any */
72 {
73         int                     lblock_ok = 1; /* block passes checks */
74         struct xfs_mount        *mp;    /* file system mount point */
75         xfs_btnum_t             btnum = cur->bc_btnum;
76         int                     crc;
77
78         mp = cur->bc_mp;
79         crc = xfs_sb_version_hascrc(&mp->m_sb);
80
81         if (crc) {
82                 lblock_ok = lblock_ok &&
83                         uuid_equal(&block->bb_u.l.bb_uuid,
84                                    &mp->m_sb.sb_meta_uuid) &&
85                         block->bb_u.l.bb_blkno == cpu_to_be64(
86                                 bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
87         }
88
89         lblock_ok = lblock_ok &&
90                 be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) &&
91                 be16_to_cpu(block->bb_level) == level &&
92                 be16_to_cpu(block->bb_numrecs) <=
93                         cur->bc_ops->get_maxrecs(cur, level) &&
94                 block->bb_u.l.bb_leftsib &&
95                 (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK) ||
96                  XFS_FSB_SANITY_CHECK(mp,
97                         be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
98                 block->bb_u.l.bb_rightsib &&
99                 (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK) ||
100                  XFS_FSB_SANITY_CHECK(mp,
101                         be64_to_cpu(block->bb_u.l.bb_rightsib)));
102
103         if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
104                         XFS_ERRTAG_BTREE_CHECK_LBLOCK))) {
105                 if (bp)
106                         trace_xfs_btree_corrupt(bp, _RET_IP_);
107                 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
108                 return -EFSCORRUPTED;
109         }
110         return 0;
111 }
112
113 STATIC int                              /* error (0 or EFSCORRUPTED) */
114 xfs_btree_check_sblock(
115         struct xfs_btree_cur    *cur,   /* btree cursor */
116         struct xfs_btree_block  *block, /* btree short form block pointer */
117         int                     level,  /* level of the btree block */
118         struct xfs_buf          *bp)    /* buffer containing block */
119 {
120         struct xfs_mount        *mp;    /* file system mount point */
121         struct xfs_buf          *agbp;  /* buffer for ag. freespace struct */
122         struct xfs_agf          *agf;   /* ag. freespace structure */
123         xfs_agblock_t           agflen; /* native ag. freespace length */
124         int                     sblock_ok = 1; /* block passes checks */
125         xfs_btnum_t             btnum = cur->bc_btnum;
126         int                     crc;
127
128         mp = cur->bc_mp;
129         crc = xfs_sb_version_hascrc(&mp->m_sb);
130         agbp = cur->bc_private.a.agbp;
131         agf = XFS_BUF_TO_AGF(agbp);
132         agflen = be32_to_cpu(agf->agf_length);
133
134         if (crc) {
135                 sblock_ok = sblock_ok &&
136                         uuid_equal(&block->bb_u.s.bb_uuid,
137                                    &mp->m_sb.sb_meta_uuid) &&
138                         block->bb_u.s.bb_blkno == cpu_to_be64(
139                                 bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
140         }
141
142         sblock_ok = sblock_ok &&
143                 be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) &&
144                 be16_to_cpu(block->bb_level) == level &&
145                 be16_to_cpu(block->bb_numrecs) <=
146                         cur->bc_ops->get_maxrecs(cur, level) &&
147                 (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
148                  be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
149                 block->bb_u.s.bb_leftsib &&
150                 (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
151                  be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
152                 block->bb_u.s.bb_rightsib;
153
154         if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
155                         XFS_ERRTAG_BTREE_CHECK_SBLOCK))) {
156                 if (bp)
157                         trace_xfs_btree_corrupt(bp, _RET_IP_);
158                 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
159                 return -EFSCORRUPTED;
160         }
161         return 0;
162 }
163
164 /*
165  * Debug routine: check that block header is ok.
166  */
167 int
168 xfs_btree_check_block(
169         struct xfs_btree_cur    *cur,   /* btree cursor */
170         struct xfs_btree_block  *block, /* generic btree block pointer */
171         int                     level,  /* level of the btree block */
172         struct xfs_buf          *bp)    /* buffer containing block, if any */
173 {
174         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
175                 return xfs_btree_check_lblock(cur, block, level, bp);
176         else
177                 return xfs_btree_check_sblock(cur, block, level, bp);
178 }
179
180 /*
181  * Check that (long) pointer is ok.
182  */
183 int                                     /* error (0 or EFSCORRUPTED) */
184 xfs_btree_check_lptr(
185         struct xfs_btree_cur    *cur,   /* btree cursor */
186         xfs_fsblock_t           bno,    /* btree block disk address */
187         int                     level)  /* btree block level */
188 {
189         XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
190                 level > 0 &&
191                 bno != NULLFSBLOCK &&
192                 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
193         return 0;
194 }
195
196 #ifdef DEBUG
197 /*
198  * Check that (short) pointer is ok.
199  */
200 STATIC int                              /* error (0 or EFSCORRUPTED) */
201 xfs_btree_check_sptr(
202         struct xfs_btree_cur    *cur,   /* btree cursor */
203         xfs_agblock_t           bno,    /* btree block disk address */
204         int                     level)  /* btree block level */
205 {
206         xfs_agblock_t           agblocks = cur->bc_mp->m_sb.sb_agblocks;
207
208         XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
209                 level > 0 &&
210                 bno != NULLAGBLOCK &&
211                 bno != 0 &&
212                 bno < agblocks);
213         return 0;
214 }
215
216 /*
217  * Check that block ptr is ok.
218  */
219 STATIC int                              /* error (0 or EFSCORRUPTED) */
220 xfs_btree_check_ptr(
221         struct xfs_btree_cur    *cur,   /* btree cursor */
222         union xfs_btree_ptr     *ptr,   /* btree block disk address */
223         int                     index,  /* offset from ptr to check */
224         int                     level)  /* btree block level */
225 {
226         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
227                 return xfs_btree_check_lptr(cur,
228                                 be64_to_cpu((&ptr->l)[index]), level);
229         } else {
230                 return xfs_btree_check_sptr(cur,
231                                 be32_to_cpu((&ptr->s)[index]), level);
232         }
233 }
234 #endif
235
236 /*
237  * Calculate CRC on the whole btree block and stuff it into the
238  * long-form btree header.
239  *
240  * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
241  * it into the buffer so recovery knows what the last modification was that made
242  * it to disk.
243  */
244 void
245 xfs_btree_lblock_calc_crc(
246         struct xfs_buf          *bp)
247 {
248         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
249         struct xfs_buf_log_item *bip = bp->b_fspriv;
250
251         if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
252                 return;
253         if (bip)
254                 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
255         xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
256 }
257
258 bool
259 xfs_btree_lblock_verify_crc(
260         struct xfs_buf          *bp)
261 {
262         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
263         struct xfs_mount        *mp = bp->b_target->bt_mount;
264
265         if (xfs_sb_version_hascrc(&mp->m_sb)) {
266                 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
267                         return false;
268                 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
269         }
270
271         return true;
272 }
273
274 /*
275  * Calculate CRC on the whole btree block and stuff it into the
276  * short-form btree header.
277  *
278  * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
279  * it into the buffer so recovery knows what the last modification was that made
280  * it to disk.
281  */
282 void
283 xfs_btree_sblock_calc_crc(
284         struct xfs_buf          *bp)
285 {
286         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
287         struct xfs_buf_log_item *bip = bp->b_fspriv;
288
289         if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
290                 return;
291         if (bip)
292                 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
293         xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
294 }
295
296 bool
297 xfs_btree_sblock_verify_crc(
298         struct xfs_buf          *bp)
299 {
300         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
301         struct xfs_mount        *mp = bp->b_target->bt_mount;
302
303         if (xfs_sb_version_hascrc(&mp->m_sb)) {
304                 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
305                         return false;
306                 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
307         }
308
309         return true;
310 }
311
312 static int
313 xfs_btree_free_block(
314         struct xfs_btree_cur    *cur,
315         struct xfs_buf          *bp)
316 {
317         int                     error;
318
319         error = cur->bc_ops->free_block(cur, bp);
320         if (!error) {
321                 xfs_trans_binval(cur->bc_tp, bp);
322                 XFS_BTREE_STATS_INC(cur, free);
323         }
324         return error;
325 }
326
327 /*
328  * Delete the btree cursor.
329  */
330 void
331 xfs_btree_del_cursor(
332         xfs_btree_cur_t *cur,           /* btree cursor */
333         int             error)          /* del because of error */
334 {
335         int             i;              /* btree level */
336
337         /*
338          * Clear the buffer pointers, and release the buffers.
339          * If we're doing this in the face of an error, we
340          * need to make sure to inspect all of the entries
341          * in the bc_bufs array for buffers to be unlocked.
342          * This is because some of the btree code works from
343          * level n down to 0, and if we get an error along
344          * the way we won't have initialized all the entries
345          * down to 0.
346          */
347         for (i = 0; i < cur->bc_nlevels; i++) {
348                 if (cur->bc_bufs[i])
349                         xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
350                 else if (!error)
351                         break;
352         }
353         /*
354          * Can't free a bmap cursor without having dealt with the
355          * allocated indirect blocks' accounting.
356          */
357         ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
358                cur->bc_private.b.allocated == 0);
359         /*
360          * Free the cursor.
361          */
362         kmem_zone_free(xfs_btree_cur_zone, cur);
363 }
364
365 /*
366  * Duplicate the btree cursor.
367  * Allocate a new one, copy the record, re-get the buffers.
368  */
369 int                                     /* error */
370 xfs_btree_dup_cursor(
371         xfs_btree_cur_t *cur,           /* input cursor */
372         xfs_btree_cur_t **ncur)         /* output cursor */
373 {
374         xfs_buf_t       *bp;            /* btree block's buffer pointer */
375         int             error;          /* error return value */
376         int             i;              /* level number of btree block */
377         xfs_mount_t     *mp;            /* mount structure for filesystem */
378         xfs_btree_cur_t *new;           /* new cursor value */
379         xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
380
381         tp = cur->bc_tp;
382         mp = cur->bc_mp;
383
384         /*
385          * Allocate a new cursor like the old one.
386          */
387         new = cur->bc_ops->dup_cursor(cur);
388
389         /*
390          * Copy the record currently in the cursor.
391          */
392         new->bc_rec = cur->bc_rec;
393
394         /*
395          * For each level current, re-get the buffer and copy the ptr value.
396          */
397         for (i = 0; i < new->bc_nlevels; i++) {
398                 new->bc_ptrs[i] = cur->bc_ptrs[i];
399                 new->bc_ra[i] = cur->bc_ra[i];
400                 bp = cur->bc_bufs[i];
401                 if (bp) {
402                         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
403                                                    XFS_BUF_ADDR(bp), mp->m_bsize,
404                                                    0, &bp,
405                                                    cur->bc_ops->buf_ops);
406                         if (error) {
407                                 xfs_btree_del_cursor(new, error);
408                                 *ncur = NULL;
409                                 return error;
410                         }
411                 }
412                 new->bc_bufs[i] = bp;
413         }
414         *ncur = new;
415         return 0;
416 }
417
418 /*
419  * XFS btree block layout and addressing:
420  *
421  * There are two types of blocks in the btree: leaf and non-leaf blocks.
422  *
423  * The leaf record start with a header then followed by records containing
424  * the values.  A non-leaf block also starts with the same header, and
425  * then first contains lookup keys followed by an equal number of pointers
426  * to the btree blocks at the previous level.
427  *
428  *              +--------+-------+-------+-------+-------+-------+-------+
429  * Leaf:        | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
430  *              +--------+-------+-------+-------+-------+-------+-------+
431  *
432  *              +--------+-------+-------+-------+-------+-------+-------+
433  * Non-Leaf:    | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
434  *              +--------+-------+-------+-------+-------+-------+-------+
435  *
436  * The header is called struct xfs_btree_block for reasons better left unknown
437  * and comes in different versions for short (32bit) and long (64bit) block
438  * pointers.  The record and key structures are defined by the btree instances
439  * and opaque to the btree core.  The block pointers are simple disk endian
440  * integers, available in a short (32bit) and long (64bit) variant.
441  *
442  * The helpers below calculate the offset of a given record, key or pointer
443  * into a btree block (xfs_btree_*_offset) or return a pointer to the given
444  * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
445  * inside the btree block is done using indices starting at one, not zero!
446  *
447  * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
448  * overlapping intervals.  In such a tree, records are still sorted lowest to
449  * highest and indexed by the smallest key value that refers to the record.
450  * However, nodes are different: each pointer has two associated keys -- one
451  * indexing the lowest key available in the block(s) below (the same behavior
452  * as the key in a regular btree) and another indexing the highest key
453  * available in the block(s) below.  Because records are /not/ sorted by the
454  * highest key, all leaf block updates require us to compute the highest key
455  * that matches any record in the leaf and to recursively update the high keys
456  * in the nodes going further up in the tree, if necessary.  Nodes look like
457  * this:
458  *
459  *              +--------+-----+-----+-----+-----+-----+-------+-------+-----+
460  * Non-Leaf:    | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
461  *              +--------+-----+-----+-----+-----+-----+-------+-------+-----+
462  *
463  * To perform an interval query on an overlapped tree, perform the usual
464  * depth-first search and use the low and high keys to decide if we can skip
465  * that particular node.  If a leaf node is reached, return the records that
466  * intersect the interval.  Note that an interval query may return numerous
467  * entries.  For a non-overlapped tree, simply search for the record associated
468  * with the lowest key and iterate forward until a non-matching record is
469  * found.  Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by
470  * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discuss this in
471  * more detail.
472  *
473  * Why do we care about overlapping intervals?  Let's say you have a bunch of
474  * reverse mapping records on a reflink filesystem:
475  *
476  * 1: +- file A startblock B offset C length D -----------+
477  * 2:      +- file E startblock F offset G length H --------------+
478  * 3:      +- file I startblock F offset J length K --+
479  * 4:                                                        +- file L... --+
480  *
481  * Now say we want to map block (B+D) into file A at offset (C+D).  Ideally,
482  * we'd simply increment the length of record 1.  But how do we find the record
483  * that ends at (B+D-1) (i.e. record 1)?  A LE lookup of (B+D-1) would return
484  * record 3 because the keys are ordered first by startblock.  An interval
485  * query would return records 1 and 2 because they both overlap (B+D-1), and
486  * from that we can pick out record 1 as the appropriate left neighbor.
487  *
488  * In the non-overlapped case you can do a LE lookup and decrement the cursor
489  * because a record's interval must end before the next record.
490  */
491
492 /*
493  * Return size of the btree block header for this btree instance.
494  */
495 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
496 {
497         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
498                 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
499                         return XFS_BTREE_LBLOCK_CRC_LEN;
500                 return XFS_BTREE_LBLOCK_LEN;
501         }
502         if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
503                 return XFS_BTREE_SBLOCK_CRC_LEN;
504         return XFS_BTREE_SBLOCK_LEN;
505 }
506
507 /*
508  * Return size of btree block pointers for this btree instance.
509  */
510 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
511 {
512         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
513                 sizeof(__be64) : sizeof(__be32);
514 }
515
516 /*
517  * Calculate offset of the n-th record in a btree block.
518  */
519 STATIC size_t
520 xfs_btree_rec_offset(
521         struct xfs_btree_cur    *cur,
522         int                     n)
523 {
524         return xfs_btree_block_len(cur) +
525                 (n - 1) * cur->bc_ops->rec_len;
526 }
527
528 /*
529  * Calculate offset of the n-th key in a btree block.
530  */
531 STATIC size_t
532 xfs_btree_key_offset(
533         struct xfs_btree_cur    *cur,
534         int                     n)
535 {
536         return xfs_btree_block_len(cur) +
537                 (n - 1) * cur->bc_ops->key_len;
538 }
539
540 /*
541  * Calculate offset of the n-th high key in a btree block.
542  */
543 STATIC size_t
544 xfs_btree_high_key_offset(
545         struct xfs_btree_cur    *cur,
546         int                     n)
547 {
548         return xfs_btree_block_len(cur) +
549                 (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2);
550 }
551
552 /*
553  * Calculate offset of the n-th block pointer in a btree block.
554  */
555 STATIC size_t
556 xfs_btree_ptr_offset(
557         struct xfs_btree_cur    *cur,
558         int                     n,
559         int                     level)
560 {
561         return xfs_btree_block_len(cur) +
562                 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
563                 (n - 1) * xfs_btree_ptr_len(cur);
564 }
565
566 /*
567  * Return a pointer to the n-th record in the btree block.
568  */
569 union xfs_btree_rec *
570 xfs_btree_rec_addr(
571         struct xfs_btree_cur    *cur,
572         int                     n,
573         struct xfs_btree_block  *block)
574 {
575         return (union xfs_btree_rec *)
576                 ((char *)block + xfs_btree_rec_offset(cur, n));
577 }
578
579 /*
580  * Return a pointer to the n-th key in the btree block.
581  */
582 union xfs_btree_key *
583 xfs_btree_key_addr(
584         struct xfs_btree_cur    *cur,
585         int                     n,
586         struct xfs_btree_block  *block)
587 {
588         return (union xfs_btree_key *)
589                 ((char *)block + xfs_btree_key_offset(cur, n));
590 }
591
592 /*
593  * Return a pointer to the n-th high key in the btree block.
594  */
595 union xfs_btree_key *
596 xfs_btree_high_key_addr(
597         struct xfs_btree_cur    *cur,
598         int                     n,
599         struct xfs_btree_block  *block)
600 {
601         return (union xfs_btree_key *)
602                 ((char *)block + xfs_btree_high_key_offset(cur, n));
603 }
604
605 /*
606  * Return a pointer to the n-th block pointer in the btree block.
607  */
608 union xfs_btree_ptr *
609 xfs_btree_ptr_addr(
610         struct xfs_btree_cur    *cur,
611         int                     n,
612         struct xfs_btree_block  *block)
613 {
614         int                     level = xfs_btree_get_level(block);
615
616         ASSERT(block->bb_level != 0);
617
618         return (union xfs_btree_ptr *)
619                 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
620 }
621
622 /*
623  * Get the root block which is stored in the inode.
624  *
625  * For now this btree implementation assumes the btree root is always
626  * stored in the if_broot field of an inode fork.
627  */
628 STATIC struct xfs_btree_block *
629 xfs_btree_get_iroot(
630         struct xfs_btree_cur    *cur)
631 {
632         struct xfs_ifork        *ifp;
633
634         ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
635         return (struct xfs_btree_block *)ifp->if_broot;
636 }
637
638 /*
639  * Retrieve the block pointer from the cursor at the given level.
640  * This may be an inode btree root or from a buffer.
641  */
642 struct xfs_btree_block *                /* generic btree block pointer */
643 xfs_btree_get_block(
644         struct xfs_btree_cur    *cur,   /* btree cursor */
645         int                     level,  /* level in btree */
646         struct xfs_buf          **bpp)  /* buffer containing the block */
647 {
648         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
649             (level == cur->bc_nlevels - 1)) {
650                 *bpp = NULL;
651                 return xfs_btree_get_iroot(cur);
652         }
653
654         *bpp = cur->bc_bufs[level];
655         return XFS_BUF_TO_BLOCK(*bpp);
656 }
657
658 /*
659  * Get a buffer for the block, return it with no data read.
660  * Long-form addressing.
661  */
662 xfs_buf_t *                             /* buffer for fsbno */
663 xfs_btree_get_bufl(
664         xfs_mount_t     *mp,            /* file system mount point */
665         xfs_trans_t     *tp,            /* transaction pointer */
666         xfs_fsblock_t   fsbno,          /* file system block number */
667         uint            lock)           /* lock flags for get_buf */
668 {
669         xfs_daddr_t             d;              /* real disk block address */
670
671         ASSERT(fsbno != NULLFSBLOCK);
672         d = XFS_FSB_TO_DADDR(mp, fsbno);
673         return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
674 }
675
676 /*
677  * Get a buffer for the block, return it with no data read.
678  * Short-form addressing.
679  */
680 xfs_buf_t *                             /* buffer for agno/agbno */
681 xfs_btree_get_bufs(
682         xfs_mount_t     *mp,            /* file system mount point */
683         xfs_trans_t     *tp,            /* transaction pointer */
684         xfs_agnumber_t  agno,           /* allocation group number */
685         xfs_agblock_t   agbno,          /* allocation group block number */
686         uint            lock)           /* lock flags for get_buf */
687 {
688         xfs_daddr_t             d;              /* real disk block address */
689
690         ASSERT(agno != NULLAGNUMBER);
691         ASSERT(agbno != NULLAGBLOCK);
692         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
693         return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
694 }
695
696 /*
697  * Check for the cursor referring to the last block at the given level.
698  */
699 int                                     /* 1=is last block, 0=not last block */
700 xfs_btree_islastblock(
701         xfs_btree_cur_t         *cur,   /* btree cursor */
702         int                     level)  /* level to check */
703 {
704         struct xfs_btree_block  *block; /* generic btree block pointer */
705         xfs_buf_t               *bp;    /* buffer containing block */
706
707         block = xfs_btree_get_block(cur, level, &bp);
708         xfs_btree_check_block(cur, block, level, bp);
709         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
710                 return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
711         else
712                 return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
713 }
714
715 /*
716  * Change the cursor to point to the first record at the given level.
717  * Other levels are unaffected.
718  */
719 STATIC int                              /* success=1, failure=0 */
720 xfs_btree_firstrec(
721         xfs_btree_cur_t         *cur,   /* btree cursor */
722         int                     level)  /* level to change */
723 {
724         struct xfs_btree_block  *block; /* generic btree block pointer */
725         xfs_buf_t               *bp;    /* buffer containing block */
726
727         /*
728          * Get the block pointer for this level.
729          */
730         block = xfs_btree_get_block(cur, level, &bp);
731         if (xfs_btree_check_block(cur, block, level, bp))
732                 return 0;
733         /*
734          * It's empty, there is no such record.
735          */
736         if (!block->bb_numrecs)
737                 return 0;
738         /*
739          * Set the ptr value to 1, that's the first record/key.
740          */
741         cur->bc_ptrs[level] = 1;
742         return 1;
743 }
744
745 /*
746  * Change the cursor to point to the last record in the current block
747  * at the given level.  Other levels are unaffected.
748  */
749 STATIC int                              /* success=1, failure=0 */
750 xfs_btree_lastrec(
751         xfs_btree_cur_t         *cur,   /* btree cursor */
752         int                     level)  /* level to change */
753 {
754         struct xfs_btree_block  *block; /* generic btree block pointer */
755         xfs_buf_t               *bp;    /* buffer containing block */
756
757         /*
758          * Get the block pointer for this level.
759          */
760         block = xfs_btree_get_block(cur, level, &bp);
761         if (xfs_btree_check_block(cur, block, level, bp))
762                 return 0;
763         /*
764          * It's empty, there is no such record.
765          */
766         if (!block->bb_numrecs)
767                 return 0;
768         /*
769          * Set the ptr value to numrecs, that's the last record/key.
770          */
771         cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
772         return 1;
773 }
774
775 /*
776  * Compute first and last byte offsets for the fields given.
777  * Interprets the offsets table, which contains struct field offsets.
778  */
779 void
780 xfs_btree_offsets(
781         int64_t         fields,         /* bitmask of fields */
782         const short     *offsets,       /* table of field offsets */
783         int             nbits,          /* number of bits to inspect */
784         int             *first,         /* output: first byte offset */
785         int             *last)          /* output: last byte offset */
786 {
787         int             i;              /* current bit number */
788         int64_t         imask;          /* mask for current bit number */
789
790         ASSERT(fields != 0);
791         /*
792          * Find the lowest bit, so the first byte offset.
793          */
794         for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
795                 if (imask & fields) {
796                         *first = offsets[i];
797                         break;
798                 }
799         }
800         /*
801          * Find the highest bit, so the last byte offset.
802          */
803         for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
804                 if (imask & fields) {
805                         *last = offsets[i + 1] - 1;
806                         break;
807                 }
808         }
809 }
810
811 /*
812  * Get a buffer for the block, return it read in.
813  * Long-form addressing.
814  */
815 int
816 xfs_btree_read_bufl(
817         struct xfs_mount        *mp,            /* file system mount point */
818         struct xfs_trans        *tp,            /* transaction pointer */
819         xfs_fsblock_t           fsbno,          /* file system block number */
820         uint                    lock,           /* lock flags for read_buf */
821         struct xfs_buf          **bpp,          /* buffer for fsbno */
822         int                     refval,         /* ref count value for buffer */
823         const struct xfs_buf_ops *ops)
824 {
825         struct xfs_buf          *bp;            /* return value */
826         xfs_daddr_t             d;              /* real disk block address */
827         int                     error;
828
829         if (!XFS_FSB_SANITY_CHECK(mp, fsbno))
830                 return -EFSCORRUPTED;
831         d = XFS_FSB_TO_DADDR(mp, fsbno);
832         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
833                                    mp->m_bsize, lock, &bp, ops);
834         if (error)
835                 return error;
836         if (bp)
837                 xfs_buf_set_ref(bp, refval);
838         *bpp = bp;
839         return 0;
840 }
841
842 /*
843  * Read-ahead the block, don't wait for it, don't return a buffer.
844  * Long-form addressing.
845  */
846 /* ARGSUSED */
847 void
848 xfs_btree_reada_bufl(
849         struct xfs_mount        *mp,            /* file system mount point */
850         xfs_fsblock_t           fsbno,          /* file system block number */
851         xfs_extlen_t            count,          /* count of filesystem blocks */
852         const struct xfs_buf_ops *ops)
853 {
854         xfs_daddr_t             d;
855
856         ASSERT(fsbno != NULLFSBLOCK);
857         d = XFS_FSB_TO_DADDR(mp, fsbno);
858         xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
859 }
860
861 /*
862  * Read-ahead the block, don't wait for it, don't return a buffer.
863  * Short-form addressing.
864  */
865 /* ARGSUSED */
866 void
867 xfs_btree_reada_bufs(
868         struct xfs_mount        *mp,            /* file system mount point */
869         xfs_agnumber_t          agno,           /* allocation group number */
870         xfs_agblock_t           agbno,          /* allocation group block number */
871         xfs_extlen_t            count,          /* count of filesystem blocks */
872         const struct xfs_buf_ops *ops)
873 {
874         xfs_daddr_t             d;
875
876         ASSERT(agno != NULLAGNUMBER);
877         ASSERT(agbno != NULLAGBLOCK);
878         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
879         xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
880 }
881
882 STATIC int
883 xfs_btree_readahead_lblock(
884         struct xfs_btree_cur    *cur,
885         int                     lr,
886         struct xfs_btree_block  *block)
887 {
888         int                     rval = 0;
889         xfs_fsblock_t           left = be64_to_cpu(block->bb_u.l.bb_leftsib);
890         xfs_fsblock_t           right = be64_to_cpu(block->bb_u.l.bb_rightsib);
891
892         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
893                 xfs_btree_reada_bufl(cur->bc_mp, left, 1,
894                                      cur->bc_ops->buf_ops);
895                 rval++;
896         }
897
898         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
899                 xfs_btree_reada_bufl(cur->bc_mp, right, 1,
900                                      cur->bc_ops->buf_ops);
901                 rval++;
902         }
903
904         return rval;
905 }
906
907 STATIC int
908 xfs_btree_readahead_sblock(
909         struct xfs_btree_cur    *cur,
910         int                     lr,
911         struct xfs_btree_block *block)
912 {
913         int                     rval = 0;
914         xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
915         xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
916
917
918         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
919                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
920                                      left, 1, cur->bc_ops->buf_ops);
921                 rval++;
922         }
923
924         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
925                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
926                                      right, 1, cur->bc_ops->buf_ops);
927                 rval++;
928         }
929
930         return rval;
931 }
932
933 /*
934  * Read-ahead btree blocks, at the given level.
935  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
936  */
937 STATIC int
938 xfs_btree_readahead(
939         struct xfs_btree_cur    *cur,           /* btree cursor */
940         int                     lev,            /* level in btree */
941         int                     lr)             /* left/right bits */
942 {
943         struct xfs_btree_block  *block;
944
945         /*
946          * No readahead needed if we are at the root level and the
947          * btree root is stored in the inode.
948          */
949         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
950             (lev == cur->bc_nlevels - 1))
951                 return 0;
952
953         if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
954                 return 0;
955
956         cur->bc_ra[lev] |= lr;
957         block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
958
959         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
960                 return xfs_btree_readahead_lblock(cur, lr, block);
961         return xfs_btree_readahead_sblock(cur, lr, block);
962 }
963
964 STATIC xfs_daddr_t
965 xfs_btree_ptr_to_daddr(
966         struct xfs_btree_cur    *cur,
967         union xfs_btree_ptr     *ptr)
968 {
969         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
970                 ASSERT(ptr->l != cpu_to_be64(NULLFSBLOCK));
971
972                 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
973         } else {
974                 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
975                 ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
976
977                 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
978                                         be32_to_cpu(ptr->s));
979         }
980 }
981
982 /*
983  * Readahead @count btree blocks at the given @ptr location.
984  *
985  * We don't need to care about long or short form btrees here as we have a
986  * method of converting the ptr directly to a daddr available to us.
987  */
988 STATIC void
989 xfs_btree_readahead_ptr(
990         struct xfs_btree_cur    *cur,
991         union xfs_btree_ptr     *ptr,
992         xfs_extlen_t            count)
993 {
994         xfs_buf_readahead(cur->bc_mp->m_ddev_targp,
995                           xfs_btree_ptr_to_daddr(cur, ptr),
996                           cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
997 }
998
999 /*
1000  * Set the buffer for level "lev" in the cursor to bp, releasing
1001  * any previous buffer.
1002  */
1003 STATIC void
1004 xfs_btree_setbuf(
1005         xfs_btree_cur_t         *cur,   /* btree cursor */
1006         int                     lev,    /* level in btree */
1007         xfs_buf_t               *bp)    /* new buffer to set */
1008 {
1009         struct xfs_btree_block  *b;     /* btree block */
1010
1011         if (cur->bc_bufs[lev])
1012                 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
1013         cur->bc_bufs[lev] = bp;
1014         cur->bc_ra[lev] = 0;
1015
1016         b = XFS_BUF_TO_BLOCK(bp);
1017         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1018                 if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
1019                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
1020                 if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
1021                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
1022         } else {
1023                 if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
1024                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
1025                 if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
1026                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
1027         }
1028 }
1029
1030 STATIC int
1031 xfs_btree_ptr_is_null(
1032         struct xfs_btree_cur    *cur,
1033         union xfs_btree_ptr     *ptr)
1034 {
1035         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1036                 return ptr->l == cpu_to_be64(NULLFSBLOCK);
1037         else
1038                 return ptr->s == cpu_to_be32(NULLAGBLOCK);
1039 }
1040
1041 STATIC void
1042 xfs_btree_set_ptr_null(
1043         struct xfs_btree_cur    *cur,
1044         union xfs_btree_ptr     *ptr)
1045 {
1046         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1047                 ptr->l = cpu_to_be64(NULLFSBLOCK);
1048         else
1049                 ptr->s = cpu_to_be32(NULLAGBLOCK);
1050 }
1051
1052 /*
1053  * Get/set/init sibling pointers
1054  */
1055 STATIC void
1056 xfs_btree_get_sibling(
1057         struct xfs_btree_cur    *cur,
1058         struct xfs_btree_block  *block,
1059         union xfs_btree_ptr     *ptr,
1060         int                     lr)
1061 {
1062         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
1063
1064         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1065                 if (lr == XFS_BB_RIGHTSIB)
1066                         ptr->l = block->bb_u.l.bb_rightsib;
1067                 else
1068                         ptr->l = block->bb_u.l.bb_leftsib;
1069         } else {
1070                 if (lr == XFS_BB_RIGHTSIB)
1071                         ptr->s = block->bb_u.s.bb_rightsib;
1072                 else
1073                         ptr->s = block->bb_u.s.bb_leftsib;
1074         }
1075 }
1076
1077 STATIC void
1078 xfs_btree_set_sibling(
1079         struct xfs_btree_cur    *cur,
1080         struct xfs_btree_block  *block,
1081         union xfs_btree_ptr     *ptr,
1082         int                     lr)
1083 {
1084         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
1085
1086         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1087                 if (lr == XFS_BB_RIGHTSIB)
1088                         block->bb_u.l.bb_rightsib = ptr->l;
1089                 else
1090                         block->bb_u.l.bb_leftsib = ptr->l;
1091         } else {
1092                 if (lr == XFS_BB_RIGHTSIB)
1093                         block->bb_u.s.bb_rightsib = ptr->s;
1094                 else
1095                         block->bb_u.s.bb_leftsib = ptr->s;
1096         }
1097 }
1098
1099 void
1100 xfs_btree_init_block_int(
1101         struct xfs_mount        *mp,
1102         struct xfs_btree_block  *buf,
1103         xfs_daddr_t             blkno,
1104         xfs_btnum_t             btnum,
1105         __u16                   level,
1106         __u16                   numrecs,
1107         __u64                   owner,
1108         unsigned int            flags)
1109 {
1110         int                     crc = xfs_sb_version_hascrc(&mp->m_sb);
1111         __u32                   magic = xfs_btree_magic(crc, btnum);
1112
1113         buf->bb_magic = cpu_to_be32(magic);
1114         buf->bb_level = cpu_to_be16(level);
1115         buf->bb_numrecs = cpu_to_be16(numrecs);
1116
1117         if (flags & XFS_BTREE_LONG_PTRS) {
1118                 buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
1119                 buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
1120                 if (crc) {
1121                         buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
1122                         buf->bb_u.l.bb_owner = cpu_to_be64(owner);
1123                         uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
1124                         buf->bb_u.l.bb_pad = 0;
1125                         buf->bb_u.l.bb_lsn = 0;
1126                 }
1127         } else {
1128                 /* owner is a 32 bit value on short blocks */
1129                 __u32 __owner = (__u32)owner;
1130
1131                 buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1132                 buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1133                 if (crc) {
1134                         buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
1135                         buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
1136                         uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
1137                         buf->bb_u.s.bb_lsn = 0;
1138                 }
1139         }
1140 }
1141
1142 void
1143 xfs_btree_init_block(
1144         struct xfs_mount *mp,
1145         struct xfs_buf  *bp,
1146         xfs_btnum_t     btnum,
1147         __u16           level,
1148         __u16           numrecs,
1149         __u64           owner,
1150         unsigned int    flags)
1151 {
1152         xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
1153                                  btnum, level, numrecs, owner, flags);
1154 }
1155
1156 STATIC void
1157 xfs_btree_init_block_cur(
1158         struct xfs_btree_cur    *cur,
1159         struct xfs_buf          *bp,
1160         int                     level,
1161         int                     numrecs)
1162 {
1163         __u64                   owner;
1164
1165         /*
1166          * we can pull the owner from the cursor right now as the different
1167          * owners align directly with the pointer size of the btree. This may
1168          * change in future, but is safe for current users of the generic btree
1169          * code.
1170          */
1171         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1172                 owner = cur->bc_private.b.ip->i_ino;
1173         else
1174                 owner = cur->bc_private.a.agno;
1175
1176         xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
1177                                  cur->bc_btnum, level, numrecs,
1178                                  owner, cur->bc_flags);
1179 }
1180
1181 /*
1182  * Return true if ptr is the last record in the btree and
1183  * we need to track updates to this record.  The decision
1184  * will be further refined in the update_lastrec method.
1185  */
1186 STATIC int
1187 xfs_btree_is_lastrec(
1188         struct xfs_btree_cur    *cur,
1189         struct xfs_btree_block  *block,
1190         int                     level)
1191 {
1192         union xfs_btree_ptr     ptr;
1193
1194         if (level > 0)
1195                 return 0;
1196         if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
1197                 return 0;
1198
1199         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1200         if (!xfs_btree_ptr_is_null(cur, &ptr))
1201                 return 0;
1202         return 1;
1203 }
1204
1205 STATIC void
1206 xfs_btree_buf_to_ptr(
1207         struct xfs_btree_cur    *cur,
1208         struct xfs_buf          *bp,
1209         union xfs_btree_ptr     *ptr)
1210 {
1211         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1212                 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
1213                                         XFS_BUF_ADDR(bp)));
1214         else {
1215                 ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
1216                                         XFS_BUF_ADDR(bp)));
1217         }
1218 }
1219
1220 STATIC void
1221 xfs_btree_set_refs(
1222         struct xfs_btree_cur    *cur,
1223         struct xfs_buf          *bp)
1224 {
1225         switch (cur->bc_btnum) {
1226         case XFS_BTNUM_BNO:
1227         case XFS_BTNUM_CNT:
1228                 xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
1229                 break;
1230         case XFS_BTNUM_INO:
1231         case XFS_BTNUM_FINO:
1232                 xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
1233                 break;
1234         case XFS_BTNUM_BMAP:
1235                 xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
1236                 break;
1237         case XFS_BTNUM_RMAP:
1238                 xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF);
1239                 break;
1240         case XFS_BTNUM_REFC:
1241                 xfs_buf_set_ref(bp, XFS_REFC_BTREE_REF);
1242                 break;
1243         default:
1244                 ASSERT(0);
1245         }
1246 }
1247
1248 STATIC int
1249 xfs_btree_get_buf_block(
1250         struct xfs_btree_cur    *cur,
1251         union xfs_btree_ptr     *ptr,
1252         int                     flags,
1253         struct xfs_btree_block  **block,
1254         struct xfs_buf          **bpp)
1255 {
1256         struct xfs_mount        *mp = cur->bc_mp;
1257         xfs_daddr_t             d;
1258
1259         /* need to sort out how callers deal with failures first */
1260         ASSERT(!(flags & XBF_TRYLOCK));
1261
1262         d = xfs_btree_ptr_to_daddr(cur, ptr);
1263         *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
1264                                  mp->m_bsize, flags);
1265
1266         if (!*bpp)
1267                 return -ENOMEM;
1268
1269         (*bpp)->b_ops = cur->bc_ops->buf_ops;
1270         *block = XFS_BUF_TO_BLOCK(*bpp);
1271         return 0;
1272 }
1273
1274 /*
1275  * Read in the buffer at the given ptr and return the buffer and
1276  * the block pointer within the buffer.
1277  */
1278 STATIC int
1279 xfs_btree_read_buf_block(
1280         struct xfs_btree_cur    *cur,
1281         union xfs_btree_ptr     *ptr,
1282         int                     flags,
1283         struct xfs_btree_block  **block,
1284         struct xfs_buf          **bpp)
1285 {
1286         struct xfs_mount        *mp = cur->bc_mp;
1287         xfs_daddr_t             d;
1288         int                     error;
1289
1290         /* need to sort out how callers deal with failures first */
1291         ASSERT(!(flags & XBF_TRYLOCK));
1292
1293         d = xfs_btree_ptr_to_daddr(cur, ptr);
1294         error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1295                                    mp->m_bsize, flags, bpp,
1296                                    cur->bc_ops->buf_ops);
1297         if (error)
1298                 return error;
1299
1300         xfs_btree_set_refs(cur, *bpp);
1301         *block = XFS_BUF_TO_BLOCK(*bpp);
1302         return 0;
1303 }
1304
1305 /*
1306  * Copy keys from one btree block to another.
1307  */
1308 STATIC void
1309 xfs_btree_copy_keys(
1310         struct xfs_btree_cur    *cur,
1311         union xfs_btree_key     *dst_key,
1312         union xfs_btree_key     *src_key,
1313         int                     numkeys)
1314 {
1315         ASSERT(numkeys >= 0);
1316         memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1317 }
1318
1319 /*
1320  * Copy records from one btree block to another.
1321  */
1322 STATIC void
1323 xfs_btree_copy_recs(
1324         struct xfs_btree_cur    *cur,
1325         union xfs_btree_rec     *dst_rec,
1326         union xfs_btree_rec     *src_rec,
1327         int                     numrecs)
1328 {
1329         ASSERT(numrecs >= 0);
1330         memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1331 }
1332
1333 /*
1334  * Copy block pointers from one btree block to another.
1335  */
1336 STATIC void
1337 xfs_btree_copy_ptrs(
1338         struct xfs_btree_cur    *cur,
1339         union xfs_btree_ptr     *dst_ptr,
1340         union xfs_btree_ptr     *src_ptr,
1341         int                     numptrs)
1342 {
1343         ASSERT(numptrs >= 0);
1344         memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1345 }
1346
1347 /*
1348  * Shift keys one index left/right inside a single btree block.
1349  */
1350 STATIC void
1351 xfs_btree_shift_keys(
1352         struct xfs_btree_cur    *cur,
1353         union xfs_btree_key     *key,
1354         int                     dir,
1355         int                     numkeys)
1356 {
1357         char                    *dst_key;
1358
1359         ASSERT(numkeys >= 0);
1360         ASSERT(dir == 1 || dir == -1);
1361
1362         dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1363         memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1364 }
1365
1366 /*
1367  * Shift records one index left/right inside a single btree block.
1368  */
1369 STATIC void
1370 xfs_btree_shift_recs(
1371         struct xfs_btree_cur    *cur,
1372         union xfs_btree_rec     *rec,
1373         int                     dir,
1374         int                     numrecs)
1375 {
1376         char                    *dst_rec;
1377
1378         ASSERT(numrecs >= 0);
1379         ASSERT(dir == 1 || dir == -1);
1380
1381         dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1382         memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1383 }
1384
1385 /*
1386  * Shift block pointers one index left/right inside a single btree block.
1387  */
1388 STATIC void
1389 xfs_btree_shift_ptrs(
1390         struct xfs_btree_cur    *cur,
1391         union xfs_btree_ptr     *ptr,
1392         int                     dir,
1393         int                     numptrs)
1394 {
1395         char                    *dst_ptr;
1396
1397         ASSERT(numptrs >= 0);
1398         ASSERT(dir == 1 || dir == -1);
1399
1400         dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1401         memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1402 }
1403
1404 /*
1405  * Log key values from the btree block.
1406  */
1407 STATIC void
1408 xfs_btree_log_keys(
1409         struct xfs_btree_cur    *cur,
1410         struct xfs_buf          *bp,
1411         int                     first,
1412         int                     last)
1413 {
1414         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1415         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1416
1417         if (bp) {
1418                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1419                 xfs_trans_log_buf(cur->bc_tp, bp,
1420                                   xfs_btree_key_offset(cur, first),
1421                                   xfs_btree_key_offset(cur, last + 1) - 1);
1422         } else {
1423                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1424                                 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1425         }
1426
1427         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1428 }
1429
1430 /*
1431  * Log record values from the btree block.
1432  */
1433 void
1434 xfs_btree_log_recs(
1435         struct xfs_btree_cur    *cur,
1436         struct xfs_buf          *bp,
1437         int                     first,
1438         int                     last)
1439 {
1440         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1441         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1442
1443         xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1444         xfs_trans_log_buf(cur->bc_tp, bp,
1445                           xfs_btree_rec_offset(cur, first),
1446                           xfs_btree_rec_offset(cur, last + 1) - 1);
1447
1448         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1449 }
1450
1451 /*
1452  * Log block pointer fields from a btree block (nonleaf).
1453  */
1454 STATIC void
1455 xfs_btree_log_ptrs(
1456         struct xfs_btree_cur    *cur,   /* btree cursor */
1457         struct xfs_buf          *bp,    /* buffer containing btree block */
1458         int                     first,  /* index of first pointer to log */
1459         int                     last)   /* index of last pointer to log */
1460 {
1461         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1462         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1463
1464         if (bp) {
1465                 struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
1466                 int                     level = xfs_btree_get_level(block);
1467
1468                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1469                 xfs_trans_log_buf(cur->bc_tp, bp,
1470                                 xfs_btree_ptr_offset(cur, first, level),
1471                                 xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1472         } else {
1473                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1474                         xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1475         }
1476
1477         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1478 }
1479
1480 /*
1481  * Log fields from a btree block header.
1482  */
1483 void
1484 xfs_btree_log_block(
1485         struct xfs_btree_cur    *cur,   /* btree cursor */
1486         struct xfs_buf          *bp,    /* buffer containing btree block */
1487         int                     fields) /* mask of fields: XFS_BB_... */
1488 {
1489         int                     first;  /* first byte offset logged */
1490         int                     last;   /* last byte offset logged */
1491         static const short      soffsets[] = {  /* table of offsets (short) */
1492                 offsetof(struct xfs_btree_block, bb_magic),
1493                 offsetof(struct xfs_btree_block, bb_level),
1494                 offsetof(struct xfs_btree_block, bb_numrecs),
1495                 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1496                 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
1497                 offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
1498                 offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
1499                 offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
1500                 offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
1501                 offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
1502                 XFS_BTREE_SBLOCK_CRC_LEN
1503         };
1504         static const short      loffsets[] = {  /* table of offsets (long) */
1505                 offsetof(struct xfs_btree_block, bb_magic),
1506                 offsetof(struct xfs_btree_block, bb_level),
1507                 offsetof(struct xfs_btree_block, bb_numrecs),
1508                 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1509                 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
1510                 offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
1511                 offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
1512                 offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
1513                 offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
1514                 offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
1515                 offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
1516                 XFS_BTREE_LBLOCK_CRC_LEN
1517         };
1518
1519         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1520         XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
1521
1522         if (bp) {
1523                 int nbits;
1524
1525                 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
1526                         /*
1527                          * We don't log the CRC when updating a btree
1528                          * block but instead recreate it during log
1529                          * recovery.  As the log buffers have checksums
1530                          * of their own this is safe and avoids logging a crc
1531                          * update in a lot of places.
1532                          */
1533                         if (fields == XFS_BB_ALL_BITS)
1534                                 fields = XFS_BB_ALL_BITS_CRC;
1535                         nbits = XFS_BB_NUM_BITS_CRC;
1536                 } else {
1537                         nbits = XFS_BB_NUM_BITS;
1538                 }
1539                 xfs_btree_offsets(fields,
1540                                   (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1541                                         loffsets : soffsets,
1542                                   nbits, &first, &last);
1543                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1544                 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1545         } else {
1546                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1547                         xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1548         }
1549
1550         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1551 }
1552
1553 /*
1554  * Increment cursor by one record at the level.
1555  * For nonzero levels the leaf-ward information is untouched.
1556  */
1557 int                                             /* error */
1558 xfs_btree_increment(
1559         struct xfs_btree_cur    *cur,
1560         int                     level,
1561         int                     *stat)          /* success/failure */
1562 {
1563         struct xfs_btree_block  *block;
1564         union xfs_btree_ptr     ptr;
1565         struct xfs_buf          *bp;
1566         int                     error;          /* error return value */
1567         int                     lev;
1568
1569         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1570         XFS_BTREE_TRACE_ARGI(cur, level);
1571
1572         ASSERT(level < cur->bc_nlevels);
1573
1574         /* Read-ahead to the right at this level. */
1575         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1576
1577         /* Get a pointer to the btree block. */
1578         block = xfs_btree_get_block(cur, level, &bp);
1579
1580 #ifdef DEBUG
1581         error = xfs_btree_check_block(cur, block, level, bp);
1582         if (error)
1583                 goto error0;
1584 #endif
1585
1586         /* We're done if we remain in the block after the increment. */
1587         if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1588                 goto out1;
1589
1590         /* Fail if we just went off the right edge of the tree. */
1591         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1592         if (xfs_btree_ptr_is_null(cur, &ptr))
1593                 goto out0;
1594
1595         XFS_BTREE_STATS_INC(cur, increment);
1596
1597         /*
1598          * March up the tree incrementing pointers.
1599          * Stop when we don't go off the right edge of a block.
1600          */
1601         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1602                 block = xfs_btree_get_block(cur, lev, &bp);
1603
1604 #ifdef DEBUG
1605                 error = xfs_btree_check_block(cur, block, lev, bp);
1606                 if (error)
1607                         goto error0;
1608 #endif
1609
1610                 if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1611                         break;
1612
1613                 /* Read-ahead the right block for the next loop. */
1614                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1615         }
1616
1617         /*
1618          * If we went off the root then we are either seriously
1619          * confused or have the tree root in an inode.
1620          */
1621         if (lev == cur->bc_nlevels) {
1622                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1623                         goto out0;
1624                 ASSERT(0);
1625                 error = -EFSCORRUPTED;
1626                 goto error0;
1627         }
1628         ASSERT(lev < cur->bc_nlevels);
1629
1630         /*
1631          * Now walk back down the tree, fixing up the cursor's buffer
1632          * pointers and key numbers.
1633          */
1634         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1635                 union xfs_btree_ptr     *ptrp;
1636
1637                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1638                 --lev;
1639                 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
1640                 if (error)
1641                         goto error0;
1642
1643                 xfs_btree_setbuf(cur, lev, bp);
1644                 cur->bc_ptrs[lev] = 1;
1645         }
1646 out1:
1647         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1648         *stat = 1;
1649         return 0;
1650
1651 out0:
1652         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1653         *stat = 0;
1654         return 0;
1655
1656 error0:
1657         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1658         return error;
1659 }
1660
1661 /*
1662  * Decrement cursor by one record at the level.
1663  * For nonzero levels the leaf-ward information is untouched.
1664  */
1665 int                                             /* error */
1666 xfs_btree_decrement(
1667         struct xfs_btree_cur    *cur,
1668         int                     level,
1669         int                     *stat)          /* success/failure */
1670 {
1671         struct xfs_btree_block  *block;
1672         xfs_buf_t               *bp;
1673         int                     error;          /* error return value */
1674         int                     lev;
1675         union xfs_btree_ptr     ptr;
1676
1677         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1678         XFS_BTREE_TRACE_ARGI(cur, level);
1679
1680         ASSERT(level < cur->bc_nlevels);
1681
1682         /* Read-ahead to the left at this level. */
1683         xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1684
1685         /* We're done if we remain in the block after the decrement. */
1686         if (--cur->bc_ptrs[level] > 0)
1687                 goto out1;
1688
1689         /* Get a pointer to the btree block. */
1690         block = xfs_btree_get_block(cur, level, &bp);
1691
1692 #ifdef DEBUG
1693         error = xfs_btree_check_block(cur, block, level, bp);
1694         if (error)
1695                 goto error0;
1696 #endif
1697
1698         /* Fail if we just went off the left edge of the tree. */
1699         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1700         if (xfs_btree_ptr_is_null(cur, &ptr))
1701                 goto out0;
1702
1703         XFS_BTREE_STATS_INC(cur, decrement);
1704
1705         /*
1706          * March up the tree decrementing pointers.
1707          * Stop when we don't go off the left edge of a block.
1708          */
1709         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1710                 if (--cur->bc_ptrs[lev] > 0)
1711                         break;
1712                 /* Read-ahead the left block for the next loop. */
1713                 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1714         }
1715
1716         /*
1717          * If we went off the root then we are seriously confused.
1718          * or the root of the tree is in an inode.
1719          */
1720         if (lev == cur->bc_nlevels) {
1721                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1722                         goto out0;
1723                 ASSERT(0);
1724                 error = -EFSCORRUPTED;
1725                 goto error0;
1726         }
1727         ASSERT(lev < cur->bc_nlevels);
1728
1729         /*
1730          * Now walk back down the tree, fixing up the cursor's buffer
1731          * pointers and key numbers.
1732          */
1733         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1734                 union xfs_btree_ptr     *ptrp;
1735
1736                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1737                 --lev;
1738                 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
1739                 if (error)
1740                         goto error0;
1741                 xfs_btree_setbuf(cur, lev, bp);
1742                 cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
1743         }
1744 out1:
1745         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1746         *stat = 1;
1747         return 0;
1748
1749 out0:
1750         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1751         *stat = 0;
1752         return 0;
1753
1754 error0:
1755         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1756         return error;
1757 }
1758
1759 int
1760 xfs_btree_lookup_get_block(
1761         struct xfs_btree_cur    *cur,   /* btree cursor */
1762         int                     level,  /* level in the btree */
1763         union xfs_btree_ptr     *pp,    /* ptr to btree block */
1764         struct xfs_btree_block  **blkp) /* return btree block */
1765 {
1766         struct xfs_buf          *bp;    /* buffer pointer for btree block */
1767         int                     error = 0;
1768
1769         /* special case the root block if in an inode */
1770         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1771             (level == cur->bc_nlevels - 1)) {
1772                 *blkp = xfs_btree_get_iroot(cur);
1773                 return 0;
1774         }
1775
1776         /*
1777          * If the old buffer at this level for the disk address we are
1778          * looking for re-use it.
1779          *
1780          * Otherwise throw it away and get a new one.
1781          */
1782         bp = cur->bc_bufs[level];
1783         if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1784                 *blkp = XFS_BUF_TO_BLOCK(bp);
1785                 return 0;
1786         }
1787
1788         error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
1789         if (error)
1790                 return error;
1791
1792         /* Check the inode owner since the verifiers don't. */
1793         if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) &&
1794             !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_INVALID_OWNER) &&
1795             (cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
1796             be64_to_cpu((*blkp)->bb_u.l.bb_owner) !=
1797                         cur->bc_private.b.ip->i_ino)
1798                 goto out_bad;
1799
1800         /* Did we get the level we were looking for? */
1801         if (be16_to_cpu((*blkp)->bb_level) != level)
1802                 goto out_bad;
1803
1804         /* Check that internal nodes have at least one record. */
1805         if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0)
1806                 goto out_bad;
1807
1808         xfs_btree_setbuf(cur, level, bp);
1809         return 0;
1810
1811 out_bad:
1812         *blkp = NULL;
1813         xfs_trans_brelse(cur->bc_tp, bp);
1814         return -EFSCORRUPTED;
1815 }
1816
1817 /*
1818  * Get current search key.  For level 0 we don't actually have a key
1819  * structure so we make one up from the record.  For all other levels
1820  * we just return the right key.
1821  */
1822 STATIC union xfs_btree_key *
1823 xfs_lookup_get_search_key(
1824         struct xfs_btree_cur    *cur,
1825         int                     level,
1826         int                     keyno,
1827         struct xfs_btree_block  *block,
1828         union xfs_btree_key     *kp)
1829 {
1830         if (level == 0) {
1831                 cur->bc_ops->init_key_from_rec(kp,
1832                                 xfs_btree_rec_addr(cur, keyno, block));
1833                 return kp;
1834         }
1835
1836         return xfs_btree_key_addr(cur, keyno, block);
1837 }
1838
1839 /*
1840  * Lookup the record.  The cursor is made to point to it, based on dir.
1841  * stat is set to 0 if can't find any such record, 1 for success.
1842  */
1843 int                                     /* error */
1844 xfs_btree_lookup(
1845         struct xfs_btree_cur    *cur,   /* btree cursor */
1846         xfs_lookup_t            dir,    /* <=, ==, or >= */
1847         int                     *stat)  /* success/failure */
1848 {
1849         struct xfs_btree_block  *block; /* current btree block */
1850         int64_t                 diff;   /* difference for the current key */
1851         int                     error;  /* error return value */
1852         int                     keyno;  /* current key number */
1853         int                     level;  /* level in the btree */
1854         union xfs_btree_ptr     *pp;    /* ptr to btree block */
1855         union xfs_btree_ptr     ptr;    /* ptr to btree block */
1856
1857         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1858         XFS_BTREE_TRACE_ARGI(cur, dir);
1859
1860         XFS_BTREE_STATS_INC(cur, lookup);
1861
1862         /* No such thing as a zero-level tree. */
1863         if (cur->bc_nlevels == 0)
1864                 return -EFSCORRUPTED;
1865
1866         block = NULL;
1867         keyno = 0;
1868
1869         /* initialise start pointer from cursor */
1870         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1871         pp = &ptr;
1872
1873         /*
1874          * Iterate over each level in the btree, starting at the root.
1875          * For each level above the leaves, find the key we need, based
1876          * on the lookup record, then follow the corresponding block
1877          * pointer down to the next level.
1878          */
1879         for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1880                 /* Get the block we need to do the lookup on. */
1881                 error = xfs_btree_lookup_get_block(cur, level, pp, &block);
1882                 if (error)
1883                         goto error0;
1884
1885                 if (diff == 0) {
1886                         /*
1887                          * If we already had a key match at a higher level, we
1888                          * know we need to use the first entry in this block.
1889                          */
1890                         keyno = 1;
1891                 } else {
1892                         /* Otherwise search this block. Do a binary search. */
1893
1894                         int     high;   /* high entry number */
1895                         int     low;    /* low entry number */
1896
1897                         /* Set low and high entry numbers, 1-based. */
1898                         low = 1;
1899                         high = xfs_btree_get_numrecs(block);
1900                         if (!high) {
1901                                 /* Block is empty, must be an empty leaf. */
1902                                 ASSERT(level == 0 && cur->bc_nlevels == 1);
1903
1904                                 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1905                                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1906                                 *stat = 0;
1907                                 return 0;
1908                         }
1909
1910                         /* Binary search the block. */
1911                         while (low <= high) {
1912                                 union xfs_btree_key     key;
1913                                 union xfs_btree_key     *kp;
1914
1915                                 XFS_BTREE_STATS_INC(cur, compare);
1916
1917                                 /* keyno is average of low and high. */
1918                                 keyno = (low + high) >> 1;
1919
1920                                 /* Get current search key */
1921                                 kp = xfs_lookup_get_search_key(cur, level,
1922                                                 keyno, block, &key);
1923
1924                                 /*
1925                                  * Compute difference to get next direction:
1926                                  *  - less than, move right
1927                                  *  - greater than, move left
1928                                  *  - equal, we're done
1929                                  */
1930                                 diff = cur->bc_ops->key_diff(cur, kp);
1931                                 if (diff < 0)
1932                                         low = keyno + 1;
1933                                 else if (diff > 0)
1934                                         high = keyno - 1;
1935                                 else
1936                                         break;
1937                         }
1938                 }
1939
1940                 /*
1941                  * If there are more levels, set up for the next level
1942                  * by getting the block number and filling in the cursor.
1943                  */
1944                 if (level > 0) {
1945                         /*
1946                          * If we moved left, need the previous key number,
1947                          * unless there isn't one.
1948                          */
1949                         if (diff > 0 && --keyno < 1)
1950                                 keyno = 1;
1951                         pp = xfs_btree_ptr_addr(cur, keyno, block);
1952
1953 #ifdef DEBUG
1954                         error = xfs_btree_check_ptr(cur, pp, 0, level);
1955                         if (error)
1956                                 goto error0;
1957 #endif
1958                         cur->bc_ptrs[level] = keyno;
1959                 }
1960         }
1961
1962         /* Done with the search. See if we need to adjust the results. */
1963         if (dir != XFS_LOOKUP_LE && diff < 0) {
1964                 keyno++;
1965                 /*
1966                  * If ge search and we went off the end of the block, but it's
1967                  * not the last block, we're in the wrong block.
1968                  */
1969                 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1970                 if (dir == XFS_LOOKUP_GE &&
1971                     keyno > xfs_btree_get_numrecs(block) &&
1972                     !xfs_btree_ptr_is_null(cur, &ptr)) {
1973                         int     i;
1974
1975                         cur->bc_ptrs[0] = keyno;
1976                         error = xfs_btree_increment(cur, 0, &i);
1977                         if (error)
1978                                 goto error0;
1979                         XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
1980                         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1981                         *stat = 1;
1982                         return 0;
1983                 }
1984         } else if (dir == XFS_LOOKUP_LE && diff > 0)
1985                 keyno--;
1986         cur->bc_ptrs[0] = keyno;
1987
1988         /* Return if we succeeded or not. */
1989         if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
1990                 *stat = 0;
1991         else if (dir != XFS_LOOKUP_EQ || diff == 0)
1992                 *stat = 1;
1993         else
1994                 *stat = 0;
1995         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1996         return 0;
1997
1998 error0:
1999         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2000         return error;
2001 }
2002
2003 /* Find the high key storage area from a regular key. */
2004 STATIC union xfs_btree_key *
2005 xfs_btree_high_key_from_key(
2006         struct xfs_btree_cur    *cur,
2007         union xfs_btree_key     *key)
2008 {
2009         ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
2010         return (union xfs_btree_key *)((char *)key +
2011                         (cur->bc_ops->key_len / 2));
2012 }
2013
2014 /* Determine the low (and high if overlapped) keys of a leaf block */
2015 STATIC void
2016 xfs_btree_get_leaf_keys(
2017         struct xfs_btree_cur    *cur,
2018         struct xfs_btree_block  *block,
2019         union xfs_btree_key     *key)
2020 {
2021         union xfs_btree_key     max_hkey;
2022         union xfs_btree_key     hkey;
2023         union xfs_btree_rec     *rec;
2024         union xfs_btree_key     *high;
2025         int                     n;
2026
2027         rec = xfs_btree_rec_addr(cur, 1, block);
2028         cur->bc_ops->init_key_from_rec(key, rec);
2029
2030         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2031
2032                 cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
2033                 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2034                         rec = xfs_btree_rec_addr(cur, n, block);
2035                         cur->bc_ops->init_high_key_from_rec(&hkey, rec);
2036                         if (cur->bc_ops->diff_two_keys(cur, &hkey, &max_hkey)
2037                                         > 0)
2038                                 max_hkey = hkey;
2039                 }
2040
2041                 high = xfs_btree_high_key_from_key(cur, key);
2042                 memcpy(high, &max_hkey, cur->bc_ops->key_len / 2);
2043         }
2044 }
2045
2046 /* Determine the low (and high if overlapped) keys of a node block */
2047 STATIC void
2048 xfs_btree_get_node_keys(
2049         struct xfs_btree_cur    *cur,
2050         struct xfs_btree_block  *block,
2051         union xfs_btree_key     *key)
2052 {
2053         union xfs_btree_key     *hkey;
2054         union xfs_btree_key     *max_hkey;
2055         union xfs_btree_key     *high;
2056         int                     n;
2057
2058         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2059                 memcpy(key, xfs_btree_key_addr(cur, 1, block),
2060                                 cur->bc_ops->key_len / 2);
2061
2062                 max_hkey = xfs_btree_high_key_addr(cur, 1, block);
2063                 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2064                         hkey = xfs_btree_high_key_addr(cur, n, block);
2065                         if (cur->bc_ops->diff_two_keys(cur, hkey, max_hkey) > 0)
2066                                 max_hkey = hkey;
2067                 }
2068
2069                 high = xfs_btree_high_key_from_key(cur, key);
2070                 memcpy(high, max_hkey, cur->bc_ops->key_len / 2);
2071         } else {
2072                 memcpy(key, xfs_btree_key_addr(cur, 1, block),
2073                                 cur->bc_ops->key_len);
2074         }
2075 }
2076
2077 /* Derive the keys for any btree block. */
2078 STATIC void
2079 xfs_btree_get_keys(
2080         struct xfs_btree_cur    *cur,
2081         struct xfs_btree_block  *block,
2082         union xfs_btree_key     *key)
2083 {
2084         if (be16_to_cpu(block->bb_level) == 0)
2085                 xfs_btree_get_leaf_keys(cur, block, key);
2086         else
2087                 xfs_btree_get_node_keys(cur, block, key);
2088 }
2089
2090 /*
2091  * Decide if we need to update the parent keys of a btree block.  For
2092  * a standard btree this is only necessary if we're updating the first
2093  * record/key.  For an overlapping btree, we must always update the
2094  * keys because the highest key can be in any of the records or keys
2095  * in the block.
2096  */
2097 static inline bool
2098 xfs_btree_needs_key_update(
2099         struct xfs_btree_cur    *cur,
2100         int                     ptr)
2101 {
2102         return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
2103 }
2104
2105 /*
2106  * Update the low and high parent keys of the given level, progressing
2107  * towards the root.  If force_all is false, stop if the keys for a given
2108  * level do not need updating.
2109  */
2110 STATIC int
2111 __xfs_btree_updkeys(
2112         struct xfs_btree_cur    *cur,
2113         int                     level,
2114         struct xfs_btree_block  *block,
2115         struct xfs_buf          *bp0,
2116         bool                    force_all)
2117 {
2118         union xfs_btree_key     key;    /* keys from current level */
2119         union xfs_btree_key     *lkey;  /* keys from the next level up */
2120         union xfs_btree_key     *hkey;
2121         union xfs_btree_key     *nlkey; /* keys from the next level up */
2122         union xfs_btree_key     *nhkey;
2123         struct xfs_buf          *bp;
2124         int                     ptr;
2125
2126         ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
2127
2128         /* Exit if there aren't any parent levels to update. */
2129         if (level + 1 >= cur->bc_nlevels)
2130                 return 0;
2131
2132         trace_xfs_btree_updkeys(cur, level, bp0);
2133
2134         lkey = &key;
2135         hkey = xfs_btree_high_key_from_key(cur, lkey);
2136         xfs_btree_get_keys(cur, block, lkey);
2137         for (level++; level < cur->bc_nlevels; level++) {
2138 #ifdef DEBUG
2139                 int             error;
2140 #endif
2141                 block = xfs_btree_get_block(cur, level, &bp);
2142                 trace_xfs_btree_updkeys(cur, level, bp);
2143 #ifdef DEBUG
2144                 error = xfs_btree_check_block(cur, block, level, bp);
2145                 if (error) {
2146                         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2147                         return error;
2148                 }
2149 #endif
2150                 ptr = cur->bc_ptrs[level];
2151                 nlkey = xfs_btree_key_addr(cur, ptr, block);
2152                 nhkey = xfs_btree_high_key_addr(cur, ptr, block);
2153                 if (!force_all &&
2154                     !(cur->bc_ops->diff_two_keys(cur, nlkey, lkey) != 0 ||
2155                       cur->bc_ops->diff_two_keys(cur, nhkey, hkey) != 0))
2156                         break;
2157                 xfs_btree_copy_keys(cur, nlkey, lkey, 1);
2158                 xfs_btree_log_keys(cur, bp, ptr, ptr);
2159                 if (level + 1 >= cur->bc_nlevels)
2160                         break;
2161                 xfs_btree_get_node_keys(cur, block, lkey);
2162         }
2163
2164         return 0;
2165 }
2166
2167 /* Update all the keys from some level in cursor back to the root. */
2168 STATIC int
2169 xfs_btree_updkeys_force(
2170         struct xfs_btree_cur    *cur,
2171         int                     level)
2172 {
2173         struct xfs_buf          *bp;
2174         struct xfs_btree_block  *block;
2175
2176         block = xfs_btree_get_block(cur, level, &bp);
2177         return __xfs_btree_updkeys(cur, level, block, bp, true);
2178 }
2179
2180 /*
2181  * Update the parent keys of the given level, progressing towards the root.
2182  */
2183 STATIC int
2184 xfs_btree_update_keys(
2185         struct xfs_btree_cur    *cur,
2186         int                     level)
2187 {
2188         struct xfs_btree_block  *block;
2189         struct xfs_buf          *bp;
2190         union xfs_btree_key     *kp;
2191         union xfs_btree_key     key;
2192         int                     ptr;
2193
2194         ASSERT(level >= 0);
2195
2196         block = xfs_btree_get_block(cur, level, &bp);
2197         if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
2198                 return __xfs_btree_updkeys(cur, level, block, bp, false);
2199
2200         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2201         XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
2202
2203         /*
2204          * Go up the tree from this level toward the root.
2205          * At each level, update the key value to the value input.
2206          * Stop when we reach a level where the cursor isn't pointing
2207          * at the first entry in the block.
2208          */
2209         xfs_btree_get_keys(cur, block, &key);
2210         for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
2211 #ifdef DEBUG
2212                 int             error;
2213 #endif
2214                 block = xfs_btree_get_block(cur, level, &bp);
2215 #ifdef DEBUG
2216                 error = xfs_btree_check_block(cur, block, level, bp);
2217                 if (error) {
2218                         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2219                         return error;
2220                 }
2221 #endif
2222                 ptr = cur->bc_ptrs[level];
2223                 kp = xfs_btree_key_addr(cur, ptr, block);
2224                 xfs_btree_copy_keys(cur, kp, &key, 1);
2225                 xfs_btree_log_keys(cur, bp, ptr, ptr);
2226         }
2227
2228         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2229         return 0;
2230 }
2231
2232 /*
2233  * Update the record referred to by cur to the value in the
2234  * given record. This either works (return 0) or gets an
2235  * EFSCORRUPTED error.
2236  */
2237 int
2238 xfs_btree_update(
2239         struct xfs_btree_cur    *cur,
2240         union xfs_btree_rec     *rec)
2241 {
2242         struct xfs_btree_block  *block;
2243         struct xfs_buf          *bp;
2244         int                     error;
2245         int                     ptr;
2246         union xfs_btree_rec     *rp;
2247
2248         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2249         XFS_BTREE_TRACE_ARGR(cur, rec);
2250
2251         /* Pick up the current block. */
2252         block = xfs_btree_get_block(cur, 0, &bp);
2253
2254 #ifdef DEBUG
2255         error = xfs_btree_check_block(cur, block, 0, bp);
2256         if (error)
2257                 goto error0;
2258 #endif
2259         /* Get the address of the rec to be updated. */
2260         ptr = cur->bc_ptrs[0];
2261         rp = xfs_btree_rec_addr(cur, ptr, block);
2262
2263         /* Fill in the new contents and log them. */
2264         xfs_btree_copy_recs(cur, rp, rec, 1);
2265         xfs_btree_log_recs(cur, bp, ptr, ptr);
2266
2267         /*
2268          * If we are tracking the last record in the tree and
2269          * we are at the far right edge of the tree, update it.
2270          */
2271         if (xfs_btree_is_lastrec(cur, block, 0)) {
2272                 cur->bc_ops->update_lastrec(cur, block, rec,
2273                                             ptr, LASTREC_UPDATE);
2274         }
2275
2276         /* Pass new key value up to our parent. */
2277         if (xfs_btree_needs_key_update(cur, ptr)) {
2278                 error = xfs_btree_update_keys(cur, 0);
2279                 if (error)
2280                         goto error0;
2281         }
2282
2283         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2284         return 0;
2285
2286 error0:
2287         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2288         return error;
2289 }
2290
2291 /*
2292  * Move 1 record left from cur/level if possible.
2293  * Update cur to reflect the new path.
2294  */
2295 STATIC int                                      /* error */
2296 xfs_btree_lshift(
2297         struct xfs_btree_cur    *cur,
2298         int                     level,
2299         int                     *stat)          /* success/failure */
2300 {
2301         struct xfs_buf          *lbp;           /* left buffer pointer */
2302         struct xfs_btree_block  *left;          /* left btree block */
2303         int                     lrecs;          /* left record count */
2304         struct xfs_buf          *rbp;           /* right buffer pointer */
2305         struct xfs_btree_block  *right;         /* right btree block */
2306         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
2307         int                     rrecs;          /* right record count */
2308         union xfs_btree_ptr     lptr;           /* left btree pointer */
2309         union xfs_btree_key     *rkp = NULL;    /* right btree key */
2310         union xfs_btree_ptr     *rpp = NULL;    /* right address pointer */
2311         union xfs_btree_rec     *rrp = NULL;    /* right record pointer */
2312         int                     error;          /* error return value */
2313         int                     i;
2314
2315         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2316         XFS_BTREE_TRACE_ARGI(cur, level);
2317
2318         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2319             level == cur->bc_nlevels - 1)
2320                 goto out0;
2321
2322         /* Set up variables for this block as "right". */
2323         right = xfs_btree_get_block(cur, level, &rbp);
2324
2325 #ifdef DEBUG
2326         error = xfs_btree_check_block(cur, right, level, rbp);
2327         if (error)
2328                 goto error0;
2329 #endif
2330
2331         /* If we've got no left sibling then we can't shift an entry left. */
2332         xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2333         if (xfs_btree_ptr_is_null(cur, &lptr))
2334                 goto out0;
2335
2336         /*
2337          * If the cursor entry is the one that would be moved, don't
2338          * do it... it's too complicated.
2339          */
2340         if (cur->bc_ptrs[level] <= 1)
2341                 goto out0;
2342
2343         /* Set up the left neighbor as "left". */
2344         error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
2345         if (error)
2346                 goto error0;
2347
2348         /* If it's full, it can't take another entry. */
2349         lrecs = xfs_btree_get_numrecs(left);
2350         if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
2351                 goto out0;
2352
2353         rrecs = xfs_btree_get_numrecs(right);
2354
2355         /*
2356          * We add one entry to the left side and remove one for the right side.
2357          * Account for it here, the changes will be updated on disk and logged
2358          * later.
2359          */
2360         lrecs++;
2361         rrecs--;
2362
2363         XFS_BTREE_STATS_INC(cur, lshift);
2364         XFS_BTREE_STATS_ADD(cur, moves, 1);
2365
2366         /*
2367          * If non-leaf, copy a key and a ptr to the left block.
2368          * Log the changes to the left block.
2369          */
2370         if (level > 0) {
2371                 /* It's a non-leaf.  Move keys and pointers. */
2372                 union xfs_btree_key     *lkp;   /* left btree key */
2373                 union xfs_btree_ptr     *lpp;   /* left address pointer */
2374
2375                 lkp = xfs_btree_key_addr(cur, lrecs, left);
2376                 rkp = xfs_btree_key_addr(cur, 1, right);
2377
2378                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2379                 rpp = xfs_btree_ptr_addr(cur, 1, right);
2380 #ifdef DEBUG
2381                 error = xfs_btree_check_ptr(cur, rpp, 0, level);
2382                 if (error)
2383                         goto error0;
2384 #endif
2385                 xfs_btree_copy_keys(cur, lkp, rkp, 1);
2386                 xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
2387
2388                 xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
2389                 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
2390
2391                 ASSERT(cur->bc_ops->keys_inorder(cur,
2392                         xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
2393         } else {
2394                 /* It's a leaf.  Move records.  */
2395                 union xfs_btree_rec     *lrp;   /* left record pointer */
2396
2397                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
2398                 rrp = xfs_btree_rec_addr(cur, 1, right);
2399
2400                 xfs_btree_copy_recs(cur, lrp, rrp, 1);
2401                 xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
2402
2403                 ASSERT(cur->bc_ops->recs_inorder(cur,
2404                         xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
2405         }
2406
2407         xfs_btree_set_numrecs(left, lrecs);
2408         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2409
2410         xfs_btree_set_numrecs(right, rrecs);
2411         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2412
2413         /*
2414          * Slide the contents of right down one entry.
2415          */
2416         XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
2417         if (level > 0) {
2418                 /* It's a nonleaf. operate on keys and ptrs */
2419 #ifdef DEBUG
2420                 int                     i;              /* loop index */
2421
2422                 for (i = 0; i < rrecs; i++) {
2423                         error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
2424                         if (error)
2425                                 goto error0;
2426                 }
2427 #endif
2428                 xfs_btree_shift_keys(cur,
2429                                 xfs_btree_key_addr(cur, 2, right),
2430                                 -1, rrecs);
2431                 xfs_btree_shift_ptrs(cur,
2432                                 xfs_btree_ptr_addr(cur, 2, right),
2433                                 -1, rrecs);
2434
2435                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
2436                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2437         } else {
2438                 /* It's a leaf. operate on records */
2439                 xfs_btree_shift_recs(cur,
2440                         xfs_btree_rec_addr(cur, 2, right),
2441                         -1, rrecs);
2442                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2443         }
2444
2445         /*
2446          * Using a temporary cursor, update the parent key values of the
2447          * block on the left.
2448          */
2449         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2450                 error = xfs_btree_dup_cursor(cur, &tcur);
2451                 if (error)
2452                         goto error0;
2453                 i = xfs_btree_firstrec(tcur, level);
2454                 XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);
2455
2456                 error = xfs_btree_decrement(tcur, level, &i);
2457                 if (error)
2458                         goto error1;
2459
2460                 /* Update the parent high keys of the left block, if needed. */
2461                 error = xfs_btree_update_keys(tcur, level);
2462                 if (error)
2463                         goto error1;
2464
2465                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2466         }
2467
2468         /* Update the parent keys of the right block. */
2469         error = xfs_btree_update_keys(cur, level);
2470         if (error)
2471                 goto error0;
2472
2473         /* Slide the cursor value left one. */
2474         cur->bc_ptrs[level]--;
2475
2476         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2477         *stat = 1;
2478         return 0;
2479
2480 out0:
2481         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2482         *stat = 0;
2483         return 0;
2484
2485 error0:
2486         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2487         return error;
2488
2489 error1:
2490         XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2491         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2492         return error;
2493 }
2494
2495 /*
2496  * Move 1 record right from cur/level if possible.
2497  * Update cur to reflect the new path.
2498  */
2499 STATIC int                                      /* error */
2500 xfs_btree_rshift(
2501         struct xfs_btree_cur    *cur,
2502         int                     level,
2503         int                     *stat)          /* success/failure */
2504 {
2505         struct xfs_buf          *lbp;           /* left buffer pointer */
2506         struct xfs_btree_block  *left;          /* left btree block */
2507         struct xfs_buf          *rbp;           /* right buffer pointer */
2508         struct xfs_btree_block  *right;         /* right btree block */
2509         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
2510         union xfs_btree_ptr     rptr;           /* right block pointer */
2511         union xfs_btree_key     *rkp;           /* right btree key */
2512         int                     rrecs;          /* right record count */
2513         int                     lrecs;          /* left record count */
2514         int                     error;          /* error return value */
2515         int                     i;              /* loop counter */
2516
2517         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2518         XFS_BTREE_TRACE_ARGI(cur, level);
2519
2520         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2521             (level == cur->bc_nlevels - 1))
2522                 goto out0;
2523
2524         /* Set up variables for this block as "left". */
2525         left = xfs_btree_get_block(cur, level, &lbp);
2526
2527 #ifdef DEBUG
2528         error = xfs_btree_check_block(cur, left, level, lbp);
2529         if (error)
2530                 goto error0;
2531 #endif
2532
2533         /* If we've got no right sibling then we can't shift an entry right. */
2534         xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2535         if (xfs_btree_ptr_is_null(cur, &rptr))
2536                 goto out0;
2537
2538         /*
2539          * If the cursor entry is the one that would be moved, don't
2540          * do it... it's too complicated.
2541          */
2542         lrecs = xfs_btree_get_numrecs(left);
2543         if (cur->bc_ptrs[level] >= lrecs)
2544                 goto out0;
2545
2546         /* Set up the right neighbor as "right". */
2547         error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
2548         if (error)
2549                 goto error0;
2550
2551         /* If it's full, it can't take another entry. */
2552         rrecs = xfs_btree_get_numrecs(right);
2553         if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2554                 goto out0;
2555
2556         XFS_BTREE_STATS_INC(cur, rshift);
2557         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2558
2559         /*
2560          * Make a hole at the start of the right neighbor block, then
2561          * copy the last left block entry to the hole.
2562          */
2563         if (level > 0) {
2564                 /* It's a nonleaf. make a hole in the keys and ptrs */
2565                 union xfs_btree_key     *lkp;
2566                 union xfs_btree_ptr     *lpp;
2567                 union xfs_btree_ptr     *rpp;
2568
2569                 lkp = xfs_btree_key_addr(cur, lrecs, left);
2570                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2571                 rkp = xfs_btree_key_addr(cur, 1, right);
2572                 rpp = xfs_btree_ptr_addr(cur, 1, right);
2573
2574 #ifdef DEBUG
2575                 for (i = rrecs - 1; i >= 0; i--) {
2576                         error = xfs_btree_check_ptr(cur, rpp, i, level);
2577                         if (error)
2578                                 goto error0;
2579                 }
2580 #endif
2581
2582                 xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2583                 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2584
2585 #ifdef DEBUG
2586                 error = xfs_btree_check_ptr(cur, lpp, 0, level);
2587                 if (error)
2588                         goto error0;
2589 #endif
2590
2591                 /* Now put the new data in, and log it. */
2592                 xfs_btree_copy_keys(cur, rkp, lkp, 1);
2593                 xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2594
2595                 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2596                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2597
2598                 ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2599                         xfs_btree_key_addr(cur, 2, right)));
2600         } else {
2601                 /* It's a leaf. make a hole in the records */
2602                 union xfs_btree_rec     *lrp;
2603                 union xfs_btree_rec     *rrp;
2604
2605                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
2606                 rrp = xfs_btree_rec_addr(cur, 1, right);
2607
2608                 xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2609
2610                 /* Now put the new data in, and log it. */
2611                 xfs_btree_copy_recs(cur, rrp, lrp, 1);
2612                 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2613         }
2614
2615         /*
2616          * Decrement and log left's numrecs, bump and log right's numrecs.
2617          */
2618         xfs_btree_set_numrecs(left, --lrecs);
2619         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2620
2621         xfs_btree_set_numrecs(right, ++rrecs);
2622         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2623
2624         /*
2625          * Using a temporary cursor, update the parent key values of the
2626          * block on the right.
2627          */
2628         error = xfs_btree_dup_cursor(cur, &tcur);
2629         if (error)
2630                 goto error0;
2631         i = xfs_btree_lastrec(tcur, level);
2632         XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);
2633
2634         error = xfs_btree_increment(tcur, level, &i);
2635         if (error)
2636                 goto error1;
2637
2638         /* Update the parent high keys of the left block, if needed. */
2639         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2640                 error = xfs_btree_update_keys(cur, level);
2641                 if (error)
2642                         goto error1;
2643         }
2644
2645         /* Update the parent keys of the right block. */
2646         error = xfs_btree_update_keys(tcur, level);
2647         if (error)
2648                 goto error1;
2649
2650         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2651
2652         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2653         *stat = 1;
2654         return 0;
2655
2656 out0:
2657         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2658         *stat = 0;
2659         return 0;
2660
2661 error0:
2662         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2663         return error;
2664
2665 error1:
2666         XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2667         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2668         return error;
2669 }
2670
2671 /*
2672  * Split cur/level block in half.
2673  * Return new block number and the key to its first
2674  * record (to be inserted into parent).
2675  */
2676 STATIC int                                      /* error */
2677 __xfs_btree_split(
2678         struct xfs_btree_cur    *cur,
2679         int                     level,
2680         union xfs_btree_ptr     *ptrp,
2681         union xfs_btree_key     *key,
2682         struct xfs_btree_cur    **curp,
2683         int                     *stat)          /* success/failure */
2684 {
2685         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
2686         struct xfs_buf          *lbp;           /* left buffer pointer */
2687         struct xfs_btree_block  *left;          /* left btree block */
2688         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
2689         struct xfs_buf          *rbp;           /* right buffer pointer */
2690         struct xfs_btree_block  *right;         /* right btree block */
2691         union xfs_btree_ptr     rrptr;          /* right-right sibling ptr */
2692         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
2693         struct xfs_btree_block  *rrblock;       /* right-right btree block */
2694         int                     lrecs;
2695         int                     rrecs;
2696         int                     src_index;
2697         int                     error;          /* error return value */
2698 #ifdef DEBUG
2699         int                     i;
2700 #endif
2701
2702         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2703         XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
2704
2705         XFS_BTREE_STATS_INC(cur, split);
2706
2707         /* Set up left block (current one). */
2708         left = xfs_btree_get_block(cur, level, &lbp);
2709
2710 #ifdef DEBUG
2711         error = xfs_btree_check_block(cur, left, level, lbp);
2712         if (error)
2713                 goto error0;
2714 #endif
2715
2716         xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2717
2718         /* Allocate the new block. If we can't do it, we're toast. Give up. */
2719         error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
2720         if (error)
2721                 goto error0;
2722         if (*stat == 0)
2723                 goto out0;
2724         XFS_BTREE_STATS_INC(cur, alloc);
2725
2726         /* Set up the new block as "right". */
2727         error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
2728         if (error)
2729                 goto error0;
2730
2731         /* Fill in the btree header for the new right block. */
2732         xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
2733
2734         /*
2735          * Split the entries between the old and the new block evenly.
2736          * Make sure that if there's an odd number of entries now, that
2737          * each new block will have the same number of entries.
2738          */
2739         lrecs = xfs_btree_get_numrecs(left);
2740         rrecs = lrecs / 2;
2741         if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
2742                 rrecs++;
2743         src_index = (lrecs - rrecs + 1);
2744
2745         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2746
2747         /* Adjust numrecs for the later get_*_keys() calls. */
2748         lrecs -= rrecs;
2749         xfs_btree_set_numrecs(left, lrecs);
2750         xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2751
2752         /*
2753          * Copy btree block entries from the left block over to the
2754          * new block, the right. Update the right block and log the
2755          * changes.
2756          */
2757         if (level > 0) {
2758                 /* It's a non-leaf.  Move keys and pointers. */
2759                 union xfs_btree_key     *lkp;   /* left btree key */
2760                 union xfs_btree_ptr     *lpp;   /* left address pointer */
2761                 union xfs_btree_key     *rkp;   /* right btree key */
2762                 union xfs_btree_ptr     *rpp;   /* right address pointer */
2763
2764                 lkp = xfs_btree_key_addr(cur, src_index, left);
2765                 lpp = xfs_btree_ptr_addr(cur, src_index, left);
2766                 rkp = xfs_btree_key_addr(cur, 1, right);
2767                 rpp = xfs_btree_ptr_addr(cur, 1, right);
2768
2769 #ifdef DEBUG
2770                 for (i = src_index; i < rrecs; i++) {
2771                         error = xfs_btree_check_ptr(cur, lpp, i, level);
2772                         if (error)
2773                                 goto error0;
2774                 }
2775 #endif
2776
2777                 /* Copy the keys & pointers to the new block. */
2778                 xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2779                 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2780
2781                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
2782                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2783
2784                 /* Stash the keys of the new block for later insertion. */
2785                 xfs_btree_get_node_keys(cur, right, key);
2786         } else {
2787                 /* It's a leaf.  Move records.  */
2788                 union xfs_btree_rec     *lrp;   /* left record pointer */
2789                 union xfs_btree_rec     *rrp;   /* right record pointer */
2790
2791                 lrp = xfs_btree_rec_addr(cur, src_index, left);
2792                 rrp = xfs_btree_rec_addr(cur, 1, right);
2793
2794                 /* Copy records to the new block. */
2795                 xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2796                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2797
2798                 /* Stash the keys of the new block for later insertion. */
2799                 xfs_btree_get_leaf_keys(cur, right, key);
2800         }
2801
2802         /*
2803          * Find the left block number by looking in the buffer.
2804          * Adjust sibling pointers.
2805          */
2806         xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2807         xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2808         xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2809         xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2810
2811         xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2812         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2813
2814         /*
2815          * If there's a block to the new block's right, make that block
2816          * point back to right instead of to left.
2817          */
2818         if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2819                 error = xfs_btree_read_buf_block(cur, &rrptr,
2820                                                         0, &rrblock, &rrbp);
2821                 if (error)
2822                         goto error0;
2823                 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2824                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2825         }
2826
2827         /* Update the parent high keys of the left block, if needed. */
2828         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2829                 error = xfs_btree_update_keys(cur, level);
2830                 if (error)
2831                         goto error0;
2832         }
2833
2834         /*
2835          * If the cursor is really in the right block, move it there.
2836          * If it's just pointing past the last entry in left, then we'll
2837          * insert there, so don't change anything in that case.
2838          */
2839         if (cur->bc_ptrs[level] > lrecs + 1) {
2840                 xfs_btree_setbuf(cur, level, rbp);
2841                 cur->bc_ptrs[level] -= lrecs;
2842         }
2843         /*
2844          * If there are more levels, we'll need another cursor which refers
2845          * the right block, no matter where this cursor was.
2846          */
2847         if (level + 1 < cur->bc_nlevels) {
2848                 error = xfs_btree_dup_cursor(cur, curp);
2849                 if (error)
2850                         goto error0;
2851                 (*curp)->bc_ptrs[level + 1]++;
2852         }
2853         *ptrp = rptr;
2854         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2855         *stat = 1;
2856         return 0;
2857 out0:
2858         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2859         *stat = 0;
2860         return 0;
2861
2862 error0:
2863         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2864         return error;
2865 }
2866
2867 struct xfs_btree_split_args {
2868         struct xfs_btree_cur    *cur;
2869         int                     level;
2870         union xfs_btree_ptr     *ptrp;
2871         union xfs_btree_key     *key;
2872         struct xfs_btree_cur    **curp;
2873         int                     *stat;          /* success/failure */
2874         int                     result;
2875         bool                    kswapd; /* allocation in kswapd context */
2876         struct completion       *done;
2877         struct work_struct      work;
2878 };
2879
2880 /*
2881  * Stack switching interfaces for allocation
2882  */
2883 static void
2884 xfs_btree_split_worker(
2885         struct work_struct      *work)
2886 {
2887         struct xfs_btree_split_args     *args = container_of(work,
2888                                                 struct xfs_btree_split_args, work);
2889         unsigned long           pflags;
2890         unsigned long           new_pflags = PF_MEMALLOC_NOFS;
2891
2892         /*
2893          * we are in a transaction context here, but may also be doing work
2894          * in kswapd context, and hence we may need to inherit that state
2895          * temporarily to ensure that we don't block waiting for memory reclaim
2896          * in any way.
2897          */
2898         if (args->kswapd)
2899                 new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
2900
2901         current_set_flags_nested(&pflags, new_pflags);
2902
2903         args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
2904                                          args->key, args->curp, args->stat);
2905         complete(args->done);
2906
2907         current_restore_flags_nested(&pflags, new_pflags);
2908 }
2909
2910 /*
2911  * BMBT split requests often come in with little stack to work on. Push
2912  * them off to a worker thread so there is lots of stack to use. For the other
2913  * btree types, just call directly to avoid the context switch overhead here.
2914  */
2915 STATIC int                                      /* error */
2916 xfs_btree_split(
2917         struct xfs_btree_cur    *cur,
2918         int                     level,
2919         union xfs_btree_ptr     *ptrp,
2920         union xfs_btree_key     *key,
2921         struct xfs_btree_cur    **curp,
2922         int                     *stat)          /* success/failure */
2923 {
2924         struct xfs_btree_split_args     args;
2925         DECLARE_COMPLETION_ONSTACK(done);
2926
2927         if (cur->bc_btnum != XFS_BTNUM_BMAP)
2928                 return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
2929
2930         args.cur = cur;
2931         args.level = level;
2932         args.ptrp = ptrp;
2933         args.key = key;
2934         args.curp = curp;
2935         args.stat = stat;
2936         args.done = &done;
2937         args.kswapd = current_is_kswapd();
2938         INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
2939         queue_work(xfs_alloc_wq, &args.work);
2940         wait_for_completion(&done);
2941         destroy_work_on_stack(&args.work);
2942         return args.result;
2943 }
2944
2945
2946 /*
2947  * Copy the old inode root contents into a real block and make the
2948  * broot point to it.
2949  */
2950 int                                             /* error */
2951 xfs_btree_new_iroot(
2952         struct xfs_btree_cur    *cur,           /* btree cursor */
2953         int                     *logflags,      /* logging flags for inode */
2954         int                     *stat)          /* return status - 0 fail */
2955 {
2956         struct xfs_buf          *cbp;           /* buffer for cblock */
2957         struct xfs_btree_block  *block;         /* btree block */
2958         struct xfs_btree_block  *cblock;        /* child btree block */
2959         union xfs_btree_key     *ckp;           /* child key pointer */
2960         union xfs_btree_ptr     *cpp;           /* child ptr pointer */
2961         union xfs_btree_key     *kp;            /* pointer to btree key */
2962         union xfs_btree_ptr     *pp;            /* pointer to block addr */
2963         union xfs_btree_ptr     nptr;           /* new block addr */
2964         int                     level;          /* btree level */
2965         int                     error;          /* error return code */
2966 #ifdef DEBUG
2967         int                     i;              /* loop counter */
2968 #endif
2969
2970         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2971         XFS_BTREE_STATS_INC(cur, newroot);
2972
2973         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2974
2975         level = cur->bc_nlevels - 1;
2976
2977         block = xfs_btree_get_iroot(cur);
2978         pp = xfs_btree_ptr_addr(cur, 1, block);
2979
2980         /* Allocate the new block. If we can't do it, we're toast. Give up. */
2981         error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
2982         if (error)
2983                 goto error0;
2984         if (*stat == 0) {
2985                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2986                 return 0;
2987         }
2988         XFS_BTREE_STATS_INC(cur, alloc);
2989
2990         /* Copy the root into a real block. */
2991         error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
2992         if (error)
2993                 goto error0;
2994
2995         /*
2996          * we can't just memcpy() the root in for CRC enabled btree blocks.
2997          * In that case have to also ensure the blkno remains correct
2998          */
2999         memcpy(cblock, block, xfs_btree_block_len(cur));
3000         if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
3001                 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
3002                         cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn);
3003                 else
3004                         cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn);
3005         }
3006
3007         be16_add_cpu(&block->bb_level, 1);
3008         xfs_btree_set_numrecs(block, 1);
3009         cur->bc_nlevels++;
3010         cur->bc_ptrs[level + 1] = 1;
3011
3012         kp = xfs_btree_key_addr(cur, 1, block);
3013         ckp = xfs_btree_key_addr(cur, 1, cblock);
3014         xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
3015
3016         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3017 #ifdef DEBUG
3018         for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
3019                 error = xfs_btree_check_ptr(cur, pp, i, level);
3020                 if (error)
3021                         goto error0;
3022         }
3023 #endif
3024         xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
3025
3026 #ifdef DEBUG
3027         error = xfs_btree_check_ptr(cur, &nptr, 0, level);
3028         if (error)
3029                 goto error0;
3030 #endif
3031         xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
3032
3033         xfs_iroot_realloc(cur->bc_private.b.ip,
3034                           1 - xfs_btree_get_numrecs(cblock),
3035                           cur->bc_private.b.whichfork);
3036
3037         xfs_btree_setbuf(cur, level, cbp);
3038
3039         /*
3040          * Do all this logging at the end so that
3041          * the root is at the right level.
3042          */
3043         xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
3044         xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
3045         xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
3046
3047         *logflags |=
3048                 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
3049         *stat = 1;
3050         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3051         return 0;
3052 error0:
3053         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3054         return error;
3055 }
3056
3057 /*
3058  * Allocate a new root block, fill it in.
3059  */
3060 STATIC int                              /* error */
3061 xfs_btree_new_root(
3062         struct xfs_btree_cur    *cur,   /* btree cursor */
3063         int                     *stat)  /* success/failure */
3064 {
3065         struct xfs_btree_block  *block; /* one half of the old root block */
3066         struct xfs_buf          *bp;    /* buffer containing block */
3067         int                     error;  /* error return value */
3068         struct xfs_buf          *lbp;   /* left buffer pointer */
3069         struct xfs_btree_block  *left;  /* left btree block */
3070         struct xfs_buf          *nbp;   /* new (root) buffer */
3071         struct xfs_btree_block  *new;   /* new (root) btree block */
3072         int                     nptr;   /* new value for key index, 1 or 2 */
3073         struct xfs_buf          *rbp;   /* right buffer pointer */
3074         struct xfs_btree_block  *right; /* right btree block */
3075         union xfs_btree_ptr     rptr;
3076         union xfs_btree_ptr     lptr;
3077
3078         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3079         XFS_BTREE_STATS_INC(cur, newroot);
3080
3081         /* initialise our start point from the cursor */
3082         cur->bc_ops->init_ptr_from_cur(cur, &rptr);
3083
3084         /* Allocate the new block. If we can't do it, we're toast. Give up. */
3085         error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
3086         if (error)
3087                 goto error0;
3088         if (*stat == 0)
3089                 goto out0;
3090         XFS_BTREE_STATS_INC(cur, alloc);
3091
3092         /* Set up the new block. */
3093         error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
3094         if (error)
3095                 goto error0;
3096
3097         /* Set the root in the holding structure  increasing the level by 1. */
3098         cur->bc_ops->set_root(cur, &lptr, 1);
3099
3100         /*
3101          * At the previous root level there are now two blocks: the old root,
3102          * and the new block generated when it was split.  We don't know which
3103          * one the cursor is pointing at, so we set up variables "left" and
3104          * "right" for each case.
3105          */
3106         block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
3107
3108 #ifdef DEBUG
3109         error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
3110         if (error)
3111                 goto error0;
3112 #endif
3113
3114         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3115         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3116                 /* Our block is left, pick up the right block. */
3117                 lbp = bp;
3118                 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
3119                 left = block;
3120                 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
3121                 if (error)
3122                         goto error0;
3123                 bp = rbp;
3124                 nptr = 1;
3125         } else {
3126                 /* Our block is right, pick up the left block. */
3127                 rbp = bp;
3128                 xfs_btree_buf_to_ptr(cur, rbp, &rptr);
3129                 right = block;
3130                 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
3131                 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
3132                 if (error)
3133                         goto error0;
3134                 bp = lbp;
3135                 nptr = 2;
3136         }
3137
3138         /* Fill in the new block's btree header and log it. */
3139         xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
3140         xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
3141         ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
3142                         !xfs_btree_ptr_is_null(cur, &rptr));
3143
3144         /* Fill in the key data in the new root. */
3145         if (xfs_btree_get_level(left) > 0) {
3146                 /*
3147                  * Get the keys for the left block's keys and put them directly
3148                  * in the parent block.  Do the same for the right block.
3149                  */
3150                 xfs_btree_get_node_keys(cur, left,
3151                                 xfs_btree_key_addr(cur, 1, new));
3152                 xfs_btree_get_node_keys(cur, right,
3153                                 xfs_btree_key_addr(cur, 2, new));
3154         } else {
3155                 /*
3156                  * Get the keys for the left block's records and put them
3157                  * directly in the parent block.  Do the same for the right
3158                  * block.
3159                  */
3160                 xfs_btree_get_leaf_keys(cur, left,
3161                         xfs_btree_key_addr(cur, 1, new));
3162                 xfs_btree_get_leaf_keys(cur, right,
3163                         xfs_btree_key_addr(cur, 2, new));
3164         }
3165         xfs_btree_log_keys(cur, nbp, 1, 2);
3166
3167         /* Fill in the pointer data in the new root. */
3168         xfs_btree_copy_ptrs(cur,
3169                 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
3170         xfs_btree_copy_ptrs(cur,
3171                 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
3172         xfs_btree_log_ptrs(cur, nbp, 1, 2);
3173
3174         /* Fix up the cursor. */
3175         xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
3176         cur->bc_ptrs[cur->bc_nlevels] = nptr;
3177         cur->bc_nlevels++;
3178         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3179         *stat = 1;
3180         return 0;
3181 error0:
3182         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3183         return error;
3184 out0:
3185         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3186         *stat = 0;
3187         return 0;
3188 }
3189
3190 STATIC int
3191 xfs_btree_make_block_unfull(
3192         struct xfs_btree_cur    *cur,   /* btree cursor */
3193         int                     level,  /* btree level */
3194         int                     numrecs,/* # of recs in block */
3195         int                     *oindex,/* old tree index */
3196         int                     *index, /* new tree index */
3197         union xfs_btree_ptr     *nptr,  /* new btree ptr */
3198         struct xfs_btree_cur    **ncur, /* new btree cursor */
3199         union xfs_btree_key     *key,   /* key of new block */
3200         int                     *stat)
3201 {
3202         int                     error = 0;
3203
3204         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
3205             level == cur->bc_nlevels - 1) {
3206                 struct xfs_inode *ip = cur->bc_private.b.ip;
3207
3208                 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
3209                         /* A root block that can be made bigger. */
3210                         xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
3211                         *stat = 1;
3212                 } else {
3213                         /* A root block that needs replacing */
3214                         int     logflags = 0;
3215
3216                         error = xfs_btree_new_iroot(cur, &logflags, stat);
3217                         if (error || *stat == 0)
3218                                 return error;
3219
3220                         xfs_trans_log_inode(cur->bc_tp, ip, logflags);
3221                 }
3222
3223                 return 0;
3224         }
3225
3226         /* First, try shifting an entry to the right neighbor. */
3227         error = xfs_btree_rshift(cur, level, stat);
3228         if (error || *stat)
3229                 return error;
3230
3231         /* Next, try shifting an entry to the left neighbor. */
3232         error = xfs_btree_lshift(cur, level, stat);
3233         if (error)
3234                 return error;
3235
3236         if (*stat) {
3237                 *oindex = *index = cur->bc_ptrs[level];
3238                 return 0;
3239         }
3240
3241         /*
3242          * Next, try splitting the current block in half.
3243          *
3244          * If this works we have to re-set our variables because we
3245          * could be in a different block now.
3246          */
3247         error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
3248         if (error || *stat == 0)
3249                 return error;
3250
3251
3252         *index = cur->bc_ptrs[level];
3253         return 0;
3254 }
3255
3256 /*
3257  * Insert one record/level.  Return information to the caller
3258  * allowing the next level up to proceed if necessary.
3259  */
3260 STATIC int
3261 xfs_btree_insrec(
3262         struct xfs_btree_cur    *cur,   /* btree cursor */
3263         int                     level,  /* level to insert record at */
3264         union xfs_btree_ptr     *ptrp,  /* i/o: block number inserted */
3265         union xfs_btree_rec     *rec,   /* record to insert */
3266         union xfs_btree_key     *key,   /* i/o: block key for ptrp */
3267         struct xfs_btree_cur    **curp, /* output: new cursor replacing cur */
3268         int                     *stat)  /* success/failure */
3269 {
3270         struct xfs_btree_block  *block; /* btree block */
3271         struct xfs_buf          *bp;    /* buffer for block */
3272         union xfs_btree_ptr     nptr;   /* new block ptr */
3273         struct xfs_btree_cur    *ncur;  /* new btree cursor */
3274         union xfs_btree_key     nkey;   /* new block key */
3275         union xfs_btree_key     *lkey;
3276         int                     optr;   /* old key/record index */
3277         int                     ptr;    /* key/record index */
3278         int                     numrecs;/* number of records */
3279         int                     error;  /* error return value */
3280 #ifdef DEBUG
3281         int                     i;
3282 #endif
3283         xfs_daddr_t             old_bn;
3284
3285         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3286         XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec);
3287
3288         ncur = NULL;
3289         lkey = &nkey;
3290
3291         /*
3292          * If we have an external root pointer, and we've made it to the
3293          * root level, allocate a new root block and we're done.
3294          */
3295         if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
3296             (level >= cur->bc_nlevels)) {
3297                 error = xfs_btree_new_root(cur, stat);
3298                 xfs_btree_set_ptr_null(cur, ptrp);
3299
3300                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3301                 return error;
3302         }
3303
3304         /* If we're off the left edge, return failure. */
3305         ptr = cur->bc_ptrs[level];
3306         if (ptr == 0) {
3307                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3308                 *stat = 0;
3309                 return 0;
3310         }
3311
3312         optr = ptr;
3313
3314         XFS_BTREE_STATS_INC(cur, insrec);
3315
3316         /* Get pointers to the btree buffer and block. */
3317         block = xfs_btree_get_block(cur, level, &bp);
3318         old_bn = bp ? bp->b_bn : XFS_BUF_DADDR_NULL;
3319         numrecs = xfs_btree_get_numrecs(block);
3320
3321 #ifdef DEBUG
3322         error = xfs_btree_check_block(cur, block, level, bp);
3323         if (error)
3324                 goto error0;
3325
3326         /* Check that the new entry is being inserted in the right place. */
3327         if (ptr <= numrecs) {
3328                 if (level == 0) {
3329                         ASSERT(cur->bc_ops->recs_inorder(cur, rec,
3330                                 xfs_btree_rec_addr(cur, ptr, block)));
3331                 } else {
3332                         ASSERT(cur->bc_ops->keys_inorder(cur, key,
3333                                 xfs_btree_key_addr(cur, ptr, block)));
3334                 }
3335         }
3336 #endif
3337
3338         /*
3339          * If the block is full, we can't insert the new entry until we
3340          * make the block un-full.
3341          */
3342         xfs_btree_set_ptr_null(cur, &nptr);
3343         if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
3344                 error = xfs_btree_make_block_unfull(cur, level, numrecs,
3345                                         &optr, &ptr, &nptr, &ncur, lkey, stat);
3346                 if (error || *stat == 0)
3347                         goto error0;
3348         }
3349
3350         /*
3351          * The current block may have changed if the block was
3352          * previously full and we have just made space in it.
3353          */
3354         block = xfs_btree_get_block(cur, level, &bp);
3355         numrecs = xfs_btree_get_numrecs(block);
3356
3357 #ifdef DEBUG
3358         error = xfs_btree_check_block(cur, block, level, bp);
3359         if (error)
3360                 return error;
3361 #endif
3362
3363         /*
3364          * At this point we know there's room for our new entry in the block
3365          * we're pointing at.
3366          */
3367         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
3368
3369         if (level > 0) {
3370                 /* It's a nonleaf. make a hole in the keys and ptrs */
3371                 union xfs_btree_key     *kp;
3372                 union xfs_btree_ptr     *pp;
3373
3374                 kp = xfs_btree_key_addr(cur, ptr, block);
3375                 pp = xfs_btree_ptr_addr(cur, ptr, block);
3376
3377 #ifdef DEBUG
3378                 for (i = numrecs - ptr; i >= 0; i--) {
3379                         error = xfs_btree_check_ptr(cur, pp, i, level);
3380                         if (error)
3381                                 return error;
3382                 }
3383 #endif
3384
3385                 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
3386                 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
3387
3388 #ifdef DEBUG
3389                 error = xfs_btree_check_ptr(cur, ptrp, 0, level);
3390                 if (error)
3391                         goto error0;
3392 #endif
3393
3394                 /* Now put the new data in, bump numrecs and log it. */
3395                 xfs_btree_copy_keys(cur, kp, key, 1);
3396                 xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
3397                 numrecs++;
3398                 xfs_btree_set_numrecs(block, numrecs);
3399                 xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
3400                 xfs_btree_log_keys(cur, bp, ptr, numrecs);
3401 #ifdef DEBUG
3402                 if (ptr < numrecs) {
3403                         ASSERT(cur->bc_ops->keys_inorder(cur, kp,
3404                                 xfs_btree_key_addr(cur, ptr + 1, block)));
3405                 }
3406 #endif
3407         } else {
3408                 /* It's a leaf. make a hole in the records */
3409                 union xfs_btree_rec             *rp;
3410
3411                 rp = xfs_btree_rec_addr(cur, ptr, block);
3412
3413                 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
3414
3415                 /* Now put the new data in, bump numrecs and log it. */
3416                 xfs_btree_copy_recs(cur, rp, rec, 1);
3417                 xfs_btree_set_numrecs(block, ++numrecs);
3418                 xfs_btree_log_recs(cur, bp, ptr, numrecs);
3419 #ifdef DEBUG
3420                 if (ptr < numrecs) {
3421                         ASSERT(cur->bc_ops->recs_inorder(cur, rp,
3422                                 xfs_btree_rec_addr(cur, ptr + 1, block)));
3423                 }
3424 #endif
3425         }
3426
3427         /* Log the new number of records in the btree header. */
3428         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3429
3430         /*
3431          * If we just inserted into a new tree block, we have to
3432          * recalculate nkey here because nkey is out of date.
3433          *
3434          * Otherwise we're just updating an existing block (having shoved
3435          * some records into the new tree block), so use the regular key
3436          * update mechanism.
3437          */
3438         if (bp && bp->b_bn != old_bn) {
3439                 xfs_btree_get_keys(cur, block, lkey);
3440         } else if (xfs_btree_needs_key_update(cur, optr)) {
3441                 error = xfs_btree_update_keys(cur, level);
3442                 if (error)
3443                         goto error0;
3444         }
3445
3446         /*
3447          * If we are tracking the last record in the tree and
3448          * we are at the far right edge of the tree, update it.
3449          */
3450         if (xfs_btree_is_lastrec(cur, block, level)) {
3451                 cur->bc_ops->update_lastrec(cur, block, rec,
3452                                             ptr, LASTREC_INSREC);
3453         }
3454
3455         /*
3456          * Return the new block number, if any.
3457          * If there is one, give back a record value and a cursor too.
3458          */
3459         *ptrp = nptr;
3460         if (!xfs_btree_ptr_is_null(cur, &nptr)) {
3461                 xfs_btree_copy_keys(cur, key, lkey, 1);
3462                 *curp = ncur;
3463         }
3464
3465         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3466         *stat = 1;
3467         return 0;
3468
3469 error0:
3470         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3471         return error;
3472 }
3473
3474 /*
3475  * Insert the record at the point referenced by cur.
3476  *
3477  * A multi-level split of the tree on insert will invalidate the original
3478  * cursor.  All callers of this function should assume that the cursor is
3479  * no longer valid and revalidate it.
3480  */
3481 int
3482 xfs_btree_insert(
3483         struct xfs_btree_cur    *cur,
3484         int                     *stat)
3485 {
3486         int                     error;  /* error return value */
3487         int                     i;      /* result value, 0 for failure */
3488         int                     level;  /* current level number in btree */
3489         union xfs_btree_ptr     nptr;   /* new block number (split result) */
3490         struct xfs_btree_cur    *ncur;  /* new cursor (split result) */
3491         struct xfs_btree_cur    *pcur;  /* previous level's cursor */
3492         union xfs_btree_key     bkey;   /* key of block to insert */
3493         union xfs_btree_key     *key;
3494         union xfs_btree_rec     rec;    /* record to insert */
3495
3496         level = 0;
3497         ncur = NULL;
3498         pcur = cur;
3499         key = &bkey;
3500
3501         xfs_btree_set_ptr_null(cur, &nptr);
3502
3503         /* Make a key out of the record data to be inserted, and save it. */
3504         cur->bc_ops->init_rec_from_cur(cur, &rec);
3505         cur->bc_ops->init_key_from_rec(key, &rec);
3506
3507         /*
3508          * Loop going up the tree, starting at the leaf level.
3509          * Stop when we don't get a split block, that must mean that
3510          * the insert is finished with this level.
3511          */
3512         do {
3513                 /*
3514                  * Insert nrec/nptr into this level of the tree.
3515                  * Note if we fail, nptr will be null.
3516                  */
3517                 error = xfs_btree_insrec(pcur, level, &nptr, &rec, key,
3518                                 &ncur, &i);
3519                 if (error) {
3520                         if (pcur != cur)
3521                                 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
3522                         goto error0;
3523                 }
3524
3525                 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3526                 level++;
3527
3528                 /*
3529                  * See if the cursor we just used is trash.
3530                  * Can't trash the caller's cursor, but otherwise we should
3531                  * if ncur is a new cursor or we're about to be done.
3532                  */
3533                 if (pcur != cur &&
3534                     (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
3535                         /* Save the state from the cursor before we trash it */
3536                         if (cur->bc_ops->update_cursor)
3537                                 cur->bc_ops->update_cursor(pcur, cur);
3538                         cur->bc_nlevels = pcur->bc_nlevels;
3539                         xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
3540                 }
3541                 /* If we got a new cursor, switch to it. */
3542                 if (ncur) {
3543                         pcur = ncur;
3544                         ncur = NULL;
3545                 }
3546         } while (!xfs_btree_ptr_is_null(cur, &nptr));
3547
3548         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3549         *stat = i;
3550         return 0;
3551 error0:
3552         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3553         return error;
3554 }
3555
3556 /*
3557  * Try to merge a non-leaf block back into the inode root.
3558  *
3559  * Note: the killroot names comes from the fact that we're effectively
3560  * killing the old root block.  But because we can't just delete the
3561  * inode we have to copy the single block it was pointing to into the
3562  * inode.
3563  */
3564 STATIC int
3565 xfs_btree_kill_iroot(
3566         struct xfs_btree_cur    *cur)
3567 {
3568         int                     whichfork = cur->bc_private.b.whichfork;
3569         struct xfs_inode        *ip = cur->bc_private.b.ip;
3570         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
3571         struct xfs_btree_block  *block;
3572         struct xfs_btree_block  *cblock;
3573         union xfs_btree_key     *kp;
3574         union xfs_btree_key     *ckp;
3575         union xfs_btree_ptr     *pp;
3576         union xfs_btree_ptr     *cpp;
3577         struct xfs_buf          *cbp;
3578         int                     level;
3579         int                     index;
3580         int                     numrecs;
3581         int                     error;
3582 #ifdef DEBUG
3583         union xfs_btree_ptr     ptr;
3584         int                     i;
3585 #endif
3586
3587         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3588
3589         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
3590         ASSERT(cur->bc_nlevels > 1);
3591
3592         /*
3593          * Don't deal with the root block needs to be a leaf case.
3594          * We're just going to turn the thing back into extents anyway.
3595          */
3596         level = cur->bc_nlevels - 1;
3597         if (level == 1)
3598                 goto out0;
3599
3600         /*
3601          * Give up if the root has multiple children.
3602          */
3603         block = xfs_btree_get_iroot(cur);
3604         if (xfs_btree_get_numrecs(block) != 1)
3605                 goto out0;
3606
3607         cblock = xfs_btree_get_block(cur, level - 1, &cbp);
3608         numrecs = xfs_btree_get_numrecs(cblock);
3609
3610         /*
3611          * Only do this if the next level will fit.
3612          * Then the data must be copied up to the inode,
3613          * instead of freeing the root you free the next level.
3614          */
3615         if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
3616                 goto out0;
3617
3618         XFS_BTREE_STATS_INC(cur, killroot);
3619
3620 #ifdef DEBUG
3621         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
3622         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3623         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
3624         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3625 #endif
3626
3627         index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
3628         if (index) {
3629                 xfs_iroot_realloc(cur->bc_private.b.ip, index,
3630                                   cur->bc_private.b.whichfork);
3631                 block = ifp->if_broot;
3632         }
3633
3634         be16_add_cpu(&block->bb_numrecs, index);
3635         ASSERT(block->bb_numrecs == cblock->bb_numrecs);
3636
3637         kp = xfs_btree_key_addr(cur, 1, block);
3638         ckp = xfs_btree_key_addr(cur, 1, cblock);
3639         xfs_btree_copy_keys(cur, kp, ckp, numrecs);
3640
3641         pp = xfs_btree_ptr_addr(cur, 1, block);
3642         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3643 #ifdef DEBUG
3644         for (i = 0; i < numrecs; i++) {
3645                 error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
3646                 if (error) {
3647                         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3648                         return error;
3649                 }
3650         }
3651 #endif
3652         xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
3653
3654         error = xfs_btree_free_block(cur, cbp);
3655         if (error) {
3656                 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3657                 return error;
3658         }
3659
3660         cur->bc_bufs[level - 1] = NULL;
3661         be16_add_cpu(&block->bb_level, -1);
3662         xfs_trans_log_inode(cur->bc_tp, ip,
3663                 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
3664         cur->bc_nlevels--;
3665 out0:
3666         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3667         return 0;
3668 }
3669
3670 /*
3671  * Kill the current root node, and replace it with it's only child node.
3672  */
3673 STATIC int
3674 xfs_btree_kill_root(
3675         struct xfs_btree_cur    *cur,
3676         struct xfs_buf          *bp,
3677         int                     level,
3678         union xfs_btree_ptr     *newroot)
3679 {
3680         int                     error;
3681
3682         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3683         XFS_BTREE_STATS_INC(cur, killroot);
3684
3685         /*
3686          * Update the root pointer, decreasing the level by 1 and then
3687          * free the old root.
3688          */
3689         cur->bc_ops->set_root(cur, newroot, -1);
3690
3691         error = xfs_btree_free_block(cur, bp);
3692         if (error) {
3693                 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3694                 return error;
3695         }
3696
3697         cur->bc_bufs[level] = NULL;
3698         cur->bc_ra[level] = 0;
3699         cur->bc_nlevels--;
3700
3701         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3702         return 0;
3703 }
3704
3705 STATIC int
3706 xfs_btree_dec_cursor(
3707         struct xfs_btree_cur    *cur,
3708         int                     level,
3709         int                     *stat)
3710 {
3711         int                     error;
3712         int                     i;
3713
3714         if (level > 0) {
3715                 error = xfs_btree_decrement(cur, level, &i);
3716                 if (error)
3717                         return error;
3718         }
3719
3720         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3721         *stat = 1;
3722         return 0;
3723 }
3724
3725 /*
3726  * Single level of the btree record deletion routine.
3727  * Delete record pointed to by cur/level.
3728  * Remove the record from its block then rebalance the tree.
3729  * Return 0 for error, 1 for done, 2 to go on to the next level.
3730  */
3731 STATIC int                                      /* error */
3732 xfs_btree_delrec(
3733         struct xfs_btree_cur    *cur,           /* btree cursor */
3734         int                     level,          /* level removing record from */
3735         int                     *stat)          /* fail/done/go-on */
3736 {
3737         struct xfs_btree_block  *block;         /* btree block */
3738         union xfs_btree_ptr     cptr;           /* current block ptr */
3739         struct xfs_buf          *bp;            /* buffer for block */
3740         int                     error;          /* error return value */
3741         int                     i;              /* loop counter */
3742         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
3743         struct xfs_buf          *lbp;           /* left buffer pointer */
3744         struct xfs_btree_block  *left;          /* left btree block */
3745         int                     lrecs = 0;      /* left record count */
3746         int                     ptr;            /* key/record index */
3747         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
3748         struct xfs_buf          *rbp;           /* right buffer pointer */
3749         struct xfs_btree_block  *right;         /* right btree block */
3750         struct xfs_btree_block  *rrblock;       /* right-right btree block */
3751         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
3752         int                     rrecs = 0;      /* right record count */
3753         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
3754         int                     numrecs;        /* temporary numrec count */
3755
3756         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3757         XFS_BTREE_TRACE_ARGI(cur, level);
3758
3759         tcur = NULL;
3760
3761         /* Get the index of the entry being deleted, check for nothing there. */
3762         ptr = cur->bc_ptrs[level];
3763         if (ptr == 0) {
3764                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3765                 *stat = 0;
3766                 return 0;
3767         }
3768
3769         /* Get the buffer & block containing the record or key/ptr. */
3770         block = xfs_btree_get_block(cur, level, &bp);
3771         numrecs = xfs_btree_get_numrecs(block);
3772
3773 #ifdef DEBUG
3774         error = xfs_btree_check_block(cur, block, level, bp);
3775         if (error)
3776                 goto error0;
3777 #endif
3778
3779         /* Fail if we're off the end of the block. */
3780         if (ptr > numrecs) {
3781                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3782                 *stat = 0;
3783                 return 0;
3784         }
3785
3786         XFS_BTREE_STATS_INC(cur, delrec);
3787         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3788
3789         /* Excise the entries being deleted. */
3790         if (level > 0) {
3791                 /* It's a nonleaf. operate on keys and ptrs */
3792                 union xfs_btree_key     *lkp;
3793                 union xfs_btree_ptr     *lpp;
3794
3795                 lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3796                 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3797
3798 #ifdef DEBUG
3799                 for (i = 0; i < numrecs - ptr; i++) {
3800                         error = xfs_btree_check_ptr(cur, lpp, i, level);
3801                         if (error)
3802                                 goto error0;
3803                 }
3804 #endif
3805
3806                 if (ptr < numrecs) {
3807                         xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3808                         xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3809                         xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3810                         xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3811                 }
3812         } else {
3813                 /* It's a leaf. operate on records */
3814                 if (ptr < numrecs) {
3815                         xfs_btree_shift_recs(cur,
3816                                 xfs_btree_rec_addr(cur, ptr + 1, block),
3817                                 -1, numrecs - ptr);
3818                         xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3819                 }
3820         }
3821
3822         /*
3823          * Decrement and log the number of entries in the block.
3824          */
3825         xfs_btree_set_numrecs(block, --numrecs);
3826         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3827
3828         /*
3829          * If we are tracking the last record in the tree and
3830          * we are at the far right edge of the tree, update it.
3831          */
3832         if (xfs_btree_is_lastrec(cur, block, level)) {
3833                 cur->bc_ops->update_lastrec(cur, block, NULL,
3834                                             ptr, LASTREC_DELREC);
3835         }
3836
3837         /*
3838          * We're at the root level.  First, shrink the root block in-memory.
3839          * Try to get rid of the next level down.  If we can't then there's
3840          * nothing left to do.
3841          */
3842         if (level == cur->bc_nlevels - 1) {
3843                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3844                         xfs_iroot_realloc(cur->bc_private.b.ip, -1,
3845                                           cur->bc_private.b.whichfork);
3846
3847                         error = xfs_btree_kill_iroot(cur);
3848                         if (error)
3849                                 goto error0;
3850
3851                         error = xfs_btree_dec_cursor(cur, level, stat);
3852                         if (error)
3853                                 goto error0;
3854                         *stat = 1;
3855                         return 0;
3856                 }
3857
3858                 /*
3859                  * If this is the root level, and there's only one entry left,
3860                  * and it's NOT the leaf level, then we can get rid of this
3861                  * level.
3862                  */
3863                 if (numrecs == 1 && level > 0) {
3864                         union xfs_btree_ptr     *pp;
3865                         /*
3866                          * pp is still set to the first pointer in the block.
3867                          * Make it the new root of the btree.
3868                          */
3869                         pp = xfs_btree_ptr_addr(cur, 1, block);
3870                         error = xfs_btree_kill_root(cur, bp, level, pp);
3871                         if (error)
3872                                 goto error0;
3873                 } else if (level > 0) {
3874                         error = xfs_btree_dec_cursor(cur, level, stat);
3875                         if (error)
3876                                 goto error0;
3877                 }
3878                 *stat = 1;
3879                 return 0;
3880         }
3881
3882         /*
3883          * If we deleted the leftmost entry in the block, update the
3884          * key values above us in the tree.
3885          */
3886         if (xfs_btree_needs_key_update(cur, ptr)) {
3887                 error = xfs_btree_update_keys(cur, level);
3888                 if (error)
3889                         goto error0;
3890         }
3891
3892         /*
3893          * If the number of records remaining in the block is at least
3894          * the minimum, we're done.
3895          */
3896         if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3897                 error = xfs_btree_dec_cursor(cur, level, stat);
3898                 if (error)
3899                         goto error0;
3900                 return 0;
3901         }
3902
3903         /*
3904          * Otherwise, we have to move some records around to keep the
3905          * tree balanced.  Look at the left and right sibling blocks to
3906          * see if we can re-balance by moving only one record.
3907          */
3908         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3909         xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3910
3911         if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3912                 /*
3913                  * One child of root, need to get a chance to copy its contents
3914                  * into the root and delete it. Can't go up to next level,
3915                  * there's nothing to delete there.
3916                  */
3917                 if (xfs_btree_ptr_is_null(cur, &rptr) &&
3918                     xfs_btree_ptr_is_null(cur, &lptr) &&
3919                     level == cur->bc_nlevels - 2) {
3920                         error = xfs_btree_kill_iroot(cur);
3921                         if (!error)
3922                                 error = xfs_btree_dec_cursor(cur, level, stat);
3923                         if (error)
3924                                 goto error0;
3925                         return 0;
3926                 }
3927         }
3928
3929         ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3930                !xfs_btree_ptr_is_null(cur, &lptr));
3931
3932         /*
3933          * Duplicate the cursor so our btree manipulations here won't
3934          * disrupt the next level up.
3935          */
3936         error = xfs_btree_dup_cursor(cur, &tcur);
3937         if (error)
3938                 goto error0;
3939
3940         /*
3941          * If there's a right sibling, see if it's ok to shift an entry
3942          * out of it.
3943          */
3944         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3945                 /*
3946                  * Move the temp cursor to the last entry in the next block.
3947                  * Actually any entry but the first would suffice.
3948                  */
3949                 i = xfs_btree_lastrec(tcur, level);
3950                 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3951
3952                 error = xfs_btree_increment(tcur, level, &i);
3953                 if (error)
3954                         goto error0;
3955                 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3956
3957                 i = xfs_btree_lastrec(tcur, level);
3958                 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3959
3960                 /* Grab a pointer to the block. */
3961                 right = xfs_btree_get_block(tcur, level, &rbp);
3962 #ifdef DEBUG
3963                 error = xfs_btree_check_block(tcur, right, level, rbp);
3964                 if (error)
3965                         goto error0;
3966 #endif
3967                 /* Grab the current block number, for future use. */
3968                 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3969
3970                 /*
3971                  * If right block is full enough so that removing one entry
3972                  * won't make it too empty, and left-shifting an entry out
3973                  * of right to us works, we're done.
3974                  */
3975                 if (xfs_btree_get_numrecs(right) - 1 >=
3976                     cur->bc_ops->get_minrecs(tcur, level)) {
3977                         error = xfs_btree_lshift(tcur, level, &i);
3978                         if (error)
3979                                 goto error0;
3980                         if (i) {
3981                                 ASSERT(xfs_btree_get_numrecs(block) >=
3982                                        cur->bc_ops->get_minrecs(tcur, level));
3983
3984                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3985                                 tcur = NULL;
3986
3987                                 error = xfs_btree_dec_cursor(cur, level, stat);
3988                                 if (error)
3989                                         goto error0;
3990                                 return 0;
3991                         }
3992                 }
3993
3994                 /*
3995                  * Otherwise, grab the number of records in right for
3996                  * future reference, and fix up the temp cursor to point
3997                  * to our block again (last record).
3998                  */
3999                 rrecs = xfs_btree_get_numrecs(right);
4000                 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
4001                         i = xfs_btree_firstrec(tcur, level);
4002                         XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
4003
4004                         error = xfs_btree_decrement(tcur, level, &i);
4005                         if (error)
4006                                 goto error0;
4007                         XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
4008                 }
4009         }
4010
4011         /*
4012          * If there's a left sibling, see if it's ok to shift an entry
4013          * out of it.
4014          */
4015         if (!xfs_btree_ptr_is_null(cur, &lptr)) {
4016                 /*
4017                  * Move the temp cursor to the first entry in the
4018                  * previous block.
4019                  */
4020                 i = xfs_btree_firstrec(tcur, level);
4021                 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
4022
4023                 error = xfs_btree_decrement(tcur, level, &i);
4024                 if (error)
4025                         goto error0;
4026                 i = xfs_btree_firstrec(tcur, level);
4027                 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
4028
4029                 /* Grab a pointer to the block. */
4030                 left = xfs_btree_get_block(tcur, level, &lbp);
4031 #ifdef DEBUG
4032                 error = xfs_btree_check_block(cur, left, level, lbp);
4033                 if (error)
4034                         goto error0;
4035 #endif
4036                 /* Grab the current block number, for future use. */
4037                 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
4038
4039                 /*
4040                  * If left block is full enough so that removing one entry
4041                  * won't make it too empty, and right-shifting an entry out
4042                  * of left to us works, we're done.
4043                  */
4044                 if (xfs_btree_get_numrecs(left) - 1 >=
4045                     cur->bc_ops->get_minrecs(tcur, level)) {
4046                         error = xfs_btree_rshift(tcur, level, &i);
4047                         if (error)
4048                                 goto error0;
4049                         if (i) {
4050                                 ASSERT(xfs_btree_get_numrecs(block) >=
4051                                        cur->bc_ops->get_minrecs(tcur, level));
4052                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
4053                                 tcur = NULL;
4054                                 if (level == 0)
4055                                         cur->bc_ptrs[0]++;
4056                                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
4057                                 *stat = 1;
4058                                 return 0;
4059                         }
4060                 }
4061
4062                 /*
4063                  * Otherwise, grab the number of records in right for
4064                  * future reference.
4065                  */
4066                 lrecs = xfs_btree_get_numrecs(left);
4067         }
4068
4069         /* Delete the temp cursor, we're done with it. */
4070         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
4071         tcur = NULL;
4072
4073         /* If here, we need to do a join to keep the tree balanced. */
4074         ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
4075
4076         if (!xfs_btree_ptr_is_null(cur, &lptr) &&
4077             lrecs + xfs_btree_get_numrecs(block) <=
4078                         cur->bc_ops->get_maxrecs(cur, level)) {
4079                 /*
4080                  * Set "right" to be the starting block,
4081                  * "left" to be the left neighbor.
4082                  */
4083                 rptr = cptr;
4084                 right = block;
4085                 rbp = bp;
4086                 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
4087                 if (error)
4088                         goto error0;
4089
4090         /*
4091          * If that won't work, see if we can join with the right neighbor block.
4092          */
4093         } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
4094                    rrecs + xfs_btree_get_numrecs(block) <=
4095                         cur->bc_ops->get_maxrecs(cur, level)) {
4096                 /*
4097                  * Set "left" to be the starting block,
4098                  * "right" to be the right neighbor.
4099                  */
4100                 lptr = cptr;
4101                 left = block;
4102                 lbp = bp;
4103                 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
4104                 if (error)
4105                         goto error0;
4106
4107         /*
4108          * Otherwise, we can't fix the imbalance.
4109          * Just return.  This is probably a logic error, but it's not fatal.
4110          */
4111         } else {
4112                 error = xfs_btree_dec_cursor(cur, level, stat);
4113                 if (error)
4114                         goto error0;
4115                 return 0;
4116         }
4117
4118         rrecs = xfs_btree_get_numrecs(right);
4119         lrecs = xfs_btree_get_numrecs(left);
4120
4121         /*
4122          * We're now going to join "left" and "right" by moving all the stuff
4123          * in "right" to "left" and deleting "right".
4124          */
4125         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
4126         if (level > 0) {
4127                 /* It's a non-leaf.  Move keys and pointers. */
4128                 union xfs_btree_key     *lkp;   /* left btree key */
4129                 union xfs_btree_ptr     *lpp;   /* left address pointer */
4130                 union xfs_btree_key     *rkp;   /* right btree key */
4131                 union xfs_btree_ptr     *rpp;   /* right address pointer */
4132
4133                 lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
4134                 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
4135                 rkp = xfs_btree_key_addr(cur, 1, right);
4136                 rpp = xfs_btree_ptr_addr(cur, 1, right);
4137 #ifdef DEBUG
4138                 for (i = 1; i < rrecs; i++) {
4139                         error = xfs_btree_check_ptr(cur, rpp, i, level);
4140                         if (error)
4141                                 goto error0;
4142                 }
4143 #endif
4144                 xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
4145                 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
4146
4147                 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
4148                 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
4149         } else {
4150                 /* It's a leaf.  Move records.  */
4151                 union xfs_btree_rec     *lrp;   /* left record pointer */
4152                 union xfs_btree_rec     *rrp;   /* right record pointer */
4153
4154                 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
4155                 rrp = xfs_btree_rec_addr(cur, 1, right);
4156
4157                 xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
4158                 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
4159         }
4160
4161         XFS_BTREE_STATS_INC(cur, join);
4162
4163         /*
4164          * Fix up the number of records and right block pointer in the
4165          * surviving block, and log it.
4166          */
4167         xfs_btree_set_numrecs(left, lrecs + rrecs);
4168         xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
4169         xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
4170         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
4171
4172         /* If there is a right sibling, point it to the remaining block. */
4173         xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
4174         if (!xfs_btree_ptr_is_null(cur, &cptr)) {
4175                 error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
4176                 if (error)
4177                         goto error0;
4178                 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
4179                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
4180         }
4181
4182         /* Free the deleted block. */
4183         error = xfs_btree_free_block(cur, rbp);
4184         if (error)
4185                 goto error0;
4186
4187         /*
4188          * If we joined with the left neighbor, set the buffer in the
4189          * cursor to the left block, and fix up the index.
4190          */
4191         if (bp != lbp) {
4192                 cur->bc_bufs[level] = lbp;
4193                 cur->bc_ptrs[level] += lrecs;
4194                 cur->bc_ra[level] = 0;
4195         }
4196         /*
4197          * If we joined with the right neighbor and there's a level above
4198          * us, increment the cursor at that level.
4199          */
4200         else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
4201                    (level + 1 < cur->bc_nlevels)) {
4202                 error = xfs_btree_increment(cur, level + 1, &i);
4203                 if (error)
4204                         goto error0;
4205         }
4206
4207         /*
4208          * Readjust the ptr at this level if it's not a leaf, since it's
4209          * still pointing at the deletion point, which makes the cursor
4210          * inconsistent.  If this makes the ptr 0, the caller fixes it up.
4211          * We can't use decrement because it would change the next level up.
4212          */
4213         if (level > 0)
4214                 cur->bc_ptrs[level]--;
4215
4216         /*
4217          * We combined blocks, so we have to update the parent keys if the
4218          * btree supports overlapped intervals.  However, bc_ptrs[level + 1]
4219          * points to the old block so that the caller knows which record to
4220          * delete.  Therefore, the caller must be savvy enough to call updkeys
4221          * for us if we return stat == 2.  The other exit points from this
4222          * function don't require deletions further up the tree, so they can
4223          * call updkeys directly.
4224          */
4225
4226         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
4227         /* Return value means the next level up has something to do. */
4228         *stat = 2;
4229         return 0;
4230
4231 error0:
4232         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
4233         if (tcur)
4234                 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
4235         return error;
4236 }
4237
4238 /*
4239  * Delete the record pointed to by cur.
4240  * The cursor refers to the place where the record was (could be inserted)
4241  * when the operation returns.
4242  */
4243 int                                     /* error */
4244 xfs_btree_delete(
4245         struct xfs_btree_cur    *cur,
4246         int                     *stat)  /* success/failure */
4247 {
4248         int                     error;  /* error return value */
4249         int                     level;
4250         int                     i;
4251         bool                    joined = false;
4252
4253         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
4254
4255         /*
4256          * Go up the tree, starting at leaf level.
4257          *
4258          * If 2 is returned then a join was done; go to the next level.
4259          * Otherwise we are done.
4260          */
4261         for (level = 0, i = 2; i == 2; level++) {
4262                 error = xfs_btree_delrec(cur, level, &i);
4263                 if (error)
4264                         goto error0;
4265                 if (i == 2)
4266                         joined = true;
4267         }
4268
4269         /*
4270          * If we combined blocks as part of deleting the record, delrec won't
4271          * have updated the parent high keys so we have to do that here.
4272          */
4273         if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
4274                 error = xfs_btree_updkeys_force(cur, 0);
4275                 if (error)
4276                         goto error0;
4277         }
4278
4279         if (i == 0) {
4280                 for (level = 1; level < cur->bc_nlevels; level++) {
4281                         if (cur->bc_ptrs[level] == 0) {
4282                                 error = xfs_btree_decrement(cur, level, &i);
4283                                 if (error)
4284                                         goto error0;
4285                                 break;
4286                         }
4287                 }
4288         }
4289
4290         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
4291         *stat = i;
4292         return 0;
4293 error0:
4294         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
4295         return error;
4296 }
4297
4298 /*
4299  * Get the data from the pointed-to record.
4300  */
4301 int                                     /* error */
4302 xfs_btree_get_rec(
4303         struct xfs_btree_cur    *cur,   /* btree cursor */
4304         union xfs_btree_rec     **recp, /* output: btree record */
4305         int                     *stat)  /* output: success/failure */
4306 {
4307         struct xfs_btree_block  *block; /* btree block */
4308         struct xfs_buf          *bp;    /* buffer pointer */
4309         int                     ptr;    /* record number */
4310 #ifdef DEBUG
4311         int                     error;  /* error return value */
4312 #endif
4313
4314         ptr = cur->bc_ptrs[0];
4315         block = xfs_btree_get_block(cur, 0, &bp);
4316
4317 #ifdef DEBUG
4318         error = xfs_btree_check_block(cur, block, 0, bp);
4319         if (error)
4320                 return error;
4321 #endif
4322
4323         /*
4324          * Off the right end or left end, return failure.
4325          */
4326         if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
4327                 *stat = 0;
4328                 return 0;
4329         }
4330
4331         /*
4332          * Point to the record and extract its data.
4333          */
4334         *recp = xfs_btree_rec_addr(cur, ptr, block);
4335         *stat = 1;
4336         return 0;
4337 }
4338
4339 /* Visit a block in a btree. */
4340 STATIC int
4341 xfs_btree_visit_block(
4342         struct xfs_btree_cur            *cur,
4343         int                             level,
4344         xfs_btree_visit_blocks_fn       fn,
4345         void                            *data)
4346 {
4347         struct xfs_btree_block          *block;
4348         struct xfs_buf                  *bp;
4349         union xfs_btree_ptr             rptr;
4350         int                             error;
4351
4352         /* do right sibling readahead */
4353         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
4354         block = xfs_btree_get_block(cur, level, &bp);
4355
4356         /* process the block */
4357         error = fn(cur, level, data);
4358         if (error)
4359                 return error;
4360
4361         /* now read rh sibling block for next iteration */
4362         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
4363         if (xfs_btree_ptr_is_null(cur, &rptr))
4364                 return -ENOENT;
4365
4366         return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
4367 }
4368
4369
4370 /* Visit every block in a btree. */
4371 int
4372 xfs_btree_visit_blocks(
4373         struct xfs_btree_cur            *cur,
4374         xfs_btree_visit_blocks_fn       fn,
4375         void                            *data)
4376 {
4377         union xfs_btree_ptr             lptr;
4378         int                             level;
4379         struct xfs_btree_block          *block = NULL;
4380         int                             error = 0;
4381
4382         cur->bc_ops->init_ptr_from_cur(cur, &lptr);
4383
4384         /* for each level */
4385         for (level = cur->bc_nlevels - 1; level >= 0; level--) {
4386                 /* grab the left hand block */
4387                 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
4388                 if (error)
4389                         return error;
4390
4391                 /* readahead the left most block for the next level down */
4392                 if (level > 0) {
4393                         union xfs_btree_ptr     *ptr;
4394
4395                         ptr = xfs_btree_ptr_addr(cur, 1, block);
4396                         xfs_btree_readahead_ptr(cur, ptr, 1);
4397
4398                         /* save for the next iteration of the loop */
4399                         xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
4400                 }
4401
4402                 /* for each buffer in the level */
4403                 do {
4404                         error = xfs_btree_visit_block(cur, level, fn, data);
4405                 } while (!error);
4406
4407                 if (error != -ENOENT)
4408                         return error;
4409         }
4410
4411         return 0;
4412 }
4413
4414 /*
4415  * Change the owner of a btree.
4416  *
4417  * The mechanism we use here is ordered buffer logging. Because we don't know
4418  * how many buffers were are going to need to modify, we don't really want to
4419  * have to make transaction reservations for the worst case of every buffer in a
4420  * full size btree as that may be more space that we can fit in the log....
4421  *
4422  * We do the btree walk in the most optimal manner possible - we have sibling
4423  * pointers so we can just walk all the blocks on each level from left to right
4424  * in a single pass, and then move to the next level and do the same. We can
4425  * also do readahead on the sibling pointers to get IO moving more quickly,
4426  * though for slow disks this is unlikely to make much difference to performance
4427  * as the amount of CPU work we have to do before moving to the next block is
4428  * relatively small.
4429  *
4430  * For each btree block that we load, modify the owner appropriately, set the
4431  * buffer as an ordered buffer and log it appropriately. We need to ensure that
4432  * we mark the region we change dirty so that if the buffer is relogged in
4433  * a subsequent transaction the changes we make here as an ordered buffer are
4434  * correctly relogged in that transaction.  If we are in recovery context, then
4435  * just queue the modified buffer as delayed write buffer so the transaction
4436  * recovery completion writes the changes to disk.
4437  */
4438 struct xfs_btree_block_change_owner_info {
4439         uint64_t                new_owner;
4440         struct list_head        *buffer_list;
4441 };
4442
4443 static int
4444 xfs_btree_block_change_owner(
4445         struct xfs_btree_cur    *cur,
4446         int                     level,
4447         void                    *data)
4448 {
4449         struct xfs_btree_block_change_owner_info        *bbcoi = data;
4450         struct xfs_btree_block  *block;
4451         struct xfs_buf          *bp;
4452
4453         /* modify the owner */
4454         block = xfs_btree_get_block(cur, level, &bp);
4455         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
4456                 if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
4457                         return 0;
4458                 block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
4459         } else {
4460                 if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner))
4461                         return 0;
4462                 block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
4463         }
4464
4465         /*
4466          * If the block is a root block hosted in an inode, we might not have a
4467          * buffer pointer here and we shouldn't attempt to log the change as the
4468          * information is already held in the inode and discarded when the root
4469          * block is formatted into the on-disk inode fork. We still change it,
4470          * though, so everything is consistent in memory.
4471          */
4472         if (!bp) {
4473                 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
4474                 ASSERT(level == cur->bc_nlevels - 1);
4475                 return 0;
4476         }
4477
4478         if (cur->bc_tp) {
4479                 if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) {
4480                         xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
4481                         return -EAGAIN;
4482                 }
4483         } else {
4484                 xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
4485         }
4486
4487         return 0;
4488 }
4489
4490 int
4491 xfs_btree_change_owner(
4492         struct xfs_btree_cur    *cur,
4493         uint64_t                new_owner,
4494         struct list_head        *buffer_list)
4495 {
4496         struct xfs_btree_block_change_owner_info        bbcoi;
4497
4498         bbcoi.new_owner = new_owner;
4499         bbcoi.buffer_list = buffer_list;
4500
4501         return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
4502                         &bbcoi);
4503 }
4504
4505 /**
4506  * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
4507  *                                    btree block
4508  *
4509  * @bp: buffer containing the btree block
4510  * @max_recs: pointer to the m_*_mxr max records field in the xfs mount
4511  * @pag_max_level: pointer to the per-ag max level field
4512  */
4513 bool
4514 xfs_btree_sblock_v5hdr_verify(
4515         struct xfs_buf          *bp)
4516 {
4517         struct xfs_mount        *mp = bp->b_target->bt_mount;
4518         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
4519         struct xfs_perag        *pag = bp->b_pag;
4520
4521         if (!xfs_sb_version_hascrc(&mp->m_sb))
4522                 return false;
4523         if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
4524                 return false;
4525         if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
4526                 return false;
4527         if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
4528                 return false;
4529         return true;
4530 }
4531
4532 /**
4533  * xfs_btree_sblock_verify() -- verify a short-format btree block
4534  *
4535  * @bp: buffer containing the btree block
4536  * @max_recs: maximum records allowed in this btree node
4537  */
4538 bool
4539 xfs_btree_sblock_verify(
4540         struct xfs_buf          *bp,
4541         unsigned int            max_recs)
4542 {
4543         struct xfs_mount        *mp = bp->b_target->bt_mount;
4544         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
4545
4546         /* numrecs verification */
4547         if (be16_to_cpu(block->bb_numrecs) > max_recs)
4548                 return false;
4549
4550         /* sibling pointer verification */
4551         if (!block->bb_u.s.bb_leftsib ||
4552             (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
4553              block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
4554                 return false;
4555         if (!block->bb_u.s.bb_rightsib ||
4556             (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
4557              block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
4558                 return false;
4559
4560         return true;
4561 }
4562
4563 /*
4564  * Calculate the number of btree levels needed to store a given number of
4565  * records in a short-format btree.
4566  */
4567 uint
4568 xfs_btree_compute_maxlevels(
4569         struct xfs_mount        *mp,
4570         uint                    *limits,
4571         unsigned long           len)
4572 {
4573         uint                    level;
4574         unsigned long           maxblocks;
4575
4576         maxblocks = (len + limits[0] - 1) / limits[0];
4577         for (level = 1; maxblocks > 1; level++)
4578                 maxblocks = (maxblocks + limits[1] - 1) / limits[1];
4579         return level;
4580 }
4581
4582 /*
4583  * Query a regular btree for all records overlapping a given interval.
4584  * Start with a LE lookup of the key of low_rec and return all records
4585  * until we find a record with a key greater than the key of high_rec.
4586  */
4587 STATIC int
4588 xfs_btree_simple_query_range(
4589         struct xfs_btree_cur            *cur,
4590         union xfs_btree_key             *low_key,
4591         union xfs_btree_key             *high_key,
4592         xfs_btree_query_range_fn        fn,
4593         void                            *priv)
4594 {
4595         union xfs_btree_rec             *recp;
4596         union xfs_btree_key             rec_key;
4597         int64_t                         diff;
4598         int                             stat;
4599         bool                            firstrec = true;
4600         int                             error;
4601
4602         ASSERT(cur->bc_ops->init_high_key_from_rec);
4603         ASSERT(cur->bc_ops->diff_two_keys);
4604
4605         /*
4606          * Find the leftmost record.  The btree cursor must be set
4607          * to the low record used to generate low_key.
4608          */
4609         stat = 0;
4610         error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
4611         if (error)
4612                 goto out;
4613
4614         /* Nothing?  See if there's anything to the right. */
4615         if (!stat) {
4616                 error = xfs_btree_increment(cur, 0, &stat);
4617                 if (error)
4618                         goto out;
4619         }
4620
4621         while (stat) {
4622                 /* Find the record. */
4623                 error = xfs_btree_get_rec(cur, &recp, &stat);
4624                 if (error || !stat)
4625                         break;
4626
4627                 /* Skip if high_key(rec) < low_key. */
4628                 if (firstrec) {
4629                         cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
4630                         firstrec = false;
4631                         diff = cur->bc_ops->diff_two_keys(cur, low_key,
4632                                         &rec_key);
4633                         if (diff > 0)
4634                                 goto advloop;
4635                 }
4636
4637                 /* Stop if high_key < low_key(rec). */
4638                 cur->bc_ops->init_key_from_rec(&rec_key, recp);
4639                 diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key);
4640                 if (diff > 0)
4641                         break;
4642
4643                 /* Callback */
4644                 error = fn(cur, recp, priv);
4645                 if (error < 0 || error == XFS_BTREE_QUERY_RANGE_ABORT)
4646                         break;
4647
4648 advloop:
4649                 /* Move on to the next record. */
4650                 error = xfs_btree_increment(cur, 0, &stat);
4651                 if (error)
4652                         break;
4653         }
4654
4655 out:
4656         return error;
4657 }
4658
4659 /*
4660  * Query an overlapped interval btree for all records overlapping a given
4661  * interval.  This function roughly follows the algorithm given in
4662  * "Interval Trees" of _Introduction to Algorithms_, which is section
4663  * 14.3 in the 2nd and 3rd editions.
4664  *
4665  * First, generate keys for the low and high records passed in.
4666  *
4667  * For any leaf node, generate the high and low keys for the record.
4668  * If the record keys overlap with the query low/high keys, pass the
4669  * record to the function iterator.
4670  *
4671  * For any internal node, compare the low and high keys of each
4672  * pointer against the query low/high keys.  If there's an overlap,
4673  * follow the pointer.
4674  *
4675  * As an optimization, we stop scanning a block when we find a low key
4676  * that is greater than the query's high key.
4677  */
4678 STATIC int
4679 xfs_btree_overlapped_query_range(
4680         struct xfs_btree_cur            *cur,
4681         union xfs_btree_key             *low_key,
4682         union xfs_btree_key             *high_key,
4683         xfs_btree_query_range_fn        fn,
4684         void                            *priv)
4685 {
4686         union xfs_btree_ptr             ptr;
4687         union xfs_btree_ptr             *pp;
4688         union xfs_btree_key             rec_key;
4689         union xfs_btree_key             rec_hkey;
4690         union xfs_btree_key             *lkp;
4691         union xfs_btree_key             *hkp;
4692         union xfs_btree_rec             *recp;
4693         struct xfs_btree_block          *block;
4694         int64_t                         ldiff;
4695         int64_t                         hdiff;
4696         int                             level;
4697         struct xfs_buf                  *bp;
4698         int                             i;
4699         int                             error;
4700
4701         /* Load the root of the btree. */
4702         level = cur->bc_nlevels - 1;
4703         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
4704         error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
4705         if (error)
4706                 return error;
4707         xfs_btree_get_block(cur, level, &bp);
4708         trace_xfs_btree_overlapped_query_range(cur, level, bp);
4709 #ifdef DEBUG
4710         error = xfs_btree_check_block(cur, block, level, bp);
4711         if (error)
4712                 goto out;
4713 #endif
4714         cur->bc_ptrs[level] = 1;
4715
4716         while (level < cur->bc_nlevels) {
4717                 block = xfs_btree_get_block(cur, level, &bp);
4718
4719                 /* End of node, pop back towards the root. */
4720                 if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
4721 pop_up:
4722                         if (level < cur->bc_nlevels - 1)
4723                                 cur->bc_ptrs[level + 1]++;
4724                         level++;
4725                         continue;
4726                 }
4727
4728                 if (level == 0) {
4729                         /* Handle a leaf node. */
4730                         recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
4731
4732                         cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
4733                         ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey,
4734                                         low_key);
4735
4736                         cur->bc_ops->init_key_from_rec(&rec_key, recp);
4737                         hdiff = cur->bc_ops->diff_two_keys(cur, high_key,
4738                                         &rec_key);
4739
4740                         /*
4741                          * If (record's high key >= query's low key) and
4742                          *    (query's high key >= record's low key), then
4743                          * this record overlaps the query range; callback.
4744                          */
4745                         if (ldiff >= 0 && hdiff >= 0) {
4746                                 error = fn(cur, recp, priv);
4747                                 if (error < 0 ||
4748                                     error == XFS_BTREE_QUERY_RANGE_ABORT)
4749                                         break;
4750                         } else if (hdiff < 0) {
4751                                 /* Record is larger than high key; pop. */
4752                                 goto pop_up;
4753                         }
4754                         cur->bc_ptrs[level]++;
4755                         continue;
4756                 }
4757
4758                 /* Handle an internal node. */
4759                 lkp = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
4760                 hkp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
4761                 pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
4762
4763                 ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key);
4764                 hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp);
4765
4766                 /*
4767                  * If (pointer's high key >= query's low key) and
4768                  *    (query's high key >= pointer's low key), then
4769                  * this record overlaps the query range; follow pointer.
4770                  */
4771                 if (ldiff >= 0 && hdiff >= 0) {
4772                         level--;
4773                         error = xfs_btree_lookup_get_block(cur, level, pp,
4774                                         &block);
4775                         if (error)
4776                                 goto out;
4777                         xfs_btree_get_block(cur, level, &bp);
4778                         trace_xfs_btree_overlapped_query_range(cur, level, bp);
4779 #ifdef DEBUG
4780                         error = xfs_btree_check_block(cur, block, level, bp);
4781                         if (error)
4782                                 goto out;
4783 #endif
4784                         cur->bc_ptrs[level] = 1;
4785                         continue;
4786                 } else if (hdiff < 0) {
4787                         /* The low key is larger than the upper range; pop. */
4788                         goto pop_up;
4789                 }
4790                 cur->bc_ptrs[level]++;
4791         }
4792
4793 out:
4794         /*
4795          * If we don't end this function with the cursor pointing at a record
4796          * block, a subsequent non-error cursor deletion will not release
4797          * node-level buffers, causing a buffer leak.  This is quite possible
4798          * with a zero-results range query, so release the buffers if we
4799          * failed to return any results.
4800          */
4801         if (cur->bc_bufs[0] == NULL) {
4802                 for (i = 0; i < cur->bc_nlevels; i++) {
4803                         if (cur->bc_bufs[i]) {
4804                                 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
4805                                 cur->bc_bufs[i] = NULL;
4806                                 cur->bc_ptrs[i] = 0;
4807                                 cur->bc_ra[i] = 0;
4808                         }
4809                 }
4810         }
4811
4812         return error;
4813 }
4814
4815 /*
4816  * Query a btree for all records overlapping a given interval of keys.  The
4817  * supplied function will be called with each record found; return one of the
4818  * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
4819  * code.  This function returns XFS_BTREE_QUERY_RANGE_ABORT, zero, or a
4820  * negative error code.
4821  */
4822 int
4823 xfs_btree_query_range(
4824         struct xfs_btree_cur            *cur,
4825         union xfs_btree_irec            *low_rec,
4826         union xfs_btree_irec            *high_rec,
4827         xfs_btree_query_range_fn        fn,
4828         void                            *priv)
4829 {
4830         union xfs_btree_rec             rec;
4831         union xfs_btree_key             low_key;
4832         union xfs_btree_key             high_key;
4833
4834         /* Find the keys of both ends of the interval. */
4835         cur->bc_rec = *high_rec;
4836         cur->bc_ops->init_rec_from_cur(cur, &rec);
4837         cur->bc_ops->init_key_from_rec(&high_key, &rec);
4838
4839         cur->bc_rec = *low_rec;
4840         cur->bc_ops->init_rec_from_cur(cur, &rec);
4841         cur->bc_ops->init_key_from_rec(&low_key, &rec);
4842
4843         /* Enforce low key < high key. */
4844         if (cur->bc_ops->diff_two_keys(cur, &low_key, &high_key) > 0)
4845                 return -EINVAL;
4846
4847         if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
4848                 return xfs_btree_simple_query_range(cur, &low_key,
4849                                 &high_key, fn, priv);
4850         return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
4851                         fn, priv);
4852 }
4853
4854 /* Query a btree for all records. */
4855 int
4856 xfs_btree_query_all(
4857         struct xfs_btree_cur            *cur,
4858         xfs_btree_query_range_fn        fn,
4859         void                            *priv)
4860 {
4861         union xfs_btree_key             low_key;
4862         union xfs_btree_key             high_key;
4863
4864         memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
4865         memset(&low_key, 0, sizeof(low_key));
4866         memset(&high_key, 0xFF, sizeof(high_key));
4867
4868         return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv);
4869 }
4870
4871 /*
4872  * Calculate the number of blocks needed to store a given number of records
4873  * in a short-format (per-AG metadata) btree.
4874  */
4875 xfs_extlen_t
4876 xfs_btree_calc_size(
4877         struct xfs_mount        *mp,
4878         uint                    *limits,
4879         unsigned long long      len)
4880 {
4881         int                     level;
4882         int                     maxrecs;
4883         xfs_extlen_t            rval;
4884
4885         maxrecs = limits[0];
4886         for (level = 0, rval = 0; len > 1; level++) {
4887                 len += maxrecs - 1;
4888                 do_div(len, maxrecs);
4889                 maxrecs = limits[1];
4890                 rval += len;
4891         }
4892         return rval;
4893 }
4894
4895 static int
4896 xfs_btree_count_blocks_helper(
4897         struct xfs_btree_cur    *cur,
4898         int                     level,
4899         void                    *data)
4900 {
4901         xfs_extlen_t            *blocks = data;
4902         (*blocks)++;
4903
4904         return 0;
4905 }
4906
4907 /* Count the blocks in a btree and return the result in *blocks. */
4908 int
4909 xfs_btree_count_blocks(
4910         struct xfs_btree_cur    *cur,
4911         xfs_extlen_t            *blocks)
4912 {
4913         *blocks = 0;
4914         return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
4915                         blocks);
4916 }