GNU Linux-libre 6.9-gnu
[releases.git] / fs / xfs / libxfs / xfs_da_btree.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4  * Copyright (c) 2013 Red Hat, Inc.
5  * All Rights Reserved.
6  */
7 #include "xfs.h"
8 #include "xfs_fs.h"
9 #include "xfs_shared.h"
10 #include "xfs_format.h"
11 #include "xfs_log_format.h"
12 #include "xfs_trans_resv.h"
13 #include "xfs_bit.h"
14 #include "xfs_mount.h"
15 #include "xfs_inode.h"
16 #include "xfs_dir2.h"
17 #include "xfs_dir2_priv.h"
18 #include "xfs_trans.h"
19 #include "xfs_bmap.h"
20 #include "xfs_attr_leaf.h"
21 #include "xfs_error.h"
22 #include "xfs_trace.h"
23 #include "xfs_buf_item.h"
24 #include "xfs_log.h"
25 #include "xfs_errortag.h"
26 #include "xfs_health.h"
27
28 /*
29  * xfs_da_btree.c
30  *
31  * Routines to implement directories as Btrees of hashed names.
32  */
33
34 /*========================================================================
35  * Function prototypes for the kernel.
36  *========================================================================*/
37
38 /*
39  * Routines used for growing the Btree.
40  */
41 STATIC int xfs_da3_root_split(xfs_da_state_t *state,
42                                             xfs_da_state_blk_t *existing_root,
43                                             xfs_da_state_blk_t *new_child);
44 STATIC int xfs_da3_node_split(xfs_da_state_t *state,
45                                             xfs_da_state_blk_t *existing_blk,
46                                             xfs_da_state_blk_t *split_blk,
47                                             xfs_da_state_blk_t *blk_to_add,
48                                             int treelevel,
49                                             int *result);
50 STATIC void xfs_da3_node_rebalance(xfs_da_state_t *state,
51                                          xfs_da_state_blk_t *node_blk_1,
52                                          xfs_da_state_blk_t *node_blk_2);
53 STATIC void xfs_da3_node_add(xfs_da_state_t *state,
54                                    xfs_da_state_blk_t *old_node_blk,
55                                    xfs_da_state_blk_t *new_node_blk);
56
57 /*
58  * Routines used for shrinking the Btree.
59  */
60 STATIC int xfs_da3_root_join(xfs_da_state_t *state,
61                                            xfs_da_state_blk_t *root_blk);
62 STATIC int xfs_da3_node_toosmall(xfs_da_state_t *state, int *retval);
63 STATIC void xfs_da3_node_remove(xfs_da_state_t *state,
64                                               xfs_da_state_blk_t *drop_blk);
65 STATIC void xfs_da3_node_unbalance(xfs_da_state_t *state,
66                                          xfs_da_state_blk_t *src_node_blk,
67                                          xfs_da_state_blk_t *dst_node_blk);
68
69 /*
70  * Utility routines.
71  */
72 STATIC int      xfs_da3_blk_unlink(xfs_da_state_t *state,
73                                   xfs_da_state_blk_t *drop_blk,
74                                   xfs_da_state_blk_t *save_blk);
75
76
77 struct kmem_cache       *xfs_da_state_cache;    /* anchor for dir/attr state */
78
79 /*
80  * Allocate a dir-state structure.
81  * We don't put them on the stack since they're large.
82  */
83 struct xfs_da_state *
84 xfs_da_state_alloc(
85         struct xfs_da_args      *args)
86 {
87         struct xfs_da_state     *state;
88
89         state = kmem_cache_zalloc(xfs_da_state_cache,
90                         GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
91         state->args = args;
92         state->mp = args->dp->i_mount;
93         return state;
94 }
95
96 /*
97  * Kill the altpath contents of a da-state structure.
98  */
99 STATIC void
100 xfs_da_state_kill_altpath(xfs_da_state_t *state)
101 {
102         int     i;
103
104         for (i = 0; i < state->altpath.active; i++)
105                 state->altpath.blk[i].bp = NULL;
106         state->altpath.active = 0;
107 }
108
109 /*
110  * Free a da-state structure.
111  */
112 void
113 xfs_da_state_free(xfs_da_state_t *state)
114 {
115         xfs_da_state_kill_altpath(state);
116 #ifdef DEBUG
117         memset((char *)state, 0, sizeof(*state));
118 #endif /* DEBUG */
119         kmem_cache_free(xfs_da_state_cache, state);
120 }
121
122 void
123 xfs_da_state_reset(
124         struct xfs_da_state     *state,
125         struct xfs_da_args      *args)
126 {
127         xfs_da_state_kill_altpath(state);
128         memset(state, 0, sizeof(struct xfs_da_state));
129         state->args = args;
130         state->mp = state->args->dp->i_mount;
131 }
132
133 static inline int xfs_dabuf_nfsb(struct xfs_mount *mp, int whichfork)
134 {
135         if (whichfork == XFS_DATA_FORK)
136                 return mp->m_dir_geo->fsbcount;
137         return mp->m_attr_geo->fsbcount;
138 }
139
140 void
141 xfs_da3_node_hdr_from_disk(
142         struct xfs_mount                *mp,
143         struct xfs_da3_icnode_hdr       *to,
144         struct xfs_da_intnode           *from)
145 {
146         if (xfs_has_crc(mp)) {
147                 struct xfs_da3_intnode  *from3 = (struct xfs_da3_intnode *)from;
148
149                 to->forw = be32_to_cpu(from3->hdr.info.hdr.forw);
150                 to->back = be32_to_cpu(from3->hdr.info.hdr.back);
151                 to->magic = be16_to_cpu(from3->hdr.info.hdr.magic);
152                 to->count = be16_to_cpu(from3->hdr.__count);
153                 to->level = be16_to_cpu(from3->hdr.__level);
154                 to->btree = from3->__btree;
155                 ASSERT(to->magic == XFS_DA3_NODE_MAGIC);
156         } else {
157                 to->forw = be32_to_cpu(from->hdr.info.forw);
158                 to->back = be32_to_cpu(from->hdr.info.back);
159                 to->magic = be16_to_cpu(from->hdr.info.magic);
160                 to->count = be16_to_cpu(from->hdr.__count);
161                 to->level = be16_to_cpu(from->hdr.__level);
162                 to->btree = from->__btree;
163                 ASSERT(to->magic == XFS_DA_NODE_MAGIC);
164         }
165 }
166
167 void
168 xfs_da3_node_hdr_to_disk(
169         struct xfs_mount                *mp,
170         struct xfs_da_intnode           *to,
171         struct xfs_da3_icnode_hdr       *from)
172 {
173         if (xfs_has_crc(mp)) {
174                 struct xfs_da3_intnode  *to3 = (struct xfs_da3_intnode *)to;
175
176                 ASSERT(from->magic == XFS_DA3_NODE_MAGIC);
177                 to3->hdr.info.hdr.forw = cpu_to_be32(from->forw);
178                 to3->hdr.info.hdr.back = cpu_to_be32(from->back);
179                 to3->hdr.info.hdr.magic = cpu_to_be16(from->magic);
180                 to3->hdr.__count = cpu_to_be16(from->count);
181                 to3->hdr.__level = cpu_to_be16(from->level);
182         } else {
183                 ASSERT(from->magic == XFS_DA_NODE_MAGIC);
184                 to->hdr.info.forw = cpu_to_be32(from->forw);
185                 to->hdr.info.back = cpu_to_be32(from->back);
186                 to->hdr.info.magic = cpu_to_be16(from->magic);
187                 to->hdr.__count = cpu_to_be16(from->count);
188                 to->hdr.__level = cpu_to_be16(from->level);
189         }
190 }
191
192 /*
193  * Verify an xfs_da3_blkinfo structure. Note that the da3 fields are only
194  * accessible on v5 filesystems. This header format is common across da node,
195  * attr leaf and dir leaf blocks.
196  */
197 xfs_failaddr_t
198 xfs_da3_blkinfo_verify(
199         struct xfs_buf          *bp,
200         struct xfs_da3_blkinfo  *hdr3)
201 {
202         struct xfs_mount        *mp = bp->b_mount;
203         struct xfs_da_blkinfo   *hdr = &hdr3->hdr;
204
205         if (!xfs_verify_magic16(bp, hdr->magic))
206                 return __this_address;
207
208         if (xfs_has_crc(mp)) {
209                 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
210                         return __this_address;
211                 if (be64_to_cpu(hdr3->blkno) != xfs_buf_daddr(bp))
212                         return __this_address;
213                 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
214                         return __this_address;
215         }
216
217         return NULL;
218 }
219
220 static xfs_failaddr_t
221 xfs_da3_node_verify(
222         struct xfs_buf          *bp)
223 {
224         struct xfs_mount        *mp = bp->b_mount;
225         struct xfs_da_intnode   *hdr = bp->b_addr;
226         struct xfs_da3_icnode_hdr ichdr;
227         xfs_failaddr_t          fa;
228
229         xfs_da3_node_hdr_from_disk(mp, &ichdr, hdr);
230
231         fa = xfs_da3_blkinfo_verify(bp, bp->b_addr);
232         if (fa)
233                 return fa;
234
235         if (ichdr.level == 0)
236                 return __this_address;
237         if (ichdr.level > XFS_DA_NODE_MAXDEPTH)
238                 return __this_address;
239         if (ichdr.count == 0)
240                 return __this_address;
241
242         /*
243          * we don't know if the node is for and attribute or directory tree,
244          * so only fail if the count is outside both bounds
245          */
246         if (ichdr.count > mp->m_dir_geo->node_ents &&
247             ichdr.count > mp->m_attr_geo->node_ents)
248                 return __this_address;
249
250         /* XXX: hash order check? */
251
252         return NULL;
253 }
254
255 static void
256 xfs_da3_node_write_verify(
257         struct xfs_buf  *bp)
258 {
259         struct xfs_mount        *mp = bp->b_mount;
260         struct xfs_buf_log_item *bip = bp->b_log_item;
261         struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
262         xfs_failaddr_t          fa;
263
264         fa = xfs_da3_node_verify(bp);
265         if (fa) {
266                 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
267                 return;
268         }
269
270         if (!xfs_has_crc(mp))
271                 return;
272
273         if (bip)
274                 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
275
276         xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);
277 }
278
279 /*
280  * leaf/node format detection on trees is sketchy, so a node read can be done on
281  * leaf level blocks when detection identifies the tree as a node format tree
282  * incorrectly. In this case, we need to swap the verifier to match the correct
283  * format of the block being read.
284  */
285 static void
286 xfs_da3_node_read_verify(
287         struct xfs_buf          *bp)
288 {
289         struct xfs_da_blkinfo   *info = bp->b_addr;
290         xfs_failaddr_t          fa;
291
292         switch (be16_to_cpu(info->magic)) {
293                 case XFS_DA3_NODE_MAGIC:
294                         if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
295                                 xfs_verifier_error(bp, -EFSBADCRC,
296                                                 __this_address);
297                                 break;
298                         }
299                         fallthrough;
300                 case XFS_DA_NODE_MAGIC:
301                         fa = xfs_da3_node_verify(bp);
302                         if (fa)
303                                 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
304                         return;
305                 case XFS_ATTR_LEAF_MAGIC:
306                 case XFS_ATTR3_LEAF_MAGIC:
307                         bp->b_ops = &xfs_attr3_leaf_buf_ops;
308                         bp->b_ops->verify_read(bp);
309                         return;
310                 case XFS_DIR2_LEAFN_MAGIC:
311                 case XFS_DIR3_LEAFN_MAGIC:
312                         bp->b_ops = &xfs_dir3_leafn_buf_ops;
313                         bp->b_ops->verify_read(bp);
314                         return;
315                 default:
316                         xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
317                         break;
318         }
319 }
320
321 /* Verify the structure of a da3 block. */
322 static xfs_failaddr_t
323 xfs_da3_node_verify_struct(
324         struct xfs_buf          *bp)
325 {
326         struct xfs_da_blkinfo   *info = bp->b_addr;
327
328         switch (be16_to_cpu(info->magic)) {
329         case XFS_DA3_NODE_MAGIC:
330         case XFS_DA_NODE_MAGIC:
331                 return xfs_da3_node_verify(bp);
332         case XFS_ATTR_LEAF_MAGIC:
333         case XFS_ATTR3_LEAF_MAGIC:
334                 bp->b_ops = &xfs_attr3_leaf_buf_ops;
335                 return bp->b_ops->verify_struct(bp);
336         case XFS_DIR2_LEAFN_MAGIC:
337         case XFS_DIR3_LEAFN_MAGIC:
338                 bp->b_ops = &xfs_dir3_leafn_buf_ops;
339                 return bp->b_ops->verify_struct(bp);
340         default:
341                 return __this_address;
342         }
343 }
344
345 const struct xfs_buf_ops xfs_da3_node_buf_ops = {
346         .name = "xfs_da3_node",
347         .magic16 = { cpu_to_be16(XFS_DA_NODE_MAGIC),
348                      cpu_to_be16(XFS_DA3_NODE_MAGIC) },
349         .verify_read = xfs_da3_node_read_verify,
350         .verify_write = xfs_da3_node_write_verify,
351         .verify_struct = xfs_da3_node_verify_struct,
352 };
353
354 static int
355 xfs_da3_node_set_type(
356         struct xfs_trans        *tp,
357         struct xfs_inode        *dp,
358         int                     whichfork,
359         struct xfs_buf          *bp)
360 {
361         struct xfs_da_blkinfo   *info = bp->b_addr;
362
363         switch (be16_to_cpu(info->magic)) {
364         case XFS_DA_NODE_MAGIC:
365         case XFS_DA3_NODE_MAGIC:
366                 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
367                 return 0;
368         case XFS_ATTR_LEAF_MAGIC:
369         case XFS_ATTR3_LEAF_MAGIC:
370                 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_ATTR_LEAF_BUF);
371                 return 0;
372         case XFS_DIR2_LEAFN_MAGIC:
373         case XFS_DIR3_LEAFN_MAGIC:
374                 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
375                 return 0;
376         default:
377                 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, tp->t_mountp,
378                                 info, sizeof(*info));
379                 xfs_trans_brelse(tp, bp);
380                 xfs_dirattr_mark_sick(dp, whichfork);
381                 return -EFSCORRUPTED;
382         }
383 }
384
385 int
386 xfs_da3_node_read(
387         struct xfs_trans        *tp,
388         struct xfs_inode        *dp,
389         xfs_dablk_t             bno,
390         struct xfs_buf          **bpp,
391         int                     whichfork)
392 {
393         int                     error;
394
395         error = xfs_da_read_buf(tp, dp, bno, 0, bpp, whichfork,
396                         &xfs_da3_node_buf_ops);
397         if (error || !*bpp || !tp)
398                 return error;
399         return xfs_da3_node_set_type(tp, dp, whichfork, *bpp);
400 }
401
402 int
403 xfs_da3_node_read_mapped(
404         struct xfs_trans        *tp,
405         struct xfs_inode        *dp,
406         xfs_daddr_t             mappedbno,
407         struct xfs_buf          **bpp,
408         int                     whichfork)
409 {
410         struct xfs_mount        *mp = dp->i_mount;
411         int                     error;
412
413         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, mappedbno,
414                         XFS_FSB_TO_BB(mp, xfs_dabuf_nfsb(mp, whichfork)), 0,
415                         bpp, &xfs_da3_node_buf_ops);
416         if (xfs_metadata_is_sick(error))
417                 xfs_dirattr_mark_sick(dp, whichfork);
418         if (error || !*bpp)
419                 return error;
420
421         if (whichfork == XFS_ATTR_FORK)
422                 xfs_buf_set_ref(*bpp, XFS_ATTR_BTREE_REF);
423         else
424                 xfs_buf_set_ref(*bpp, XFS_DIR_BTREE_REF);
425
426         if (!tp)
427                 return 0;
428         return xfs_da3_node_set_type(tp, dp, whichfork, *bpp);
429 }
430
431 /*
432  * Copy src directory/attr leaf/node buffer to the dst.
433  * For v5 file systems make sure the right blkno is stamped in.
434  */
435 void
436 xfs_da_buf_copy(
437         struct xfs_buf *dst,
438         struct xfs_buf *src,
439         size_t size)
440 {
441         struct xfs_da3_blkinfo *da3 = dst->b_addr;
442
443         memcpy(dst->b_addr, src->b_addr, size);
444         dst->b_ops = src->b_ops;
445         xfs_trans_buf_copy_type(dst, src);
446         if (xfs_has_crc(dst->b_mount))
447                 da3->blkno = cpu_to_be64(xfs_buf_daddr(dst));
448 }
449
450 /*========================================================================
451  * Routines used for growing the Btree.
452  *========================================================================*/
453
454 /*
455  * Create the initial contents of an intermediate node.
456  */
457 int
458 xfs_da3_node_create(
459         struct xfs_da_args      *args,
460         xfs_dablk_t             blkno,
461         int                     level,
462         struct xfs_buf          **bpp,
463         int                     whichfork)
464 {
465         struct xfs_da_intnode   *node;
466         struct xfs_trans        *tp = args->trans;
467         struct xfs_mount        *mp = tp->t_mountp;
468         struct xfs_da3_icnode_hdr ichdr = {0};
469         struct xfs_buf          *bp;
470         int                     error;
471         struct xfs_inode        *dp = args->dp;
472
473         trace_xfs_da_node_create(args);
474         ASSERT(level <= XFS_DA_NODE_MAXDEPTH);
475
476         error = xfs_da_get_buf(tp, dp, blkno, &bp, whichfork);
477         if (error)
478                 return error;
479         bp->b_ops = &xfs_da3_node_buf_ops;
480         xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
481         node = bp->b_addr;
482
483         if (xfs_has_crc(mp)) {
484                 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
485
486                 memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr));
487                 ichdr.magic = XFS_DA3_NODE_MAGIC;
488                 hdr3->info.blkno = cpu_to_be64(xfs_buf_daddr(bp));
489                 hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
490                 uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid);
491         } else {
492                 ichdr.magic = XFS_DA_NODE_MAGIC;
493         }
494         ichdr.level = level;
495
496         xfs_da3_node_hdr_to_disk(dp->i_mount, node, &ichdr);
497         xfs_trans_log_buf(tp, bp,
498                 XFS_DA_LOGRANGE(node, &node->hdr, args->geo->node_hdr_size));
499
500         *bpp = bp;
501         return 0;
502 }
503
504 /*
505  * Split a leaf node, rebalance, then possibly split
506  * intermediate nodes, rebalance, etc.
507  */
508 int                                                     /* error */
509 xfs_da3_split(
510         struct xfs_da_state     *state)
511 {
512         struct xfs_da_state_blk *oldblk;
513         struct xfs_da_state_blk *newblk;
514         struct xfs_da_state_blk *addblk;
515         struct xfs_da_intnode   *node;
516         int                     max;
517         int                     action = 0;
518         int                     error;
519         int                     i;
520
521         trace_xfs_da_split(state->args);
522
523         if (XFS_TEST_ERROR(false, state->mp, XFS_ERRTAG_DA_LEAF_SPLIT))
524                 return -EIO;
525
526         /*
527          * Walk back up the tree splitting/inserting/adjusting as necessary.
528          * If we need to insert and there isn't room, split the node, then
529          * decide which fragment to insert the new block from below into.
530          * Note that we may split the root this way, but we need more fixup.
531          */
532         max = state->path.active - 1;
533         ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
534         ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
535                state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
536
537         addblk = &state->path.blk[max];         /* initial dummy value */
538         for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
539                 oldblk = &state->path.blk[i];
540                 newblk = &state->altpath.blk[i];
541
542                 /*
543                  * If a leaf node then
544                  *     Allocate a new leaf node, then rebalance across them.
545                  * else if an intermediate node then
546                  *     We split on the last layer, must we split the node?
547                  */
548                 switch (oldblk->magic) {
549                 case XFS_ATTR_LEAF_MAGIC:
550                         error = xfs_attr3_leaf_split(state, oldblk, newblk);
551                         if ((error != 0) && (error != -ENOSPC)) {
552                                 return error;   /* GROT: attr is inconsistent */
553                         }
554                         if (!error) {
555                                 addblk = newblk;
556                                 break;
557                         }
558                         /*
559                          * Entry wouldn't fit, split the leaf again. The new
560                          * extrablk will be consumed by xfs_da3_node_split if
561                          * the node is split.
562                          */
563                         state->extravalid = 1;
564                         if (state->inleaf) {
565                                 state->extraafter = 0;  /* before newblk */
566                                 trace_xfs_attr_leaf_split_before(state->args);
567                                 error = xfs_attr3_leaf_split(state, oldblk,
568                                                             &state->extrablk);
569                         } else {
570                                 state->extraafter = 1;  /* after newblk */
571                                 trace_xfs_attr_leaf_split_after(state->args);
572                                 error = xfs_attr3_leaf_split(state, newblk,
573                                                             &state->extrablk);
574                         }
575                         if (error)
576                                 return error;   /* GROT: attr inconsistent */
577                         addblk = newblk;
578                         break;
579                 case XFS_DIR2_LEAFN_MAGIC:
580                         error = xfs_dir2_leafn_split(state, oldblk, newblk);
581                         if (error)
582                                 return error;
583                         addblk = newblk;
584                         break;
585                 case XFS_DA_NODE_MAGIC:
586                         error = xfs_da3_node_split(state, oldblk, newblk, addblk,
587                                                          max - i, &action);
588                         addblk->bp = NULL;
589                         if (error)
590                                 return error;   /* GROT: dir is inconsistent */
591                         /*
592                          * Record the newly split block for the next time thru?
593                          */
594                         if (action)
595                                 addblk = newblk;
596                         else
597                                 addblk = NULL;
598                         break;
599                 }
600
601                 /*
602                  * Update the btree to show the new hashval for this child.
603                  */
604                 xfs_da3_fixhashpath(state, &state->path);
605         }
606         if (!addblk)
607                 return 0;
608
609         /*
610          * xfs_da3_node_split() should have consumed any extra blocks we added
611          * during a double leaf split in the attr fork. This is guaranteed as
612          * we can't be here if the attr fork only has a single leaf block.
613          */
614         ASSERT(state->extravalid == 0 ||
615                state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
616
617         /*
618          * Split the root node.
619          */
620         ASSERT(state->path.active == 0);
621         oldblk = &state->path.blk[0];
622         error = xfs_da3_root_split(state, oldblk, addblk);
623         if (error)
624                 goto out;
625
626         /*
627          * Update pointers to the node which used to be block 0 and just got
628          * bumped because of the addition of a new root node.  Note that the
629          * original block 0 could be at any position in the list of blocks in
630          * the tree.
631          *
632          * Note: the magic numbers and sibling pointers are in the same physical
633          * place for both v2 and v3 headers (by design). Hence it doesn't matter
634          * which version of the xfs_da_intnode structure we use here as the
635          * result will be the same using either structure.
636          */
637         node = oldblk->bp->b_addr;
638         if (node->hdr.info.forw) {
639                 if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) {
640                         xfs_buf_mark_corrupt(oldblk->bp);
641                         xfs_da_mark_sick(state->args);
642                         error = -EFSCORRUPTED;
643                         goto out;
644                 }
645                 node = addblk->bp->b_addr;
646                 node->hdr.info.back = cpu_to_be32(oldblk->blkno);
647                 xfs_trans_log_buf(state->args->trans, addblk->bp,
648                                   XFS_DA_LOGRANGE(node, &node->hdr.info,
649                                   sizeof(node->hdr.info)));
650         }
651         node = oldblk->bp->b_addr;
652         if (node->hdr.info.back) {
653                 if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) {
654                         xfs_buf_mark_corrupt(oldblk->bp);
655                         xfs_da_mark_sick(state->args);
656                         error = -EFSCORRUPTED;
657                         goto out;
658                 }
659                 node = addblk->bp->b_addr;
660                 node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
661                 xfs_trans_log_buf(state->args->trans, addblk->bp,
662                                   XFS_DA_LOGRANGE(node, &node->hdr.info,
663                                   sizeof(node->hdr.info)));
664         }
665 out:
666         addblk->bp = NULL;
667         return error;
668 }
669
670 /*
671  * Split the root.  We have to create a new root and point to the two
672  * parts (the split old root) that we just created.  Copy block zero to
673  * the EOF, extending the inode in process.
674  */
675 STATIC int                                              /* error */
676 xfs_da3_root_split(
677         struct xfs_da_state     *state,
678         struct xfs_da_state_blk *blk1,
679         struct xfs_da_state_blk *blk2)
680 {
681         struct xfs_da_intnode   *node;
682         struct xfs_da_intnode   *oldroot;
683         struct xfs_da_node_entry *btree;
684         struct xfs_da3_icnode_hdr nodehdr;
685         struct xfs_da_args      *args;
686         struct xfs_buf          *bp;
687         struct xfs_inode        *dp;
688         struct xfs_trans        *tp;
689         struct xfs_dir2_leaf    *leaf;
690         xfs_dablk_t             blkno;
691         int                     level;
692         int                     error;
693         int                     size;
694
695         trace_xfs_da_root_split(state->args);
696
697         /*
698          * Copy the existing (incorrect) block from the root node position
699          * to a free space somewhere.
700          */
701         args = state->args;
702         error = xfs_da_grow_inode(args, &blkno);
703         if (error)
704                 return error;
705
706         dp = args->dp;
707         tp = args->trans;
708         error = xfs_da_get_buf(tp, dp, blkno, &bp, args->whichfork);
709         if (error)
710                 return error;
711         node = bp->b_addr;
712         oldroot = blk1->bp->b_addr;
713         if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
714             oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
715                 struct xfs_da3_icnode_hdr icnodehdr;
716
717                 xfs_da3_node_hdr_from_disk(dp->i_mount, &icnodehdr, oldroot);
718                 btree = icnodehdr.btree;
719                 size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot);
720                 level = icnodehdr.level;
721         } else {
722                 struct xfs_dir3_icleaf_hdr leafhdr;
723
724                 leaf = (xfs_dir2_leaf_t *)oldroot;
725                 xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf);
726
727                 ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
728                        leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
729                 size = (int)((char *)&leafhdr.ents[leafhdr.count] -
730                         (char *)leaf);
731                 level = 0;
732         }
733
734         /*
735          * Copy old root to new buffer and log it.
736          */
737         xfs_da_buf_copy(bp, blk1->bp, size);
738         xfs_trans_log_buf(tp, bp, 0, size - 1);
739
740         /*
741          * Update blk1 to point to new buffer.
742          */
743         blk1->bp = bp;
744         blk1->blkno = blkno;
745
746         /*
747          * Set up the new root node.
748          */
749         error = xfs_da3_node_create(args,
750                 (args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0,
751                 level + 1, &bp, args->whichfork);
752         if (error)
753                 return error;
754
755         node = bp->b_addr;
756         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
757         btree = nodehdr.btree;
758         btree[0].hashval = cpu_to_be32(blk1->hashval);
759         btree[0].before = cpu_to_be32(blk1->blkno);
760         btree[1].hashval = cpu_to_be32(blk2->hashval);
761         btree[1].before = cpu_to_be32(blk2->blkno);
762         nodehdr.count = 2;
763         xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
764
765 #ifdef DEBUG
766         if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
767             oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
768                 ASSERT(blk1->blkno >= args->geo->leafblk &&
769                        blk1->blkno < args->geo->freeblk);
770                 ASSERT(blk2->blkno >= args->geo->leafblk &&
771                        blk2->blkno < args->geo->freeblk);
772         }
773 #endif
774
775         /* Header is already logged by xfs_da_node_create */
776         xfs_trans_log_buf(tp, bp,
777                 XFS_DA_LOGRANGE(node, btree, sizeof(xfs_da_node_entry_t) * 2));
778
779         return 0;
780 }
781
782 /*
783  * Split the node, rebalance, then add the new entry.
784  */
785 STATIC int                                              /* error */
786 xfs_da3_node_split(
787         struct xfs_da_state     *state,
788         struct xfs_da_state_blk *oldblk,
789         struct xfs_da_state_blk *newblk,
790         struct xfs_da_state_blk *addblk,
791         int                     treelevel,
792         int                     *result)
793 {
794         struct xfs_da_intnode   *node;
795         struct xfs_da3_icnode_hdr nodehdr;
796         xfs_dablk_t             blkno;
797         int                     newcount;
798         int                     error;
799         int                     useextra;
800         struct xfs_inode        *dp = state->args->dp;
801
802         trace_xfs_da_node_split(state->args);
803
804         node = oldblk->bp->b_addr;
805         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
806
807         /*
808          * With V2 dirs the extra block is data or freespace.
809          */
810         useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
811         newcount = 1 + useextra;
812         /*
813          * Do we have to split the node?
814          */
815         if (nodehdr.count + newcount > state->args->geo->node_ents) {
816                 /*
817                  * Allocate a new node, add to the doubly linked chain of
818                  * nodes, then move some of our excess entries into it.
819                  */
820                 error = xfs_da_grow_inode(state->args, &blkno);
821                 if (error)
822                         return error;   /* GROT: dir is inconsistent */
823
824                 error = xfs_da3_node_create(state->args, blkno, treelevel,
825                                            &newblk->bp, state->args->whichfork);
826                 if (error)
827                         return error;   /* GROT: dir is inconsistent */
828                 newblk->blkno = blkno;
829                 newblk->magic = XFS_DA_NODE_MAGIC;
830                 xfs_da3_node_rebalance(state, oldblk, newblk);
831                 error = xfs_da3_blk_link(state, oldblk, newblk);
832                 if (error)
833                         return error;
834                 *result = 1;
835         } else {
836                 *result = 0;
837         }
838
839         /*
840          * Insert the new entry(s) into the correct block
841          * (updating last hashval in the process).
842          *
843          * xfs_da3_node_add() inserts BEFORE the given index,
844          * and as a result of using node_lookup_int() we always
845          * point to a valid entry (not after one), but a split
846          * operation always results in a new block whose hashvals
847          * FOLLOW the current block.
848          *
849          * If we had double-split op below us, then add the extra block too.
850          */
851         node = oldblk->bp->b_addr;
852         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
853         if (oldblk->index <= nodehdr.count) {
854                 oldblk->index++;
855                 xfs_da3_node_add(state, oldblk, addblk);
856                 if (useextra) {
857                         if (state->extraafter)
858                                 oldblk->index++;
859                         xfs_da3_node_add(state, oldblk, &state->extrablk);
860                         state->extravalid = 0;
861                 }
862         } else {
863                 newblk->index++;
864                 xfs_da3_node_add(state, newblk, addblk);
865                 if (useextra) {
866                         if (state->extraafter)
867                                 newblk->index++;
868                         xfs_da3_node_add(state, newblk, &state->extrablk);
869                         state->extravalid = 0;
870                 }
871         }
872
873         return 0;
874 }
875
876 /*
877  * Balance the btree elements between two intermediate nodes,
878  * usually one full and one empty.
879  *
880  * NOTE: if blk2 is empty, then it will get the upper half of blk1.
881  */
882 STATIC void
883 xfs_da3_node_rebalance(
884         struct xfs_da_state     *state,
885         struct xfs_da_state_blk *blk1,
886         struct xfs_da_state_blk *blk2)
887 {
888         struct xfs_da_intnode   *node1;
889         struct xfs_da_intnode   *node2;
890         struct xfs_da_node_entry *btree1;
891         struct xfs_da_node_entry *btree2;
892         struct xfs_da_node_entry *btree_s;
893         struct xfs_da_node_entry *btree_d;
894         struct xfs_da3_icnode_hdr nodehdr1;
895         struct xfs_da3_icnode_hdr nodehdr2;
896         struct xfs_trans        *tp;
897         int                     count;
898         int                     tmp;
899         int                     swap = 0;
900         struct xfs_inode        *dp = state->args->dp;
901
902         trace_xfs_da_node_rebalance(state->args);
903
904         node1 = blk1->bp->b_addr;
905         node2 = blk2->bp->b_addr;
906         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
907         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
908         btree1 = nodehdr1.btree;
909         btree2 = nodehdr2.btree;
910
911         /*
912          * Figure out how many entries need to move, and in which direction.
913          * Swap the nodes around if that makes it simpler.
914          */
915         if (nodehdr1.count > 0 && nodehdr2.count > 0 &&
916             ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
917              (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) <
918                         be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) {
919                 swap(node1, node2);
920                 xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
921                 xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
922                 btree1 = nodehdr1.btree;
923                 btree2 = nodehdr2.btree;
924                 swap = 1;
925         }
926
927         count = (nodehdr1.count - nodehdr2.count) / 2;
928         if (count == 0)
929                 return;
930         tp = state->args->trans;
931         /*
932          * Two cases: high-to-low and low-to-high.
933          */
934         if (count > 0) {
935                 /*
936                  * Move elements in node2 up to make a hole.
937                  */
938                 tmp = nodehdr2.count;
939                 if (tmp > 0) {
940                         tmp *= (uint)sizeof(xfs_da_node_entry_t);
941                         btree_s = &btree2[0];
942                         btree_d = &btree2[count];
943                         memmove(btree_d, btree_s, tmp);
944                 }
945
946                 /*
947                  * Move the req'd B-tree elements from high in node1 to
948                  * low in node2.
949                  */
950                 nodehdr2.count += count;
951                 tmp = count * (uint)sizeof(xfs_da_node_entry_t);
952                 btree_s = &btree1[nodehdr1.count - count];
953                 btree_d = &btree2[0];
954                 memcpy(btree_d, btree_s, tmp);
955                 nodehdr1.count -= count;
956         } else {
957                 /*
958                  * Move the req'd B-tree elements from low in node2 to
959                  * high in node1.
960                  */
961                 count = -count;
962                 tmp = count * (uint)sizeof(xfs_da_node_entry_t);
963                 btree_s = &btree2[0];
964                 btree_d = &btree1[nodehdr1.count];
965                 memcpy(btree_d, btree_s, tmp);
966                 nodehdr1.count += count;
967
968                 xfs_trans_log_buf(tp, blk1->bp,
969                         XFS_DA_LOGRANGE(node1, btree_d, tmp));
970
971                 /*
972                  * Move elements in node2 down to fill the hole.
973                  */
974                 tmp  = nodehdr2.count - count;
975                 tmp *= (uint)sizeof(xfs_da_node_entry_t);
976                 btree_s = &btree2[count];
977                 btree_d = &btree2[0];
978                 memmove(btree_d, btree_s, tmp);
979                 nodehdr2.count -= count;
980         }
981
982         /*
983          * Log header of node 1 and all current bits of node 2.
984          */
985         xfs_da3_node_hdr_to_disk(dp->i_mount, node1, &nodehdr1);
986         xfs_trans_log_buf(tp, blk1->bp,
987                 XFS_DA_LOGRANGE(node1, &node1->hdr,
988                                 state->args->geo->node_hdr_size));
989
990         xfs_da3_node_hdr_to_disk(dp->i_mount, node2, &nodehdr2);
991         xfs_trans_log_buf(tp, blk2->bp,
992                 XFS_DA_LOGRANGE(node2, &node2->hdr,
993                                 state->args->geo->node_hdr_size +
994                                 (sizeof(btree2[0]) * nodehdr2.count)));
995
996         /*
997          * Record the last hashval from each block for upward propagation.
998          * (note: don't use the swapped node pointers)
999          */
1000         if (swap) {
1001                 node1 = blk1->bp->b_addr;
1002                 node2 = blk2->bp->b_addr;
1003                 xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
1004                 xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
1005                 btree1 = nodehdr1.btree;
1006                 btree2 = nodehdr2.btree;
1007         }
1008         blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval);
1009         blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval);
1010
1011         /*
1012          * Adjust the expected index for insertion.
1013          */
1014         if (blk1->index >= nodehdr1.count) {
1015                 blk2->index = blk1->index - nodehdr1.count;
1016                 blk1->index = nodehdr1.count + 1;       /* make it invalid */
1017         }
1018 }
1019
1020 /*
1021  * Add a new entry to an intermediate node.
1022  */
1023 STATIC void
1024 xfs_da3_node_add(
1025         struct xfs_da_state     *state,
1026         struct xfs_da_state_blk *oldblk,
1027         struct xfs_da_state_blk *newblk)
1028 {
1029         struct xfs_da_intnode   *node;
1030         struct xfs_da3_icnode_hdr nodehdr;
1031         struct xfs_da_node_entry *btree;
1032         int                     tmp;
1033         struct xfs_inode        *dp = state->args->dp;
1034
1035         trace_xfs_da_node_add(state->args);
1036
1037         node = oldblk->bp->b_addr;
1038         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
1039         btree = nodehdr.btree;
1040
1041         ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count);
1042         ASSERT(newblk->blkno != 0);
1043         if (state->args->whichfork == XFS_DATA_FORK)
1044                 ASSERT(newblk->blkno >= state->args->geo->leafblk &&
1045                        newblk->blkno < state->args->geo->freeblk);
1046
1047         /*
1048          * We may need to make some room before we insert the new node.
1049          */
1050         tmp = 0;
1051         if (oldblk->index < nodehdr.count) {
1052                 tmp = (nodehdr.count - oldblk->index) * (uint)sizeof(*btree);
1053                 memmove(&btree[oldblk->index + 1], &btree[oldblk->index], tmp);
1054         }
1055         btree[oldblk->index].hashval = cpu_to_be32(newblk->hashval);
1056         btree[oldblk->index].before = cpu_to_be32(newblk->blkno);
1057         xfs_trans_log_buf(state->args->trans, oldblk->bp,
1058                 XFS_DA_LOGRANGE(node, &btree[oldblk->index],
1059                                 tmp + sizeof(*btree)));
1060
1061         nodehdr.count += 1;
1062         xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
1063         xfs_trans_log_buf(state->args->trans, oldblk->bp,
1064                 XFS_DA_LOGRANGE(node, &node->hdr,
1065                                 state->args->geo->node_hdr_size));
1066
1067         /*
1068          * Copy the last hash value from the oldblk to propagate upwards.
1069          */
1070         oldblk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
1071 }
1072
1073 /*========================================================================
1074  * Routines used for shrinking the Btree.
1075  *========================================================================*/
1076
1077 /*
1078  * Deallocate an empty leaf node, remove it from its parent,
1079  * possibly deallocating that block, etc...
1080  */
1081 int
1082 xfs_da3_join(
1083         struct xfs_da_state     *state)
1084 {
1085         struct xfs_da_state_blk *drop_blk;
1086         struct xfs_da_state_blk *save_blk;
1087         int                     action = 0;
1088         int                     error;
1089
1090         trace_xfs_da_join(state->args);
1091
1092         drop_blk = &state->path.blk[ state->path.active-1 ];
1093         save_blk = &state->altpath.blk[ state->path.active-1 ];
1094         ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
1095         ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
1096                drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
1097
1098         /*
1099          * Walk back up the tree joining/deallocating as necessary.
1100          * When we stop dropping blocks, break out.
1101          */
1102         for (  ; state->path.active >= 2; drop_blk--, save_blk--,
1103                  state->path.active--) {
1104                 /*
1105                  * See if we can combine the block with a neighbor.
1106                  *   (action == 0) => no options, just leave
1107                  *   (action == 1) => coalesce, then unlink
1108                  *   (action == 2) => block empty, unlink it
1109                  */
1110                 switch (drop_blk->magic) {
1111                 case XFS_ATTR_LEAF_MAGIC:
1112                         error = xfs_attr3_leaf_toosmall(state, &action);
1113                         if (error)
1114                                 return error;
1115                         if (action == 0)
1116                                 return 0;
1117                         xfs_attr3_leaf_unbalance(state, drop_blk, save_blk);
1118                         break;
1119                 case XFS_DIR2_LEAFN_MAGIC:
1120                         error = xfs_dir2_leafn_toosmall(state, &action);
1121                         if (error)
1122                                 return error;
1123                         if (action == 0)
1124                                 return 0;
1125                         xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
1126                         break;
1127                 case XFS_DA_NODE_MAGIC:
1128                         /*
1129                          * Remove the offending node, fixup hashvals,
1130                          * check for a toosmall neighbor.
1131                          */
1132                         xfs_da3_node_remove(state, drop_blk);
1133                         xfs_da3_fixhashpath(state, &state->path);
1134                         error = xfs_da3_node_toosmall(state, &action);
1135                         if (error)
1136                                 return error;
1137                         if (action == 0)
1138                                 return 0;
1139                         xfs_da3_node_unbalance(state, drop_blk, save_blk);
1140                         break;
1141                 }
1142                 xfs_da3_fixhashpath(state, &state->altpath);
1143                 error = xfs_da3_blk_unlink(state, drop_blk, save_blk);
1144                 xfs_da_state_kill_altpath(state);
1145                 if (error)
1146                         return error;
1147                 error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
1148                                                          drop_blk->bp);
1149                 drop_blk->bp = NULL;
1150                 if (error)
1151                         return error;
1152         }
1153         /*
1154          * We joined all the way to the top.  If it turns out that
1155          * we only have one entry in the root, make the child block
1156          * the new root.
1157          */
1158         xfs_da3_node_remove(state, drop_blk);
1159         xfs_da3_fixhashpath(state, &state->path);
1160         error = xfs_da3_root_join(state, &state->path.blk[0]);
1161         return error;
1162 }
1163
1164 #ifdef  DEBUG
1165 static void
1166 xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
1167 {
1168         __be16  magic = blkinfo->magic;
1169
1170         if (level == 1) {
1171                 ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
1172                        magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
1173                        magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
1174                        magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
1175         } else {
1176                 ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
1177                        magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
1178         }
1179         ASSERT(!blkinfo->forw);
1180         ASSERT(!blkinfo->back);
1181 }
1182 #else   /* !DEBUG */
1183 #define xfs_da_blkinfo_onlychild_validate(blkinfo, level)
1184 #endif  /* !DEBUG */
1185
1186 /*
1187  * We have only one entry in the root.  Copy the only remaining child of
1188  * the old root to block 0 as the new root node.
1189  */
1190 STATIC int
1191 xfs_da3_root_join(
1192         struct xfs_da_state     *state,
1193         struct xfs_da_state_blk *root_blk)
1194 {
1195         struct xfs_da_intnode   *oldroot;
1196         struct xfs_da_args      *args;
1197         xfs_dablk_t             child;
1198         struct xfs_buf          *bp;
1199         struct xfs_da3_icnode_hdr oldroothdr;
1200         int                     error;
1201         struct xfs_inode        *dp = state->args->dp;
1202
1203         trace_xfs_da_root_join(state->args);
1204
1205         ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
1206
1207         args = state->args;
1208         oldroot = root_blk->bp->b_addr;
1209         xfs_da3_node_hdr_from_disk(dp->i_mount, &oldroothdr, oldroot);
1210         ASSERT(oldroothdr.forw == 0);
1211         ASSERT(oldroothdr.back == 0);
1212
1213         /*
1214          * If the root has more than one child, then don't do anything.
1215          */
1216         if (oldroothdr.count > 1)
1217                 return 0;
1218
1219         /*
1220          * Read in the (only) child block, then copy those bytes into
1221          * the root block's buffer and free the original child block.
1222          */
1223         child = be32_to_cpu(oldroothdr.btree[0].before);
1224         ASSERT(child != 0);
1225         error = xfs_da3_node_read(args->trans, dp, child, &bp, args->whichfork);
1226         if (error)
1227                 return error;
1228         xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level);
1229
1230         /*
1231          * Copy child to root buffer and log it.
1232          */
1233         xfs_da_buf_copy(root_blk->bp, bp, args->geo->blksize);
1234         xfs_trans_log_buf(args->trans, root_blk->bp, 0,
1235                           args->geo->blksize - 1);
1236         /*
1237          * Now we can drop the child buffer.
1238          */
1239         error = xfs_da_shrink_inode(args, child, bp);
1240         return error;
1241 }
1242
1243 /*
1244  * Check a node block and its neighbors to see if the block should be
1245  * collapsed into one or the other neighbor.  Always keep the block
1246  * with the smaller block number.
1247  * If the current block is over 50% full, don't try to join it, return 0.
1248  * If the block is empty, fill in the state structure and return 2.
1249  * If it can be collapsed, fill in the state structure and return 1.
1250  * If nothing can be done, return 0.
1251  */
1252 STATIC int
1253 xfs_da3_node_toosmall(
1254         struct xfs_da_state     *state,
1255         int                     *action)
1256 {
1257         struct xfs_da_intnode   *node;
1258         struct xfs_da_state_blk *blk;
1259         struct xfs_da_blkinfo   *info;
1260         xfs_dablk_t             blkno;
1261         struct xfs_buf          *bp;
1262         struct xfs_da3_icnode_hdr nodehdr;
1263         int                     count;
1264         int                     forward;
1265         int                     error;
1266         int                     retval;
1267         int                     i;
1268         struct xfs_inode        *dp = state->args->dp;
1269
1270         trace_xfs_da_node_toosmall(state->args);
1271
1272         /*
1273          * Check for the degenerate case of the block being over 50% full.
1274          * If so, it's not worth even looking to see if we might be able
1275          * to coalesce with a sibling.
1276          */
1277         blk = &state->path.blk[ state->path.active-1 ];
1278         info = blk->bp->b_addr;
1279         node = (xfs_da_intnode_t *)info;
1280         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
1281         if (nodehdr.count > (state->args->geo->node_ents >> 1)) {
1282                 *action = 0;    /* blk over 50%, don't try to join */
1283                 return 0;       /* blk over 50%, don't try to join */
1284         }
1285
1286         /*
1287          * Check for the degenerate case of the block being empty.
1288          * If the block is empty, we'll simply delete it, no need to
1289          * coalesce it with a sibling block.  We choose (arbitrarily)
1290          * to merge with the forward block unless it is NULL.
1291          */
1292         if (nodehdr.count == 0) {
1293                 /*
1294                  * Make altpath point to the block we want to keep and
1295                  * path point to the block we want to drop (this one).
1296                  */
1297                 forward = (info->forw != 0);
1298                 memcpy(&state->altpath, &state->path, sizeof(state->path));
1299                 error = xfs_da3_path_shift(state, &state->altpath, forward,
1300                                                  0, &retval);
1301                 if (error)
1302                         return error;
1303                 if (retval) {
1304                         *action = 0;
1305                 } else {
1306                         *action = 2;
1307                 }
1308                 return 0;
1309         }
1310
1311         /*
1312          * Examine each sibling block to see if we can coalesce with
1313          * at least 25% free space to spare.  We need to figure out
1314          * whether to merge with the forward or the backward block.
1315          * We prefer coalescing with the lower numbered sibling so as
1316          * to shrink a directory over time.
1317          */
1318         count  = state->args->geo->node_ents;
1319         count -= state->args->geo->node_ents >> 2;
1320         count -= nodehdr.count;
1321
1322         /* start with smaller blk num */
1323         forward = nodehdr.forw < nodehdr.back;
1324         for (i = 0; i < 2; forward = !forward, i++) {
1325                 struct xfs_da3_icnode_hdr thdr;
1326                 if (forward)
1327                         blkno = nodehdr.forw;
1328                 else
1329                         blkno = nodehdr.back;
1330                 if (blkno == 0)
1331                         continue;
1332                 error = xfs_da3_node_read(state->args->trans, dp, blkno, &bp,
1333                                 state->args->whichfork);
1334                 if (error)
1335                         return error;
1336
1337                 node = bp->b_addr;
1338                 xfs_da3_node_hdr_from_disk(dp->i_mount, &thdr, node);
1339                 xfs_trans_brelse(state->args->trans, bp);
1340
1341                 if (count - thdr.count >= 0)
1342                         break;  /* fits with at least 25% to spare */
1343         }
1344         if (i >= 2) {
1345                 *action = 0;
1346                 return 0;
1347         }
1348
1349         /*
1350          * Make altpath point to the block we want to keep (the lower
1351          * numbered block) and path point to the block we want to drop.
1352          */
1353         memcpy(&state->altpath, &state->path, sizeof(state->path));
1354         if (blkno < blk->blkno) {
1355                 error = xfs_da3_path_shift(state, &state->altpath, forward,
1356                                                  0, &retval);
1357         } else {
1358                 error = xfs_da3_path_shift(state, &state->path, forward,
1359                                                  0, &retval);
1360         }
1361         if (error)
1362                 return error;
1363         if (retval) {
1364                 *action = 0;
1365                 return 0;
1366         }
1367         *action = 1;
1368         return 0;
1369 }
1370
1371 /*
1372  * Pick up the last hashvalue from an intermediate node.
1373  */
1374 STATIC uint
1375 xfs_da3_node_lasthash(
1376         struct xfs_inode        *dp,
1377         struct xfs_buf          *bp,
1378         int                     *count)
1379 {
1380         struct xfs_da3_icnode_hdr nodehdr;
1381
1382         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, bp->b_addr);
1383         if (count)
1384                 *count = nodehdr.count;
1385         if (!nodehdr.count)
1386                 return 0;
1387         return be32_to_cpu(nodehdr.btree[nodehdr.count - 1].hashval);
1388 }
1389
1390 /*
1391  * Walk back up the tree adjusting hash values as necessary,
1392  * when we stop making changes, return.
1393  */
1394 void
1395 xfs_da3_fixhashpath(
1396         struct xfs_da_state     *state,
1397         struct xfs_da_state_path *path)
1398 {
1399         struct xfs_da_state_blk *blk;
1400         struct xfs_da_intnode   *node;
1401         struct xfs_da_node_entry *btree;
1402         xfs_dahash_t            lasthash=0;
1403         int                     level;
1404         int                     count;
1405         struct xfs_inode        *dp = state->args->dp;
1406
1407         trace_xfs_da_fixhashpath(state->args);
1408
1409         level = path->active-1;
1410         blk = &path->blk[ level ];
1411         switch (blk->magic) {
1412         case XFS_ATTR_LEAF_MAGIC:
1413                 lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
1414                 if (count == 0)
1415                         return;
1416                 break;
1417         case XFS_DIR2_LEAFN_MAGIC:
1418                 lasthash = xfs_dir2_leaf_lasthash(dp, blk->bp, &count);
1419                 if (count == 0)
1420                         return;
1421                 break;
1422         case XFS_DA_NODE_MAGIC:
1423                 lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count);
1424                 if (count == 0)
1425                         return;
1426                 break;
1427         }
1428         for (blk--, level--; level >= 0; blk--, level--) {
1429                 struct xfs_da3_icnode_hdr nodehdr;
1430
1431                 node = blk->bp->b_addr;
1432                 xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
1433                 btree = nodehdr.btree;
1434                 if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
1435                         break;
1436                 blk->hashval = lasthash;
1437                 btree[blk->index].hashval = cpu_to_be32(lasthash);
1438                 xfs_trans_log_buf(state->args->trans, blk->bp,
1439                                   XFS_DA_LOGRANGE(node, &btree[blk->index],
1440                                                   sizeof(*btree)));
1441
1442                 lasthash = be32_to_cpu(btree[nodehdr.count - 1].hashval);
1443         }
1444 }
1445
1446 /*
1447  * Remove an entry from an intermediate node.
1448  */
1449 STATIC void
1450 xfs_da3_node_remove(
1451         struct xfs_da_state     *state,
1452         struct xfs_da_state_blk *drop_blk)
1453 {
1454         struct xfs_da_intnode   *node;
1455         struct xfs_da3_icnode_hdr nodehdr;
1456         struct xfs_da_node_entry *btree;
1457         int                     index;
1458         int                     tmp;
1459         struct xfs_inode        *dp = state->args->dp;
1460
1461         trace_xfs_da_node_remove(state->args);
1462
1463         node = drop_blk->bp->b_addr;
1464         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
1465         ASSERT(drop_blk->index < nodehdr.count);
1466         ASSERT(drop_blk->index >= 0);
1467
1468         /*
1469          * Copy over the offending entry, or just zero it out.
1470          */
1471         index = drop_blk->index;
1472         btree = nodehdr.btree;
1473         if (index < nodehdr.count - 1) {
1474                 tmp  = nodehdr.count - index - 1;
1475                 tmp *= (uint)sizeof(xfs_da_node_entry_t);
1476                 memmove(&btree[index], &btree[index + 1], tmp);
1477                 xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1478                     XFS_DA_LOGRANGE(node, &btree[index], tmp));
1479                 index = nodehdr.count - 1;
1480         }
1481         memset(&btree[index], 0, sizeof(xfs_da_node_entry_t));
1482         xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1483             XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index])));
1484         nodehdr.count -= 1;
1485         xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
1486         xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1487             XFS_DA_LOGRANGE(node, &node->hdr, state->args->geo->node_hdr_size));
1488
1489         /*
1490          * Copy the last hash value from the block to propagate upwards.
1491          */
1492         drop_blk->hashval = be32_to_cpu(btree[index - 1].hashval);
1493 }
1494
1495 /*
1496  * Unbalance the elements between two intermediate nodes,
1497  * move all Btree elements from one node into another.
1498  */
1499 STATIC void
1500 xfs_da3_node_unbalance(
1501         struct xfs_da_state     *state,
1502         struct xfs_da_state_blk *drop_blk,
1503         struct xfs_da_state_blk *save_blk)
1504 {
1505         struct xfs_da_intnode   *drop_node;
1506         struct xfs_da_intnode   *save_node;
1507         struct xfs_da_node_entry *drop_btree;
1508         struct xfs_da_node_entry *save_btree;
1509         struct xfs_da3_icnode_hdr drop_hdr;
1510         struct xfs_da3_icnode_hdr save_hdr;
1511         struct xfs_trans        *tp;
1512         int                     sindex;
1513         int                     tmp;
1514         struct xfs_inode        *dp = state->args->dp;
1515
1516         trace_xfs_da_node_unbalance(state->args);
1517
1518         drop_node = drop_blk->bp->b_addr;
1519         save_node = save_blk->bp->b_addr;
1520         xfs_da3_node_hdr_from_disk(dp->i_mount, &drop_hdr, drop_node);
1521         xfs_da3_node_hdr_from_disk(dp->i_mount, &save_hdr, save_node);
1522         drop_btree = drop_hdr.btree;
1523         save_btree = save_hdr.btree;
1524         tp = state->args->trans;
1525
1526         /*
1527          * If the dying block has lower hashvals, then move all the
1528          * elements in the remaining block up to make a hole.
1529          */
1530         if ((be32_to_cpu(drop_btree[0].hashval) <
1531                         be32_to_cpu(save_btree[0].hashval)) ||
1532             (be32_to_cpu(drop_btree[drop_hdr.count - 1].hashval) <
1533                         be32_to_cpu(save_btree[save_hdr.count - 1].hashval))) {
1534                 /* XXX: check this - is memmove dst correct? */
1535                 tmp = save_hdr.count * sizeof(xfs_da_node_entry_t);
1536                 memmove(&save_btree[drop_hdr.count], &save_btree[0], tmp);
1537
1538                 sindex = 0;
1539                 xfs_trans_log_buf(tp, save_blk->bp,
1540                         XFS_DA_LOGRANGE(save_node, &save_btree[0],
1541                                 (save_hdr.count + drop_hdr.count) *
1542                                                 sizeof(xfs_da_node_entry_t)));
1543         } else {
1544                 sindex = save_hdr.count;
1545                 xfs_trans_log_buf(tp, save_blk->bp,
1546                         XFS_DA_LOGRANGE(save_node, &save_btree[sindex],
1547                                 drop_hdr.count * sizeof(xfs_da_node_entry_t)));
1548         }
1549
1550         /*
1551          * Move all the B-tree elements from drop_blk to save_blk.
1552          */
1553         tmp = drop_hdr.count * (uint)sizeof(xfs_da_node_entry_t);
1554         memcpy(&save_btree[sindex], &drop_btree[0], tmp);
1555         save_hdr.count += drop_hdr.count;
1556
1557         xfs_da3_node_hdr_to_disk(dp->i_mount, save_node, &save_hdr);
1558         xfs_trans_log_buf(tp, save_blk->bp,
1559                 XFS_DA_LOGRANGE(save_node, &save_node->hdr,
1560                                 state->args->geo->node_hdr_size));
1561
1562         /*
1563          * Save the last hashval in the remaining block for upward propagation.
1564          */
1565         save_blk->hashval = be32_to_cpu(save_btree[save_hdr.count - 1].hashval);
1566 }
1567
1568 /*========================================================================
1569  * Routines used for finding things in the Btree.
1570  *========================================================================*/
1571
1572 /*
1573  * Walk down the Btree looking for a particular filename, filling
1574  * in the state structure as we go.
1575  *
1576  * We will set the state structure to point to each of the elements
1577  * in each of the nodes where either the hashval is or should be.
1578  *
1579  * We support duplicate hashval's so for each entry in the current
1580  * node that could contain the desired hashval, descend.  This is a
1581  * pruned depth-first tree search.
1582  */
1583 int                                                     /* error */
1584 xfs_da3_node_lookup_int(
1585         struct xfs_da_state     *state,
1586         int                     *result)
1587 {
1588         struct xfs_da_state_blk *blk;
1589         struct xfs_da_blkinfo   *curr;
1590         struct xfs_da_intnode   *node;
1591         struct xfs_da_node_entry *btree;
1592         struct xfs_da3_icnode_hdr nodehdr;
1593         struct xfs_da_args      *args;
1594         xfs_dablk_t             blkno;
1595         xfs_dahash_t            hashval;
1596         xfs_dahash_t            btreehashval;
1597         int                     probe;
1598         int                     span;
1599         int                     max;
1600         int                     error;
1601         int                     retval;
1602         unsigned int            expected_level = 0;
1603         uint16_t                magic;
1604         struct xfs_inode        *dp = state->args->dp;
1605
1606         args = state->args;
1607
1608         /*
1609          * Descend thru the B-tree searching each level for the right
1610          * node to use, until the right hashval is found.
1611          */
1612         blkno = args->geo->leafblk;
1613         for (blk = &state->path.blk[0], state->path.active = 1;
1614                          state->path.active <= XFS_DA_NODE_MAXDEPTH;
1615                          blk++, state->path.active++) {
1616                 /*
1617                  * Read the next node down in the tree.
1618                  */
1619                 blk->blkno = blkno;
1620                 error = xfs_da3_node_read(args->trans, args->dp, blkno,
1621                                         &blk->bp, args->whichfork);
1622                 if (error) {
1623                         blk->blkno = 0;
1624                         state->path.active--;
1625                         return error;
1626                 }
1627                 curr = blk->bp->b_addr;
1628                 magic = be16_to_cpu(curr->magic);
1629
1630                 if (magic == XFS_ATTR_LEAF_MAGIC ||
1631                     magic == XFS_ATTR3_LEAF_MAGIC) {
1632                         blk->magic = XFS_ATTR_LEAF_MAGIC;
1633                         blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
1634                         break;
1635                 }
1636
1637                 if (magic == XFS_DIR2_LEAFN_MAGIC ||
1638                     magic == XFS_DIR3_LEAFN_MAGIC) {
1639                         blk->magic = XFS_DIR2_LEAFN_MAGIC;
1640                         blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
1641                                                               blk->bp, NULL);
1642                         break;
1643                 }
1644
1645                 if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) {
1646                         xfs_buf_mark_corrupt(blk->bp);
1647                         xfs_da_mark_sick(args);
1648                         return -EFSCORRUPTED;
1649                 }
1650
1651                 blk->magic = XFS_DA_NODE_MAGIC;
1652
1653                 /*
1654                  * Search an intermediate node for a match.
1655                  */
1656                 node = blk->bp->b_addr;
1657                 xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
1658                 btree = nodehdr.btree;
1659
1660                 /* Tree taller than we can handle; bail out! */
1661                 if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
1662                         xfs_buf_mark_corrupt(blk->bp);
1663                         xfs_da_mark_sick(args);
1664                         return -EFSCORRUPTED;
1665                 }
1666
1667                 /* Check the level from the root. */
1668                 if (blkno == args->geo->leafblk)
1669                         expected_level = nodehdr.level - 1;
1670                 else if (expected_level != nodehdr.level) {
1671                         xfs_buf_mark_corrupt(blk->bp);
1672                         xfs_da_mark_sick(args);
1673                         return -EFSCORRUPTED;
1674                 } else
1675                         expected_level--;
1676
1677                 max = nodehdr.count;
1678                 blk->hashval = be32_to_cpu(btree[max - 1].hashval);
1679
1680                 /*
1681                  * Binary search.  (note: small blocks will skip loop)
1682                  */
1683                 probe = span = max / 2;
1684                 hashval = args->hashval;
1685                 while (span > 4) {
1686                         span /= 2;
1687                         btreehashval = be32_to_cpu(btree[probe].hashval);
1688                         if (btreehashval < hashval)
1689                                 probe += span;
1690                         else if (btreehashval > hashval)
1691                                 probe -= span;
1692                         else
1693                                 break;
1694                 }
1695                 ASSERT((probe >= 0) && (probe < max));
1696                 ASSERT((span <= 4) ||
1697                         (be32_to_cpu(btree[probe].hashval) == hashval));
1698
1699                 /*
1700                  * Since we may have duplicate hashval's, find the first
1701                  * matching hashval in the node.
1702                  */
1703                 while (probe > 0 &&
1704                        be32_to_cpu(btree[probe].hashval) >= hashval) {
1705                         probe--;
1706                 }
1707                 while (probe < max &&
1708                        be32_to_cpu(btree[probe].hashval) < hashval) {
1709                         probe++;
1710                 }
1711
1712                 /*
1713                  * Pick the right block to descend on.
1714                  */
1715                 if (probe == max) {
1716                         blk->index = max - 1;
1717                         blkno = be32_to_cpu(btree[max - 1].before);
1718                 } else {
1719                         blk->index = probe;
1720                         blkno = be32_to_cpu(btree[probe].before);
1721                 }
1722
1723                 /* We can't point back to the root. */
1724                 if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk)) {
1725                         xfs_da_mark_sick(args);
1726                         return -EFSCORRUPTED;
1727                 }
1728         }
1729
1730         if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0)) {
1731                 xfs_da_mark_sick(args);
1732                 return -EFSCORRUPTED;
1733         }
1734
1735         /*
1736          * A leaf block that ends in the hashval that we are interested in
1737          * (final hashval == search hashval) means that the next block may
1738          * contain more entries with the same hashval, shift upward to the
1739          * next leaf and keep searching.
1740          */
1741         for (;;) {
1742                 if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
1743                         retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
1744                                                         &blk->index, state);
1745                 } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1746                         retval = xfs_attr3_leaf_lookup_int(blk->bp, args);
1747                         blk->index = args->index;
1748                         args->blkno = blk->blkno;
1749                 } else {
1750                         ASSERT(0);
1751                         xfs_da_mark_sick(args);
1752                         return -EFSCORRUPTED;
1753                 }
1754                 if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
1755                     (blk->hashval == args->hashval)) {
1756                         error = xfs_da3_path_shift(state, &state->path, 1, 1,
1757                                                          &retval);
1758                         if (error)
1759                                 return error;
1760                         if (retval == 0) {
1761                                 continue;
1762                         } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1763                                 /* path_shift() gives ENOENT */
1764                                 retval = -ENOATTR;
1765                         }
1766                 }
1767                 break;
1768         }
1769         *result = retval;
1770         return 0;
1771 }
1772
1773 /*========================================================================
1774  * Utility routines.
1775  *========================================================================*/
1776
1777 /*
1778  * Compare two intermediate nodes for "order".
1779  */
1780 STATIC int
1781 xfs_da3_node_order(
1782         struct xfs_inode *dp,
1783         struct xfs_buf  *node1_bp,
1784         struct xfs_buf  *node2_bp)
1785 {
1786         struct xfs_da_intnode   *node1;
1787         struct xfs_da_intnode   *node2;
1788         struct xfs_da_node_entry *btree1;
1789         struct xfs_da_node_entry *btree2;
1790         struct xfs_da3_icnode_hdr node1hdr;
1791         struct xfs_da3_icnode_hdr node2hdr;
1792
1793         node1 = node1_bp->b_addr;
1794         node2 = node2_bp->b_addr;
1795         xfs_da3_node_hdr_from_disk(dp->i_mount, &node1hdr, node1);
1796         xfs_da3_node_hdr_from_disk(dp->i_mount, &node2hdr, node2);
1797         btree1 = node1hdr.btree;
1798         btree2 = node2hdr.btree;
1799
1800         if (node1hdr.count > 0 && node2hdr.count > 0 &&
1801             ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
1802              (be32_to_cpu(btree2[node2hdr.count - 1].hashval) <
1803               be32_to_cpu(btree1[node1hdr.count - 1].hashval)))) {
1804                 return 1;
1805         }
1806         return 0;
1807 }
1808
1809 /*
1810  * Link a new block into a doubly linked list of blocks (of whatever type).
1811  */
1812 int                                                     /* error */
1813 xfs_da3_blk_link(
1814         struct xfs_da_state     *state,
1815         struct xfs_da_state_blk *old_blk,
1816         struct xfs_da_state_blk *new_blk)
1817 {
1818         struct xfs_da_blkinfo   *old_info;
1819         struct xfs_da_blkinfo   *new_info;
1820         struct xfs_da_blkinfo   *tmp_info;
1821         struct xfs_da_args      *args;
1822         struct xfs_buf          *bp;
1823         int                     before = 0;
1824         int                     error;
1825         struct xfs_inode        *dp = state->args->dp;
1826
1827         /*
1828          * Set up environment.
1829          */
1830         args = state->args;
1831         ASSERT(args != NULL);
1832         old_info = old_blk->bp->b_addr;
1833         new_info = new_blk->bp->b_addr;
1834         ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
1835                old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1836                old_blk->magic == XFS_ATTR_LEAF_MAGIC);
1837
1838         switch (old_blk->magic) {
1839         case XFS_ATTR_LEAF_MAGIC:
1840                 before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
1841                 break;
1842         case XFS_DIR2_LEAFN_MAGIC:
1843                 before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp);
1844                 break;
1845         case XFS_DA_NODE_MAGIC:
1846                 before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp);
1847                 break;
1848         }
1849
1850         /*
1851          * Link blocks in appropriate order.
1852          */
1853         if (before) {
1854                 /*
1855                  * Link new block in before existing block.
1856                  */
1857                 trace_xfs_da_link_before(args);
1858                 new_info->forw = cpu_to_be32(old_blk->blkno);
1859                 new_info->back = old_info->back;
1860                 if (old_info->back) {
1861                         error = xfs_da3_node_read(args->trans, dp,
1862                                                 be32_to_cpu(old_info->back),
1863                                                 &bp, args->whichfork);
1864                         if (error)
1865                                 return error;
1866                         ASSERT(bp != NULL);
1867                         tmp_info = bp->b_addr;
1868                         ASSERT(tmp_info->magic == old_info->magic);
1869                         ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
1870                         tmp_info->forw = cpu_to_be32(new_blk->blkno);
1871                         xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1872                 }
1873                 old_info->back = cpu_to_be32(new_blk->blkno);
1874         } else {
1875                 /*
1876                  * Link new block in after existing block.
1877                  */
1878                 trace_xfs_da_link_after(args);
1879                 new_info->forw = old_info->forw;
1880                 new_info->back = cpu_to_be32(old_blk->blkno);
1881                 if (old_info->forw) {
1882                         error = xfs_da3_node_read(args->trans, dp,
1883                                                 be32_to_cpu(old_info->forw),
1884                                                 &bp, args->whichfork);
1885                         if (error)
1886                                 return error;
1887                         ASSERT(bp != NULL);
1888                         tmp_info = bp->b_addr;
1889                         ASSERT(tmp_info->magic == old_info->magic);
1890                         ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
1891                         tmp_info->back = cpu_to_be32(new_blk->blkno);
1892                         xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1893                 }
1894                 old_info->forw = cpu_to_be32(new_blk->blkno);
1895         }
1896
1897         xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
1898         xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
1899         return 0;
1900 }
1901
1902 /*
1903  * Unlink a block from a doubly linked list of blocks.
1904  */
1905 STATIC int                                              /* error */
1906 xfs_da3_blk_unlink(
1907         struct xfs_da_state     *state,
1908         struct xfs_da_state_blk *drop_blk,
1909         struct xfs_da_state_blk *save_blk)
1910 {
1911         struct xfs_da_blkinfo   *drop_info;
1912         struct xfs_da_blkinfo   *save_info;
1913         struct xfs_da_blkinfo   *tmp_info;
1914         struct xfs_da_args      *args;
1915         struct xfs_buf          *bp;
1916         int                     error;
1917
1918         /*
1919          * Set up environment.
1920          */
1921         args = state->args;
1922         ASSERT(args != NULL);
1923         save_info = save_blk->bp->b_addr;
1924         drop_info = drop_blk->bp->b_addr;
1925         ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
1926                save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1927                save_blk->magic == XFS_ATTR_LEAF_MAGIC);
1928         ASSERT(save_blk->magic == drop_blk->magic);
1929         ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) ||
1930                (be32_to_cpu(save_info->back) == drop_blk->blkno));
1931         ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) ||
1932                (be32_to_cpu(drop_info->back) == save_blk->blkno));
1933
1934         /*
1935          * Unlink the leaf block from the doubly linked chain of leaves.
1936          */
1937         if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
1938                 trace_xfs_da_unlink_back(args);
1939                 save_info->back = drop_info->back;
1940                 if (drop_info->back) {
1941                         error = xfs_da3_node_read(args->trans, args->dp,
1942                                                 be32_to_cpu(drop_info->back),
1943                                                 &bp, args->whichfork);
1944                         if (error)
1945                                 return error;
1946                         ASSERT(bp != NULL);
1947                         tmp_info = bp->b_addr;
1948                         ASSERT(tmp_info->magic == save_info->magic);
1949                         ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
1950                         tmp_info->forw = cpu_to_be32(save_blk->blkno);
1951                         xfs_trans_log_buf(args->trans, bp, 0,
1952                                                     sizeof(*tmp_info) - 1);
1953                 }
1954         } else {
1955                 trace_xfs_da_unlink_forward(args);
1956                 save_info->forw = drop_info->forw;
1957                 if (drop_info->forw) {
1958                         error = xfs_da3_node_read(args->trans, args->dp,
1959                                                 be32_to_cpu(drop_info->forw),
1960                                                 &bp, args->whichfork);
1961                         if (error)
1962                                 return error;
1963                         ASSERT(bp != NULL);
1964                         tmp_info = bp->b_addr;
1965                         ASSERT(tmp_info->magic == save_info->magic);
1966                         ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
1967                         tmp_info->back = cpu_to_be32(save_blk->blkno);
1968                         xfs_trans_log_buf(args->trans, bp, 0,
1969                                                     sizeof(*tmp_info) - 1);
1970                 }
1971         }
1972
1973         xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
1974         return 0;
1975 }
1976
1977 /*
1978  * Move a path "forward" or "!forward" one block at the current level.
1979  *
1980  * This routine will adjust a "path" to point to the next block
1981  * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
1982  * Btree, including updating pointers to the intermediate nodes between
1983  * the new bottom and the root.
1984  */
1985 int                                                     /* error */
1986 xfs_da3_path_shift(
1987         struct xfs_da_state     *state,
1988         struct xfs_da_state_path *path,
1989         int                     forward,
1990         int                     release,
1991         int                     *result)
1992 {
1993         struct xfs_da_state_blk *blk;
1994         struct xfs_da_blkinfo   *info;
1995         struct xfs_da_args      *args;
1996         struct xfs_da_node_entry *btree;
1997         struct xfs_da3_icnode_hdr nodehdr;
1998         struct xfs_buf          *bp;
1999         xfs_dablk_t             blkno = 0;
2000         int                     level;
2001         int                     error;
2002         struct xfs_inode        *dp = state->args->dp;
2003
2004         trace_xfs_da_path_shift(state->args);
2005
2006         /*
2007          * Roll up the Btree looking for the first block where our
2008          * current index is not at the edge of the block.  Note that
2009          * we skip the bottom layer because we want the sibling block.
2010          */
2011         args = state->args;
2012         ASSERT(args != NULL);
2013         ASSERT(path != NULL);
2014         ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
2015         level = (path->active-1) - 1;   /* skip bottom layer in path */
2016         for (; level >= 0; level--) {
2017                 blk = &path->blk[level];
2018                 xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr,
2019                                            blk->bp->b_addr);
2020
2021                 if (forward && (blk->index < nodehdr.count - 1)) {
2022                         blk->index++;
2023                         blkno = be32_to_cpu(nodehdr.btree[blk->index].before);
2024                         break;
2025                 } else if (!forward && (blk->index > 0)) {
2026                         blk->index--;
2027                         blkno = be32_to_cpu(nodehdr.btree[blk->index].before);
2028                         break;
2029                 }
2030         }
2031         if (level < 0) {
2032                 *result = -ENOENT;      /* we're out of our tree */
2033                 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
2034                 return 0;
2035         }
2036
2037         /*
2038          * Roll down the edge of the subtree until we reach the
2039          * same depth we were at originally.
2040          */
2041         for (blk++, level++; level < path->active; blk++, level++) {
2042                 /*
2043                  * Read the next child block into a local buffer.
2044                  */
2045                 error = xfs_da3_node_read(args->trans, dp, blkno, &bp,
2046                                           args->whichfork);
2047                 if (error)
2048                         return error;
2049
2050                 /*
2051                  * Release the old block (if it's dirty, the trans doesn't
2052                  * actually let go) and swap the local buffer into the path
2053                  * structure. This ensures failure of the above read doesn't set
2054                  * a NULL buffer in an active slot in the path.
2055                  */
2056                 if (release)
2057                         xfs_trans_brelse(args->trans, blk->bp);
2058                 blk->blkno = blkno;
2059                 blk->bp = bp;
2060
2061                 info = blk->bp->b_addr;
2062                 ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
2063                        info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
2064                        info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
2065                        info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
2066                        info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
2067                        info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
2068
2069
2070                 /*
2071                  * Note: we flatten the magic number to a single type so we
2072                  * don't have to compare against crc/non-crc types elsewhere.
2073                  */
2074                 switch (be16_to_cpu(info->magic)) {
2075                 case XFS_DA_NODE_MAGIC:
2076                 case XFS_DA3_NODE_MAGIC:
2077                         blk->magic = XFS_DA_NODE_MAGIC;
2078                         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr,
2079                                                    bp->b_addr);
2080                         btree = nodehdr.btree;
2081                         blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
2082                         if (forward)
2083                                 blk->index = 0;
2084                         else
2085                                 blk->index = nodehdr.count - 1;
2086                         blkno = be32_to_cpu(btree[blk->index].before);
2087                         break;
2088                 case XFS_ATTR_LEAF_MAGIC:
2089                 case XFS_ATTR3_LEAF_MAGIC:
2090                         blk->magic = XFS_ATTR_LEAF_MAGIC;
2091                         ASSERT(level == path->active-1);
2092                         blk->index = 0;
2093                         blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
2094                         break;
2095                 case XFS_DIR2_LEAFN_MAGIC:
2096                 case XFS_DIR3_LEAFN_MAGIC:
2097                         blk->magic = XFS_DIR2_LEAFN_MAGIC;
2098                         ASSERT(level == path->active-1);
2099                         blk->index = 0;
2100                         blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
2101                                                               blk->bp, NULL);
2102                         break;
2103                 default:
2104                         ASSERT(0);
2105                         break;
2106                 }
2107         }
2108         *result = 0;
2109         return 0;
2110 }
2111
2112
2113 /*========================================================================
2114  * Utility routines.
2115  *========================================================================*/
2116
2117 /*
2118  * Implement a simple hash on a character string.
2119  * Rotate the hash value by 7 bits, then XOR each character in.
2120  * This is implemented with some source-level loop unrolling.
2121  */
2122 xfs_dahash_t
2123 xfs_da_hashname(const uint8_t *name, int namelen)
2124 {
2125         xfs_dahash_t hash;
2126
2127         /*
2128          * Do four characters at a time as long as we can.
2129          */
2130         for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
2131                 hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
2132                        (name[3] << 0) ^ rol32(hash, 7 * 4);
2133
2134         /*
2135          * Now do the rest of the characters.
2136          */
2137         switch (namelen) {
2138         case 3:
2139                 return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
2140                        rol32(hash, 7 * 3);
2141         case 2:
2142                 return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
2143         case 1:
2144                 return (name[0] << 0) ^ rol32(hash, 7 * 1);
2145         default: /* case 0: */
2146                 return hash;
2147         }
2148 }
2149
2150 enum xfs_dacmp
2151 xfs_da_compname(
2152         struct xfs_da_args *args,
2153         const unsigned char *name,
2154         int             len)
2155 {
2156         return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
2157                                         XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
2158 }
2159
2160 int
2161 xfs_da_grow_inode_int(
2162         struct xfs_da_args      *args,
2163         xfs_fileoff_t           *bno,
2164         int                     count)
2165 {
2166         struct xfs_trans        *tp = args->trans;
2167         struct xfs_inode        *dp = args->dp;
2168         int                     w = args->whichfork;
2169         xfs_rfsblock_t          nblks = dp->i_nblocks;
2170         struct xfs_bmbt_irec    map, *mapp;
2171         int                     nmap, error, got, i, mapi;
2172
2173         /*
2174          * Find a spot in the file space to put the new block.
2175          */
2176         error = xfs_bmap_first_unused(tp, dp, count, bno, w);
2177         if (error)
2178                 return error;
2179
2180         /*
2181          * Try mapping it in one filesystem block.
2182          */
2183         nmap = 1;
2184         error = xfs_bmapi_write(tp, dp, *bno, count,
2185                         xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
2186                         args->total, &map, &nmap);
2187         if (error)
2188                 return error;
2189
2190         ASSERT(nmap <= 1);
2191         if (nmap == 1) {
2192                 mapp = &map;
2193                 mapi = 1;
2194         } else if (nmap == 0 && count > 1) {
2195                 xfs_fileoff_t           b;
2196                 int                     c;
2197
2198                 /*
2199                  * If we didn't get it and the block might work if fragmented,
2200                  * try without the CONTIG flag.  Loop until we get it all.
2201                  */
2202                 mapp = kmalloc(sizeof(*mapp) * count,
2203                                 GFP_KERNEL | __GFP_NOFAIL);
2204                 for (b = *bno, mapi = 0; b < *bno + count; ) {
2205                         c = (int)(*bno + count - b);
2206                         nmap = min(XFS_BMAP_MAX_NMAP, c);
2207                         error = xfs_bmapi_write(tp, dp, b, c,
2208                                         xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
2209                                         args->total, &mapp[mapi], &nmap);
2210                         if (error)
2211                                 goto out_free_map;
2212                         if (nmap < 1)
2213                                 break;
2214                         mapi += nmap;
2215                         b = mapp[mapi - 1].br_startoff +
2216                             mapp[mapi - 1].br_blockcount;
2217                 }
2218         } else {
2219                 mapi = 0;
2220                 mapp = NULL;
2221         }
2222
2223         /*
2224          * Count the blocks we got, make sure it matches the total.
2225          */
2226         for (i = 0, got = 0; i < mapi; i++)
2227                 got += mapp[i].br_blockcount;
2228         if (got != count || mapp[0].br_startoff != *bno ||
2229             mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
2230             *bno + count) {
2231                 error = -ENOSPC;
2232                 goto out_free_map;
2233         }
2234
2235         /* account for newly allocated blocks in reserved blocks total */
2236         args->total -= dp->i_nblocks - nblks;
2237
2238 out_free_map:
2239         if (mapp != &map)
2240                 kfree(mapp);
2241         return error;
2242 }
2243
2244 /*
2245  * Add a block to the btree ahead of the file.
2246  * Return the new block number to the caller.
2247  */
2248 int
2249 xfs_da_grow_inode(
2250         struct xfs_da_args      *args,
2251         xfs_dablk_t             *new_blkno)
2252 {
2253         xfs_fileoff_t           bno;
2254         int                     error;
2255
2256         trace_xfs_da_grow_inode(args);
2257
2258         bno = args->geo->leafblk;
2259         error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount);
2260         if (!error)
2261                 *new_blkno = (xfs_dablk_t)bno;
2262         return error;
2263 }
2264
2265 /*
2266  * Ick.  We need to always be able to remove a btree block, even
2267  * if there's no space reservation because the filesystem is full.
2268  * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
2269  * It swaps the target block with the last block in the file.  The
2270  * last block in the file can always be removed since it can't cause
2271  * a bmap btree split to do that.
2272  */
2273 STATIC int
2274 xfs_da3_swap_lastblock(
2275         struct xfs_da_args      *args,
2276         xfs_dablk_t             *dead_blknop,
2277         struct xfs_buf          **dead_bufp)
2278 {
2279         struct xfs_da_blkinfo   *dead_info;
2280         struct xfs_da_blkinfo   *sib_info;
2281         struct xfs_da_intnode   *par_node;
2282         struct xfs_da_intnode   *dead_node;
2283         struct xfs_dir2_leaf    *dead_leaf2;
2284         struct xfs_da_node_entry *btree;
2285         struct xfs_da3_icnode_hdr par_hdr;
2286         struct xfs_inode        *dp;
2287         struct xfs_trans        *tp;
2288         struct xfs_mount        *mp;
2289         struct xfs_buf          *dead_buf;
2290         struct xfs_buf          *last_buf;
2291         struct xfs_buf          *sib_buf;
2292         struct xfs_buf          *par_buf;
2293         xfs_dahash_t            dead_hash;
2294         xfs_fileoff_t           lastoff;
2295         xfs_dablk_t             dead_blkno;
2296         xfs_dablk_t             last_blkno;
2297         xfs_dablk_t             sib_blkno;
2298         xfs_dablk_t             par_blkno;
2299         int                     error;
2300         int                     w;
2301         int                     entno;
2302         int                     level;
2303         int                     dead_level;
2304
2305         trace_xfs_da_swap_lastblock(args);
2306
2307         dead_buf = *dead_bufp;
2308         dead_blkno = *dead_blknop;
2309         tp = args->trans;
2310         dp = args->dp;
2311         w = args->whichfork;
2312         ASSERT(w == XFS_DATA_FORK);
2313         mp = dp->i_mount;
2314         lastoff = args->geo->freeblk;
2315         error = xfs_bmap_last_before(tp, dp, &lastoff, w);
2316         if (error)
2317                 return error;
2318         if (XFS_IS_CORRUPT(mp, lastoff == 0)) {
2319                 xfs_da_mark_sick(args);
2320                 return -EFSCORRUPTED;
2321         }
2322         /*
2323          * Read the last block in the btree space.
2324          */
2325         last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount;
2326         error = xfs_da3_node_read(tp, dp, last_blkno, &last_buf, w);
2327         if (error)
2328                 return error;
2329         /*
2330          * Copy the last block into the dead buffer and log it.
2331          */
2332         xfs_da_buf_copy(dead_buf, last_buf, args->geo->blksize);
2333         xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
2334         dead_info = dead_buf->b_addr;
2335
2336         /*
2337          * Get values from the moved block.
2338          */
2339         if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
2340             dead_info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
2341                 struct xfs_dir3_icleaf_hdr leafhdr;
2342                 struct xfs_dir2_leaf_entry *ents;
2343
2344                 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
2345                 xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr,
2346                                             dead_leaf2);
2347                 ents = leafhdr.ents;
2348                 dead_level = 0;
2349                 dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval);
2350         } else {
2351                 struct xfs_da3_icnode_hdr deadhdr;
2352
2353                 dead_node = (xfs_da_intnode_t *)dead_info;
2354                 xfs_da3_node_hdr_from_disk(dp->i_mount, &deadhdr, dead_node);
2355                 btree = deadhdr.btree;
2356                 dead_level = deadhdr.level;
2357                 dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval);
2358         }
2359         sib_buf = par_buf = NULL;
2360         /*
2361          * If the moved block has a left sibling, fix up the pointers.
2362          */
2363         if ((sib_blkno = be32_to_cpu(dead_info->back))) {
2364                 error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w);
2365                 if (error)
2366                         goto done;
2367                 sib_info = sib_buf->b_addr;
2368                 if (XFS_IS_CORRUPT(mp,
2369                                    be32_to_cpu(sib_info->forw) != last_blkno ||
2370                                    sib_info->magic != dead_info->magic)) {
2371                         xfs_da_mark_sick(args);
2372                         error = -EFSCORRUPTED;
2373                         goto done;
2374                 }
2375                 sib_info->forw = cpu_to_be32(dead_blkno);
2376                 xfs_trans_log_buf(tp, sib_buf,
2377                         XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
2378                                         sizeof(sib_info->forw)));
2379                 sib_buf = NULL;
2380         }
2381         /*
2382          * If the moved block has a right sibling, fix up the pointers.
2383          */
2384         if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
2385                 error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w);
2386                 if (error)
2387                         goto done;
2388                 sib_info = sib_buf->b_addr;
2389                 if (XFS_IS_CORRUPT(mp,
2390                                    be32_to_cpu(sib_info->back) != last_blkno ||
2391                                    sib_info->magic != dead_info->magic)) {
2392                         xfs_da_mark_sick(args);
2393                         error = -EFSCORRUPTED;
2394                         goto done;
2395                 }
2396                 sib_info->back = cpu_to_be32(dead_blkno);
2397                 xfs_trans_log_buf(tp, sib_buf,
2398                         XFS_DA_LOGRANGE(sib_info, &sib_info->back,
2399                                         sizeof(sib_info->back)));
2400                 sib_buf = NULL;
2401         }
2402         par_blkno = args->geo->leafblk;
2403         level = -1;
2404         /*
2405          * Walk down the tree looking for the parent of the moved block.
2406          */
2407         for (;;) {
2408                 error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w);
2409                 if (error)
2410                         goto done;
2411                 par_node = par_buf->b_addr;
2412                 xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
2413                 if (XFS_IS_CORRUPT(mp,
2414                                    level >= 0 && level != par_hdr.level + 1)) {
2415                         xfs_da_mark_sick(args);
2416                         error = -EFSCORRUPTED;
2417                         goto done;
2418                 }
2419                 level = par_hdr.level;
2420                 btree = par_hdr.btree;
2421                 for (entno = 0;
2422                      entno < par_hdr.count &&
2423                      be32_to_cpu(btree[entno].hashval) < dead_hash;
2424                      entno++)
2425                         continue;
2426                 if (XFS_IS_CORRUPT(mp, entno == par_hdr.count)) {
2427                         xfs_da_mark_sick(args);
2428                         error = -EFSCORRUPTED;
2429                         goto done;
2430                 }
2431                 par_blkno = be32_to_cpu(btree[entno].before);
2432                 if (level == dead_level + 1)
2433                         break;
2434                 xfs_trans_brelse(tp, par_buf);
2435                 par_buf = NULL;
2436         }
2437         /*
2438          * We're in the right parent block.
2439          * Look for the right entry.
2440          */
2441         for (;;) {
2442                 for (;
2443                      entno < par_hdr.count &&
2444                      be32_to_cpu(btree[entno].before) != last_blkno;
2445                      entno++)
2446                         continue;
2447                 if (entno < par_hdr.count)
2448                         break;
2449                 par_blkno = par_hdr.forw;
2450                 xfs_trans_brelse(tp, par_buf);
2451                 par_buf = NULL;
2452                 if (XFS_IS_CORRUPT(mp, par_blkno == 0)) {
2453                         xfs_da_mark_sick(args);
2454                         error = -EFSCORRUPTED;
2455                         goto done;
2456                 }
2457                 error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w);
2458                 if (error)
2459                         goto done;
2460                 par_node = par_buf->b_addr;
2461                 xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
2462                 if (XFS_IS_CORRUPT(mp, par_hdr.level != level)) {
2463                         xfs_da_mark_sick(args);
2464                         error = -EFSCORRUPTED;
2465                         goto done;
2466                 }
2467                 btree = par_hdr.btree;
2468                 entno = 0;
2469         }
2470         /*
2471          * Update the parent entry pointing to the moved block.
2472          */
2473         btree[entno].before = cpu_to_be32(dead_blkno);
2474         xfs_trans_log_buf(tp, par_buf,
2475                 XFS_DA_LOGRANGE(par_node, &btree[entno].before,
2476                                 sizeof(btree[entno].before)));
2477         *dead_blknop = last_blkno;
2478         *dead_bufp = last_buf;
2479         return 0;
2480 done:
2481         if (par_buf)
2482                 xfs_trans_brelse(tp, par_buf);
2483         if (sib_buf)
2484                 xfs_trans_brelse(tp, sib_buf);
2485         xfs_trans_brelse(tp, last_buf);
2486         return error;
2487 }
2488
2489 /*
2490  * Remove a btree block from a directory or attribute.
2491  */
2492 int
2493 xfs_da_shrink_inode(
2494         struct xfs_da_args      *args,
2495         xfs_dablk_t             dead_blkno,
2496         struct xfs_buf          *dead_buf)
2497 {
2498         struct xfs_inode        *dp;
2499         int                     done, error, w, count;
2500         struct xfs_trans        *tp;
2501
2502         trace_xfs_da_shrink_inode(args);
2503
2504         dp = args->dp;
2505         w = args->whichfork;
2506         tp = args->trans;
2507         count = args->geo->fsbcount;
2508         for (;;) {
2509                 /*
2510                  * Remove extents.  If we get ENOSPC for a dir we have to move
2511                  * the last block to the place we want to kill.
2512                  */
2513                 error = xfs_bunmapi(tp, dp, dead_blkno, count,
2514                                     xfs_bmapi_aflag(w), 0, &done);
2515                 if (error == -ENOSPC) {
2516                         if (w != XFS_DATA_FORK)
2517                                 break;
2518                         error = xfs_da3_swap_lastblock(args, &dead_blkno,
2519                                                       &dead_buf);
2520                         if (error)
2521                                 break;
2522                 } else {
2523                         break;
2524                 }
2525         }
2526         xfs_trans_binval(tp, dead_buf);
2527         return error;
2528 }
2529
2530 static int
2531 xfs_dabuf_map(
2532         struct xfs_inode        *dp,
2533         xfs_dablk_t             bno,
2534         unsigned int            flags,
2535         int                     whichfork,
2536         struct xfs_buf_map      **mapp,
2537         int                     *nmaps)
2538 {
2539         struct xfs_mount        *mp = dp->i_mount;
2540         int                     nfsb = xfs_dabuf_nfsb(mp, whichfork);
2541         struct xfs_bmbt_irec    irec, *irecs = &irec;
2542         struct xfs_buf_map      *map = *mapp;
2543         xfs_fileoff_t           off = bno;
2544         int                     error = 0, nirecs, i;
2545
2546         if (nfsb > 1)
2547                 irecs = kzalloc(sizeof(irec) * nfsb,
2548                                 GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
2549
2550         nirecs = nfsb;
2551         error = xfs_bmapi_read(dp, bno, nfsb, irecs, &nirecs,
2552                         xfs_bmapi_aflag(whichfork));
2553         if (error)
2554                 goto out_free_irecs;
2555
2556         /*
2557          * Use the caller provided map for the single map case, else allocate a
2558          * larger one that needs to be free by the caller.
2559          */
2560         if (nirecs > 1) {
2561                 map = kzalloc(nirecs * sizeof(struct xfs_buf_map),
2562                                 GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
2563                 if (!map) {
2564                         error = -ENOMEM;
2565                         goto out_free_irecs;
2566                 }
2567                 *mapp = map;
2568         }
2569
2570         for (i = 0; i < nirecs; i++) {
2571                 if (irecs[i].br_startblock == HOLESTARTBLOCK ||
2572                     irecs[i].br_startblock == DELAYSTARTBLOCK)
2573                         goto invalid_mapping;
2574                 if (off != irecs[i].br_startoff)
2575                         goto invalid_mapping;
2576
2577                 map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock);
2578                 map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount);
2579                 off += irecs[i].br_blockcount;
2580         }
2581
2582         if (off != bno + nfsb)
2583                 goto invalid_mapping;
2584
2585         *nmaps = nirecs;
2586 out_free_irecs:
2587         if (irecs != &irec)
2588                 kfree(irecs);
2589         return error;
2590
2591 invalid_mapping:
2592         /* Caller ok with no mapping. */
2593         if (XFS_IS_CORRUPT(mp, !(flags & XFS_DABUF_MAP_HOLE_OK))) {
2594                 xfs_dirattr_mark_sick(dp, whichfork);
2595                 error = -EFSCORRUPTED;
2596                 if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
2597                         xfs_alert(mp, "%s: bno %u inode %llu",
2598                                         __func__, bno, dp->i_ino);
2599
2600                         for (i = 0; i < nirecs; i++) {
2601                                 xfs_alert(mp,
2602 "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
2603                                         i, irecs[i].br_startoff,
2604                                         irecs[i].br_startblock,
2605                                         irecs[i].br_blockcount,
2606                                         irecs[i].br_state);
2607                         }
2608                 }
2609         } else {
2610                 *nmaps = 0;
2611         }
2612         goto out_free_irecs;
2613 }
2614
2615 /*
2616  * Get a buffer for the dir/attr block.
2617  */
2618 int
2619 xfs_da_get_buf(
2620         struct xfs_trans        *tp,
2621         struct xfs_inode        *dp,
2622         xfs_dablk_t             bno,
2623         struct xfs_buf          **bpp,
2624         int                     whichfork)
2625 {
2626         struct xfs_mount        *mp = dp->i_mount;
2627         struct xfs_buf          *bp;
2628         struct xfs_buf_map      map, *mapp = &map;
2629         int                     nmap = 1;
2630         int                     error;
2631
2632         *bpp = NULL;
2633         error = xfs_dabuf_map(dp, bno, 0, whichfork, &mapp, &nmap);
2634         if (error || nmap == 0)
2635                 goto out_free;
2636
2637         error = xfs_trans_get_buf_map(tp, mp->m_ddev_targp, mapp, nmap, 0, &bp);
2638         if (error)
2639                 goto out_free;
2640
2641         *bpp = bp;
2642
2643 out_free:
2644         if (mapp != &map)
2645                 kfree(mapp);
2646
2647         return error;
2648 }
2649
2650 /*
2651  * Get a buffer for the dir/attr block, fill in the contents.
2652  */
2653 int
2654 xfs_da_read_buf(
2655         struct xfs_trans        *tp,
2656         struct xfs_inode        *dp,
2657         xfs_dablk_t             bno,
2658         unsigned int            flags,
2659         struct xfs_buf          **bpp,
2660         int                     whichfork,
2661         const struct xfs_buf_ops *ops)
2662 {
2663         struct xfs_mount        *mp = dp->i_mount;
2664         struct xfs_buf          *bp;
2665         struct xfs_buf_map      map, *mapp = &map;
2666         int                     nmap = 1;
2667         int                     error;
2668
2669         *bpp = NULL;
2670         error = xfs_dabuf_map(dp, bno, flags, whichfork, &mapp, &nmap);
2671         if (error || !nmap)
2672                 goto out_free;
2673
2674         error = xfs_trans_read_buf_map(mp, tp, mp->m_ddev_targp, mapp, nmap, 0,
2675                         &bp, ops);
2676         if (xfs_metadata_is_sick(error))
2677                 xfs_dirattr_mark_sick(dp, whichfork);
2678         if (error)
2679                 goto out_free;
2680
2681         if (whichfork == XFS_ATTR_FORK)
2682                 xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
2683         else
2684                 xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
2685         *bpp = bp;
2686 out_free:
2687         if (mapp != &map)
2688                 kfree(mapp);
2689
2690         return error;
2691 }
2692
2693 /*
2694  * Readahead the dir/attr block.
2695  */
2696 int
2697 xfs_da_reada_buf(
2698         struct xfs_inode        *dp,
2699         xfs_dablk_t             bno,
2700         unsigned int            flags,
2701         int                     whichfork,
2702         const struct xfs_buf_ops *ops)
2703 {
2704         struct xfs_buf_map      map;
2705         struct xfs_buf_map      *mapp;
2706         int                     nmap;
2707         int                     error;
2708
2709         mapp = &map;
2710         nmap = 1;
2711         error = xfs_dabuf_map(dp, bno, flags, whichfork, &mapp, &nmap);
2712         if (error || !nmap)
2713                 goto out_free;
2714
2715         xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops);
2716
2717 out_free:
2718         if (mapp != &map)
2719                 kfree(mapp);
2720
2721         return error;
2722 }