Mention branches and keyring.
[releases.git] / libxfs / xfs_refcount.c
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2016 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_defer.h"
14 #include "xfs_btree.h"
15 #include "xfs_bmap.h"
16 #include "xfs_refcount_btree.h"
17 #include "xfs_alloc.h"
18 #include "xfs_errortag.h"
19 #include "xfs_error.h"
20 #include "xfs_trace.h"
21 #include "xfs_trans.h"
22 #include "xfs_bit.h"
23 #include "xfs_refcount.h"
24 #include "xfs_rmap.h"
25 #include "xfs_ag.h"
26
27 struct kmem_cache       *xfs_refcount_intent_cache;
28
29 /* Allowable refcount adjustment amounts. */
30 enum xfs_refc_adjust_op {
31         XFS_REFCOUNT_ADJUST_INCREASE    = 1,
32         XFS_REFCOUNT_ADJUST_DECREASE    = -1,
33         XFS_REFCOUNT_ADJUST_COW_ALLOC   = 0,
34         XFS_REFCOUNT_ADJUST_COW_FREE    = -1,
35 };
36
37 STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
38                 xfs_agblock_t agbno, xfs_extlen_t aglen);
39 STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
40                 xfs_agblock_t agbno, xfs_extlen_t aglen);
41
42 /*
43  * Look up the first record less than or equal to [bno, len] in the btree
44  * given by cur.
45  */
46 int
47 xfs_refcount_lookup_le(
48         struct xfs_btree_cur    *cur,
49         enum xfs_refc_domain    domain,
50         xfs_agblock_t           bno,
51         int                     *stat)
52 {
53         trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
54                         xfs_refcount_encode_startblock(bno, domain),
55                         XFS_LOOKUP_LE);
56         cur->bc_rec.rc.rc_startblock = bno;
57         cur->bc_rec.rc.rc_blockcount = 0;
58         cur->bc_rec.rc.rc_domain = domain;
59         return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
60 }
61
62 /*
63  * Look up the first record greater than or equal to [bno, len] in the btree
64  * given by cur.
65  */
66 int
67 xfs_refcount_lookup_ge(
68         struct xfs_btree_cur    *cur,
69         enum xfs_refc_domain    domain,
70         xfs_agblock_t           bno,
71         int                     *stat)
72 {
73         trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
74                         xfs_refcount_encode_startblock(bno, domain),
75                         XFS_LOOKUP_GE);
76         cur->bc_rec.rc.rc_startblock = bno;
77         cur->bc_rec.rc.rc_blockcount = 0;
78         cur->bc_rec.rc.rc_domain = domain;
79         return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
80 }
81
82 /*
83  * Look up the first record equal to [bno, len] in the btree
84  * given by cur.
85  */
86 int
87 xfs_refcount_lookup_eq(
88         struct xfs_btree_cur    *cur,
89         enum xfs_refc_domain    domain,
90         xfs_agblock_t           bno,
91         int                     *stat)
92 {
93         trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
94                         xfs_refcount_encode_startblock(bno, domain),
95                         XFS_LOOKUP_LE);
96         cur->bc_rec.rc.rc_startblock = bno;
97         cur->bc_rec.rc.rc_blockcount = 0;
98         cur->bc_rec.rc.rc_domain = domain;
99         return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
100 }
101
102 /* Convert on-disk record to in-core format. */
103 void
104 xfs_refcount_btrec_to_irec(
105         const union xfs_btree_rec       *rec,
106         struct xfs_refcount_irec        *irec)
107 {
108         uint32_t                        start;
109
110         start = be32_to_cpu(rec->refc.rc_startblock);
111         if (start & XFS_REFC_COWFLAG) {
112                 start &= ~XFS_REFC_COWFLAG;
113                 irec->rc_domain = XFS_REFC_DOMAIN_COW;
114         } else {
115                 irec->rc_domain = XFS_REFC_DOMAIN_SHARED;
116         }
117
118         irec->rc_startblock = start;
119         irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
120         irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
121 }
122
123 /* Simple checks for refcount records. */
124 xfs_failaddr_t
125 xfs_refcount_check_irec(
126         struct xfs_perag                *pag,
127         const struct xfs_refcount_irec  *irec)
128 {
129         if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN)
130                 return __this_address;
131
132         if (!xfs_refcount_check_domain(irec))
133                 return __this_address;
134
135         /* check for valid extent range, including overflow */
136         if (!xfs_verify_agbext(pag, irec->rc_startblock, irec->rc_blockcount))
137                 return __this_address;
138
139         if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT)
140                 return __this_address;
141
142         return NULL;
143 }
144
145 static inline int
146 xfs_refcount_complain_bad_rec(
147         struct xfs_btree_cur            *cur,
148         xfs_failaddr_t                  fa,
149         const struct xfs_refcount_irec  *irec)
150 {
151         struct xfs_mount                *mp = cur->bc_mp;
152
153         xfs_warn(mp,
154  "Refcount BTree record corruption in AG %d detected at %pS!",
155                                 cur->bc_ag.pag->pag_agno, fa);
156         xfs_warn(mp,
157                 "Start block 0x%x, block count 0x%x, references 0x%x",
158                 irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
159         return -EFSCORRUPTED;
160 }
161
162 /*
163  * Get the data from the pointed-to record.
164  */
165 int
166 xfs_refcount_get_rec(
167         struct xfs_btree_cur            *cur,
168         struct xfs_refcount_irec        *irec,
169         int                             *stat)
170 {
171         union xfs_btree_rec             *rec;
172         xfs_failaddr_t                  fa;
173         int                             error;
174
175         error = xfs_btree_get_rec(cur, &rec, stat);
176         if (error || !*stat)
177                 return error;
178
179         xfs_refcount_btrec_to_irec(rec, irec);
180         fa = xfs_refcount_check_irec(cur->bc_ag.pag, irec);
181         if (fa)
182                 return xfs_refcount_complain_bad_rec(cur, fa, irec);
183
184         trace_xfs_refcount_get(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
185         return 0;
186 }
187
188 /*
189  * Update the record referred to by cur to the value given
190  * by [bno, len, refcount].
191  * This either works (return 0) or gets an EFSCORRUPTED error.
192  */
193 STATIC int
194 xfs_refcount_update(
195         struct xfs_btree_cur            *cur,
196         struct xfs_refcount_irec        *irec)
197 {
198         union xfs_btree_rec     rec;
199         uint32_t                start;
200         int                     error;
201
202         trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
203
204         start = xfs_refcount_encode_startblock(irec->rc_startblock,
205                         irec->rc_domain);
206         rec.refc.rc_startblock = cpu_to_be32(start);
207         rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
208         rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);
209
210         error = xfs_btree_update(cur, &rec);
211         if (error)
212                 trace_xfs_refcount_update_error(cur->bc_mp,
213                                 cur->bc_ag.pag->pag_agno, error, _RET_IP_);
214         return error;
215 }
216
217 /*
218  * Insert the record referred to by cur to the value given
219  * by [bno, len, refcount].
220  * This either works (return 0) or gets an EFSCORRUPTED error.
221  */
222 int
223 xfs_refcount_insert(
224         struct xfs_btree_cur            *cur,
225         struct xfs_refcount_irec        *irec,
226         int                             *i)
227 {
228         int                             error;
229
230         trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
231
232         cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
233         cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
234         cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
235         cur->bc_rec.rc.rc_domain = irec->rc_domain;
236
237         error = xfs_btree_insert(cur, i);
238         if (error)
239                 goto out_error;
240         if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
241                 error = -EFSCORRUPTED;
242                 goto out_error;
243         }
244
245 out_error:
246         if (error)
247                 trace_xfs_refcount_insert_error(cur->bc_mp,
248                                 cur->bc_ag.pag->pag_agno, error, _RET_IP_);
249         return error;
250 }
251
252 /*
253  * Remove the record referred to by cur, then set the pointer to the spot
254  * where the record could be re-inserted, in case we want to increment or
255  * decrement the cursor.
256  * This either works (return 0) or gets an EFSCORRUPTED error.
257  */
258 STATIC int
259 xfs_refcount_delete(
260         struct xfs_btree_cur    *cur,
261         int                     *i)
262 {
263         struct xfs_refcount_irec        irec;
264         int                     found_rec;
265         int                     error;
266
267         error = xfs_refcount_get_rec(cur, &irec, &found_rec);
268         if (error)
269                 goto out_error;
270         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
271                 error = -EFSCORRUPTED;
272                 goto out_error;
273         }
274         trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.pag->pag_agno, &irec);
275         error = xfs_btree_delete(cur, i);
276         if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
277                 error = -EFSCORRUPTED;
278                 goto out_error;
279         }
280         if (error)
281                 goto out_error;
282         error = xfs_refcount_lookup_ge(cur, irec.rc_domain, irec.rc_startblock,
283                         &found_rec);
284 out_error:
285         if (error)
286                 trace_xfs_refcount_delete_error(cur->bc_mp,
287                                 cur->bc_ag.pag->pag_agno, error, _RET_IP_);
288         return error;
289 }
290
291 /*
292  * Adjusting the Reference Count
293  *
294  * As stated elsewhere, the reference count btree (refcbt) stores
295  * >1 reference counts for extents of physical blocks.  In this
296  * operation, we're either raising or lowering the reference count of
297  * some subrange stored in the tree:
298  *
299  *      <------ adjustment range ------>
300  * ----+   +---+-----+ +--+--------+---------
301  *  2  |   | 3 |  4  | |17|   55   |   10
302  * ----+   +---+-----+ +--+--------+---------
303  * X axis is physical blocks number;
304  * reference counts are the numbers inside the rectangles
305  *
306  * The first thing we need to do is to ensure that there are no
307  * refcount extents crossing either boundary of the range to be
308  * adjusted.  For any extent that does cross a boundary, split it into
309  * two extents so that we can increment the refcount of one of the
310  * pieces later:
311  *
312  *      <------ adjustment range ------>
313  * ----+   +---+-----+ +--+--------+----+----
314  *  2  |   | 3 |  2  | |17|   55   | 10 | 10
315  * ----+   +---+-----+ +--+--------+----+----
316  *
317  * For this next step, let's assume that all the physical blocks in
318  * the adjustment range are mapped to a file and are therefore in use
319  * at least once.  Therefore, we can infer that any gap in the
320  * refcount tree within the adjustment range represents a physical
321  * extent with refcount == 1:
322  *
323  *      <------ adjustment range ------>
324  * ----+---+---+-----+-+--+--------+----+----
325  *  2  |"1"| 3 |  2  |1|17|   55   | 10 | 10
326  * ----+---+---+-----+-+--+--------+----+----
327  *      ^
328  *
329  * For each extent that falls within the interval range, figure out
330  * which extent is to the left or the right of that extent.  Now we
331  * have a left, current, and right extent.  If the new reference count
332  * of the center extent enables us to merge left, center, and right
333  * into one record covering all three, do so.  If the center extent is
334  * at the left end of the range, abuts the left extent, and its new
335  * reference count matches the left extent's record, then merge them.
336  * If the center extent is at the right end of the range, abuts the
337  * right extent, and the reference counts match, merge those.  In the
338  * example, we can left merge (assuming an increment operation):
339  *
340  *      <------ adjustment range ------>
341  * --------+---+-----+-+--+--------+----+----
342  *    2    | 3 |  2  |1|17|   55   | 10 | 10
343  * --------+---+-----+-+--+--------+----+----
344  *          ^
345  *
346  * For all other extents within the range, adjust the reference count
347  * or delete it if the refcount falls below 2.  If we were
348  * incrementing, the end result looks like this:
349  *
350  *      <------ adjustment range ------>
351  * --------+---+-----+-+--+--------+----+----
352  *    2    | 4 |  3  |2|18|   56   | 11 | 10
353  * --------+---+-----+-+--+--------+----+----
354  *
355  * The result of a decrement operation looks as such:
356  *
357  *      <------ adjustment range ------>
358  * ----+   +---+       +--+--------+----+----
359  *  2  |   | 2 |       |16|   54   |  9 | 10
360  * ----+   +---+       +--+--------+----+----
361  *      DDDD    111111DD
362  *
363  * The blocks marked "D" are freed; the blocks marked "1" are only
364  * referenced once and therefore the record is removed from the
365  * refcount btree.
366  */
367
368 /* Next block after this extent. */
369 static inline xfs_agblock_t
370 xfs_refc_next(
371         struct xfs_refcount_irec        *rc)
372 {
373         return rc->rc_startblock + rc->rc_blockcount;
374 }
375
376 /*
377  * Split a refcount extent that crosses agbno.
378  */
379 STATIC int
380 xfs_refcount_split_extent(
381         struct xfs_btree_cur            *cur,
382         enum xfs_refc_domain            domain,
383         xfs_agblock_t                   agbno,
384         bool                            *shape_changed)
385 {
386         struct xfs_refcount_irec        rcext, tmp;
387         int                             found_rec;
388         int                             error;
389
390         *shape_changed = false;
391         error = xfs_refcount_lookup_le(cur, domain, agbno, &found_rec);
392         if (error)
393                 goto out_error;
394         if (!found_rec)
395                 return 0;
396
397         error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
398         if (error)
399                 goto out_error;
400         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
401                 error = -EFSCORRUPTED;
402                 goto out_error;
403         }
404         if (rcext.rc_domain != domain)
405                 return 0;
406         if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
407                 return 0;
408
409         *shape_changed = true;
410         trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
411                         &rcext, agbno);
412
413         /* Establish the right extent. */
414         tmp = rcext;
415         tmp.rc_startblock = agbno;
416         tmp.rc_blockcount -= (agbno - rcext.rc_startblock);
417         error = xfs_refcount_update(cur, &tmp);
418         if (error)
419                 goto out_error;
420
421         /* Insert the left extent. */
422         tmp = rcext;
423         tmp.rc_blockcount = agbno - rcext.rc_startblock;
424         error = xfs_refcount_insert(cur, &tmp, &found_rec);
425         if (error)
426                 goto out_error;
427         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
428                 error = -EFSCORRUPTED;
429                 goto out_error;
430         }
431         return error;
432
433 out_error:
434         trace_xfs_refcount_split_extent_error(cur->bc_mp,
435                         cur->bc_ag.pag->pag_agno, error, _RET_IP_);
436         return error;
437 }
438
439 /*
440  * Merge the left, center, and right extents.
441  */
442 STATIC int
443 xfs_refcount_merge_center_extents(
444         struct xfs_btree_cur            *cur,
445         struct xfs_refcount_irec        *left,
446         struct xfs_refcount_irec        *center,
447         struct xfs_refcount_irec        *right,
448         unsigned long long              extlen,
449         xfs_extlen_t                    *aglen)
450 {
451         int                             error;
452         int                             found_rec;
453
454         trace_xfs_refcount_merge_center_extents(cur->bc_mp,
455                         cur->bc_ag.pag->pag_agno, left, center, right);
456
457         ASSERT(left->rc_domain == center->rc_domain);
458         ASSERT(right->rc_domain == center->rc_domain);
459
460         /*
461          * Make sure the center and right extents are not in the btree.
462          * If the center extent was synthesized, the first delete call
463          * removes the right extent and we skip the second deletion.
464          * If center and right were in the btree, then the first delete
465          * call removes the center and the second one removes the right
466          * extent.
467          */
468         error = xfs_refcount_lookup_ge(cur, center->rc_domain,
469                         center->rc_startblock, &found_rec);
470         if (error)
471                 goto out_error;
472         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
473                 error = -EFSCORRUPTED;
474                 goto out_error;
475         }
476
477         error = xfs_refcount_delete(cur, &found_rec);
478         if (error)
479                 goto out_error;
480         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
481                 error = -EFSCORRUPTED;
482                 goto out_error;
483         }
484
485         if (center->rc_refcount > 1) {
486                 error = xfs_refcount_delete(cur, &found_rec);
487                 if (error)
488                         goto out_error;
489                 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
490                         error = -EFSCORRUPTED;
491                         goto out_error;
492                 }
493         }
494
495         /* Enlarge the left extent. */
496         error = xfs_refcount_lookup_le(cur, left->rc_domain,
497                         left->rc_startblock, &found_rec);
498         if (error)
499                 goto out_error;
500         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
501                 error = -EFSCORRUPTED;
502                 goto out_error;
503         }
504
505         left->rc_blockcount = extlen;
506         error = xfs_refcount_update(cur, left);
507         if (error)
508                 goto out_error;
509
510         *aglen = 0;
511         return error;
512
513 out_error:
514         trace_xfs_refcount_merge_center_extents_error(cur->bc_mp,
515                         cur->bc_ag.pag->pag_agno, error, _RET_IP_);
516         return error;
517 }
518
519 /*
520  * Merge with the left extent.
521  */
522 STATIC int
523 xfs_refcount_merge_left_extent(
524         struct xfs_btree_cur            *cur,
525         struct xfs_refcount_irec        *left,
526         struct xfs_refcount_irec        *cleft,
527         xfs_agblock_t                   *agbno,
528         xfs_extlen_t                    *aglen)
529 {
530         int                             error;
531         int                             found_rec;
532
533         trace_xfs_refcount_merge_left_extent(cur->bc_mp,
534                         cur->bc_ag.pag->pag_agno, left, cleft);
535
536         ASSERT(left->rc_domain == cleft->rc_domain);
537
538         /* If the extent at agbno (cleft) wasn't synthesized, remove it. */
539         if (cleft->rc_refcount > 1) {
540                 error = xfs_refcount_lookup_le(cur, cleft->rc_domain,
541                                 cleft->rc_startblock, &found_rec);
542                 if (error)
543                         goto out_error;
544                 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
545                         error = -EFSCORRUPTED;
546                         goto out_error;
547                 }
548
549                 error = xfs_refcount_delete(cur, &found_rec);
550                 if (error)
551                         goto out_error;
552                 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
553                         error = -EFSCORRUPTED;
554                         goto out_error;
555                 }
556         }
557
558         /* Enlarge the left extent. */
559         error = xfs_refcount_lookup_le(cur, left->rc_domain,
560                         left->rc_startblock, &found_rec);
561         if (error)
562                 goto out_error;
563         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
564                 error = -EFSCORRUPTED;
565                 goto out_error;
566         }
567
568         left->rc_blockcount += cleft->rc_blockcount;
569         error = xfs_refcount_update(cur, left);
570         if (error)
571                 goto out_error;
572
573         *agbno += cleft->rc_blockcount;
574         *aglen -= cleft->rc_blockcount;
575         return error;
576
577 out_error:
578         trace_xfs_refcount_merge_left_extent_error(cur->bc_mp,
579                         cur->bc_ag.pag->pag_agno, error, _RET_IP_);
580         return error;
581 }
582
583 /*
584  * Merge with the right extent.
585  */
586 STATIC int
587 xfs_refcount_merge_right_extent(
588         struct xfs_btree_cur            *cur,
589         struct xfs_refcount_irec        *right,
590         struct xfs_refcount_irec        *cright,
591         xfs_extlen_t                    *aglen)
592 {
593         int                             error;
594         int                             found_rec;
595
596         trace_xfs_refcount_merge_right_extent(cur->bc_mp,
597                         cur->bc_ag.pag->pag_agno, cright, right);
598
599         ASSERT(right->rc_domain == cright->rc_domain);
600
601         /*
602          * If the extent ending at agbno+aglen (cright) wasn't synthesized,
603          * remove it.
604          */
605         if (cright->rc_refcount > 1) {
606                 error = xfs_refcount_lookup_le(cur, cright->rc_domain,
607                                 cright->rc_startblock, &found_rec);
608                 if (error)
609                         goto out_error;
610                 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
611                         error = -EFSCORRUPTED;
612                         goto out_error;
613                 }
614
615                 error = xfs_refcount_delete(cur, &found_rec);
616                 if (error)
617                         goto out_error;
618                 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
619                         error = -EFSCORRUPTED;
620                         goto out_error;
621                 }
622         }
623
624         /* Enlarge the right extent. */
625         error = xfs_refcount_lookup_le(cur, right->rc_domain,
626                         right->rc_startblock, &found_rec);
627         if (error)
628                 goto out_error;
629         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
630                 error = -EFSCORRUPTED;
631                 goto out_error;
632         }
633
634         right->rc_startblock -= cright->rc_blockcount;
635         right->rc_blockcount += cright->rc_blockcount;
636         error = xfs_refcount_update(cur, right);
637         if (error)
638                 goto out_error;
639
640         *aglen -= cright->rc_blockcount;
641         return error;
642
643 out_error:
644         trace_xfs_refcount_merge_right_extent_error(cur->bc_mp,
645                         cur->bc_ag.pag->pag_agno, error, _RET_IP_);
646         return error;
647 }
648
649 /*
650  * Find the left extent and the one after it (cleft).  This function assumes
651  * that we've already split any extent crossing agbno.
652  */
653 STATIC int
654 xfs_refcount_find_left_extents(
655         struct xfs_btree_cur            *cur,
656         struct xfs_refcount_irec        *left,
657         struct xfs_refcount_irec        *cleft,
658         enum xfs_refc_domain            domain,
659         xfs_agblock_t                   agbno,
660         xfs_extlen_t                    aglen)
661 {
662         struct xfs_refcount_irec        tmp;
663         int                             error;
664         int                             found_rec;
665
666         left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
667         error = xfs_refcount_lookup_le(cur, domain, agbno - 1, &found_rec);
668         if (error)
669                 goto out_error;
670         if (!found_rec)
671                 return 0;
672
673         error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
674         if (error)
675                 goto out_error;
676         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
677                 error = -EFSCORRUPTED;
678                 goto out_error;
679         }
680
681         if (tmp.rc_domain != domain)
682                 return 0;
683         if (xfs_refc_next(&tmp) != agbno)
684                 return 0;
685         /* We have a left extent; retrieve (or invent) the next right one */
686         *left = tmp;
687
688         error = xfs_btree_increment(cur, 0, &found_rec);
689         if (error)
690                 goto out_error;
691         if (found_rec) {
692                 error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
693                 if (error)
694                         goto out_error;
695                 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
696                         error = -EFSCORRUPTED;
697                         goto out_error;
698                 }
699
700                 if (tmp.rc_domain != domain)
701                         goto not_found;
702
703                 /* if tmp starts at the end of our range, just use that */
704                 if (tmp.rc_startblock == agbno)
705                         *cleft = tmp;
706                 else {
707                         /*
708                          * There's a gap in the refcntbt at the start of the
709                          * range we're interested in (refcount == 1) so
710                          * synthesize the implied extent and pass it back.
711                          * We assume here that the agbno/aglen range was
712                          * passed in from a data fork extent mapping and
713                          * therefore is allocated to exactly one owner.
714                          */
715                         cleft->rc_startblock = agbno;
716                         cleft->rc_blockcount = min(aglen,
717                                         tmp.rc_startblock - agbno);
718                         cleft->rc_refcount = 1;
719                         cleft->rc_domain = domain;
720                 }
721         } else {
722 not_found:
723                 /*
724                  * No extents, so pretend that there's one covering the whole
725                  * range.
726                  */
727                 cleft->rc_startblock = agbno;
728                 cleft->rc_blockcount = aglen;
729                 cleft->rc_refcount = 1;
730                 cleft->rc_domain = domain;
731         }
732         trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
733                         left, cleft, agbno);
734         return error;
735
736 out_error:
737         trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
738                         cur->bc_ag.pag->pag_agno, error, _RET_IP_);
739         return error;
740 }
741
742 /*
743  * Find the right extent and the one before it (cright).  This function
744  * assumes that we've already split any extents crossing agbno + aglen.
745  */
746 STATIC int
747 xfs_refcount_find_right_extents(
748         struct xfs_btree_cur            *cur,
749         struct xfs_refcount_irec        *right,
750         struct xfs_refcount_irec        *cright,
751         enum xfs_refc_domain            domain,
752         xfs_agblock_t                   agbno,
753         xfs_extlen_t                    aglen)
754 {
755         struct xfs_refcount_irec        tmp;
756         int                             error;
757         int                             found_rec;
758
759         right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
760         error = xfs_refcount_lookup_ge(cur, domain, agbno + aglen, &found_rec);
761         if (error)
762                 goto out_error;
763         if (!found_rec)
764                 return 0;
765
766         error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
767         if (error)
768                 goto out_error;
769         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
770                 error = -EFSCORRUPTED;
771                 goto out_error;
772         }
773
774         if (tmp.rc_domain != domain)
775                 return 0;
776         if (tmp.rc_startblock != agbno + aglen)
777                 return 0;
778         /* We have a right extent; retrieve (or invent) the next left one */
779         *right = tmp;
780
781         error = xfs_btree_decrement(cur, 0, &found_rec);
782         if (error)
783                 goto out_error;
784         if (found_rec) {
785                 error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
786                 if (error)
787                         goto out_error;
788                 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
789                         error = -EFSCORRUPTED;
790                         goto out_error;
791                 }
792
793                 if (tmp.rc_domain != domain)
794                         goto not_found;
795
796                 /* if tmp ends at the end of our range, just use that */
797                 if (xfs_refc_next(&tmp) == agbno + aglen)
798                         *cright = tmp;
799                 else {
800                         /*
801                          * There's a gap in the refcntbt at the end of the
802                          * range we're interested in (refcount == 1) so
803                          * create the implied extent and pass it back.
804                          * We assume here that the agbno/aglen range was
805                          * passed in from a data fork extent mapping and
806                          * therefore is allocated to exactly one owner.
807                          */
808                         cright->rc_startblock = max(agbno, xfs_refc_next(&tmp));
809                         cright->rc_blockcount = right->rc_startblock -
810                                         cright->rc_startblock;
811                         cright->rc_refcount = 1;
812                         cright->rc_domain = domain;
813                 }
814         } else {
815 not_found:
816                 /*
817                  * No extents, so pretend that there's one covering the whole
818                  * range.
819                  */
820                 cright->rc_startblock = agbno;
821                 cright->rc_blockcount = aglen;
822                 cright->rc_refcount = 1;
823                 cright->rc_domain = domain;
824         }
825         trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
826                         cright, right, agbno + aglen);
827         return error;
828
829 out_error:
830         trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
831                         cur->bc_ag.pag->pag_agno, error, _RET_IP_);
832         return error;
833 }
834
835 /* Is this extent valid? */
836 static inline bool
837 xfs_refc_valid(
838         const struct xfs_refcount_irec  *rc)
839 {
840         return rc->rc_startblock != NULLAGBLOCK;
841 }
842
843 static inline xfs_nlink_t
844 xfs_refc_merge_refcount(
845         const struct xfs_refcount_irec  *irec,
846         enum xfs_refc_adjust_op         adjust)
847 {
848         /* Once a record hits MAXREFCOUNT, it is pinned there forever */
849         if (irec->rc_refcount == MAXREFCOUNT)
850                 return MAXREFCOUNT;
851         return irec->rc_refcount + adjust;
852 }
853
854 static inline bool
855 xfs_refc_want_merge_center(
856         const struct xfs_refcount_irec  *left,
857         const struct xfs_refcount_irec  *cleft,
858         const struct xfs_refcount_irec  *cright,
859         const struct xfs_refcount_irec  *right,
860         bool                            cleft_is_cright,
861         enum xfs_refc_adjust_op         adjust,
862         unsigned long long              *ulenp)
863 {
864         unsigned long long              ulen = left->rc_blockcount;
865         xfs_nlink_t                     new_refcount;
866
867         /*
868          * To merge with a center record, both shoulder records must be
869          * adjacent to the record we want to adjust.  This is only true if
870          * find_left and find_right made all four records valid.
871          */
872         if (!xfs_refc_valid(left)  || !xfs_refc_valid(right) ||
873             !xfs_refc_valid(cleft) || !xfs_refc_valid(cright))
874                 return false;
875
876         /* There must only be one record for the entire range. */
877         if (!cleft_is_cright)
878                 return false;
879
880         /* The shoulder record refcounts must match the new refcount. */
881         new_refcount = xfs_refc_merge_refcount(cleft, adjust);
882         if (left->rc_refcount != new_refcount)
883                 return false;
884         if (right->rc_refcount != new_refcount)
885                 return false;
886
887         /*
888          * The new record cannot exceed the max length.  ulen is a ULL as the
889          * individual record block counts can be up to (u32 - 1) in length
890          * hence we need to catch u32 addition overflows here.
891          */
892         ulen += cleft->rc_blockcount + right->rc_blockcount;
893         if (ulen >= MAXREFCEXTLEN)
894                 return false;
895
896         *ulenp = ulen;
897         return true;
898 }
899
900 static inline bool
901 xfs_refc_want_merge_left(
902         const struct xfs_refcount_irec  *left,
903         const struct xfs_refcount_irec  *cleft,
904         enum xfs_refc_adjust_op         adjust)
905 {
906         unsigned long long              ulen = left->rc_blockcount;
907         xfs_nlink_t                     new_refcount;
908
909         /*
910          * For a left merge, the left shoulder record must be adjacent to the
911          * start of the range.  If this is true, find_left made left and cleft
912          * contain valid contents.
913          */
914         if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft))
915                 return false;
916
917         /* Left shoulder record refcount must match the new refcount. */
918         new_refcount = xfs_refc_merge_refcount(cleft, adjust);
919         if (left->rc_refcount != new_refcount)
920                 return false;
921
922         /*
923          * The new record cannot exceed the max length.  ulen is a ULL as the
924          * individual record block counts can be up to (u32 - 1) in length
925          * hence we need to catch u32 addition overflows here.
926          */
927         ulen += cleft->rc_blockcount;
928         if (ulen >= MAXREFCEXTLEN)
929                 return false;
930
931         return true;
932 }
933
934 static inline bool
935 xfs_refc_want_merge_right(
936         const struct xfs_refcount_irec  *cright,
937         const struct xfs_refcount_irec  *right,
938         enum xfs_refc_adjust_op         adjust)
939 {
940         unsigned long long              ulen = right->rc_blockcount;
941         xfs_nlink_t                     new_refcount;
942
943         /*
944          * For a right merge, the right shoulder record must be adjacent to the
945          * end of the range.  If this is true, find_right made cright and right
946          * contain valid contents.
947          */
948         if (!xfs_refc_valid(right) || !xfs_refc_valid(cright))
949                 return false;
950
951         /* Right shoulder record refcount must match the new refcount. */
952         new_refcount = xfs_refc_merge_refcount(cright, adjust);
953         if (right->rc_refcount != new_refcount)
954                 return false;
955
956         /*
957          * The new record cannot exceed the max length.  ulen is a ULL as the
958          * individual record block counts can be up to (u32 - 1) in length
959          * hence we need to catch u32 addition overflows here.
960          */
961         ulen += cright->rc_blockcount;
962         if (ulen >= MAXREFCEXTLEN)
963                 return false;
964
965         return true;
966 }
967
968 /*
969  * Try to merge with any extents on the boundaries of the adjustment range.
970  */
971 STATIC int
972 xfs_refcount_merge_extents(
973         struct xfs_btree_cur    *cur,
974         enum xfs_refc_domain    domain,
975         xfs_agblock_t           *agbno,
976         xfs_extlen_t            *aglen,
977         enum xfs_refc_adjust_op adjust,
978         bool                    *shape_changed)
979 {
980         struct xfs_refcount_irec        left = {0}, cleft = {0};
981         struct xfs_refcount_irec        cright = {0}, right = {0};
982         int                             error;
983         unsigned long long              ulen;
984         bool                            cequal;
985
986         *shape_changed = false;
987         /*
988          * Find the extent just below agbno [left], just above agbno [cleft],
989          * just below (agbno + aglen) [cright], and just above (agbno + aglen)
990          * [right].
991          */
992         error = xfs_refcount_find_left_extents(cur, &left, &cleft, domain,
993                         *agbno, *aglen);
994         if (error)
995                 return error;
996         error = xfs_refcount_find_right_extents(cur, &right, &cright, domain,
997                         *agbno, *aglen);
998         if (error)
999                 return error;
1000
1001         /* No left or right extent to merge; exit. */
1002         if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right))
1003                 return 0;
1004
1005         cequal = (cleft.rc_startblock == cright.rc_startblock) &&
1006                  (cleft.rc_blockcount == cright.rc_blockcount);
1007
1008         /* Try to merge left, cleft, and right.  cleft must == cright. */
1009         if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal,
1010                                 adjust, &ulen)) {
1011                 *shape_changed = true;
1012                 return xfs_refcount_merge_center_extents(cur, &left, &cleft,
1013                                 &right, ulen, aglen);
1014         }
1015
1016         /* Try to merge left and cleft. */
1017         if (xfs_refc_want_merge_left(&left, &cleft, adjust)) {
1018                 *shape_changed = true;
1019                 error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
1020                                 agbno, aglen);
1021                 if (error)
1022                         return error;
1023
1024                 /*
1025                  * If we just merged left + cleft and cleft == cright,
1026                  * we no longer have a cright to merge with right.  We're done.
1027                  */
1028                 if (cequal)
1029                         return 0;
1030         }
1031
1032         /* Try to merge cright and right. */
1033         if (xfs_refc_want_merge_right(&cright, &right, adjust)) {
1034                 *shape_changed = true;
1035                 return xfs_refcount_merge_right_extent(cur, &right, &cright,
1036                                 aglen);
1037         }
1038
1039         return 0;
1040 }
1041
1042 /*
1043  * XXX: This is a pretty hand-wavy estimate.  The penalty for guessing
1044  * true incorrectly is a shutdown FS; the penalty for guessing false
1045  * incorrectly is more transaction rolls than might be necessary.
1046  * Be conservative here.
1047  */
1048 static bool
1049 xfs_refcount_still_have_space(
1050         struct xfs_btree_cur            *cur)
1051 {
1052         unsigned long                   overhead;
1053
1054         /*
1055          * Worst case estimate: full splits of the free space and rmap btrees
1056          * to handle each of the shape changes to the refcount btree.
1057          */
1058         overhead = xfs_allocfree_block_count(cur->bc_mp,
1059                                 cur->bc_ag.refc.shape_changes);
1060         overhead += cur->bc_mp->m_refc_maxlevels;
1061         overhead *= cur->bc_mp->m_sb.sb_blocksize;
1062
1063         /*
1064          * Only allow 2 refcount extent updates per transaction if the
1065          * refcount continue update "error" has been injected.
1066          */
1067         if (cur->bc_ag.refc.nr_ops > 2 &&
1068             XFS_TEST_ERROR(false, cur->bc_mp,
1069                         XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
1070                 return false;
1071
1072         if (cur->bc_ag.refc.nr_ops == 0)
1073                 return true;
1074         else if (overhead > cur->bc_tp->t_log_res)
1075                 return false;
1076         return  cur->bc_tp->t_log_res - overhead >
1077                 cur->bc_ag.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
1078 }
1079
1080 /*
1081  * Adjust the refcounts of middle extents.  At this point we should have
1082  * split extents that crossed the adjustment range; merged with adjacent
1083  * extents; and updated agbno/aglen to reflect the merges.  Therefore,
1084  * all we have to do is update the extents inside [agbno, agbno + aglen].
1085  */
1086 STATIC int
1087 xfs_refcount_adjust_extents(
1088         struct xfs_btree_cur    *cur,
1089         xfs_agblock_t           *agbno,
1090         xfs_extlen_t            *aglen,
1091         enum xfs_refc_adjust_op adj)
1092 {
1093         struct xfs_refcount_irec        ext, tmp;
1094         int                             error;
1095         int                             found_rec, found_tmp;
1096         xfs_fsblock_t                   fsbno;
1097
1098         /* Merging did all the work already. */
1099         if (*aglen == 0)
1100                 return 0;
1101
1102         error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_SHARED, *agbno,
1103                         &found_rec);
1104         if (error)
1105                 goto out_error;
1106
1107         while (*aglen > 0 && xfs_refcount_still_have_space(cur)) {
1108                 error = xfs_refcount_get_rec(cur, &ext, &found_rec);
1109                 if (error)
1110                         goto out_error;
1111                 if (!found_rec || ext.rc_domain != XFS_REFC_DOMAIN_SHARED) {
1112                         ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
1113                         ext.rc_blockcount = 0;
1114                         ext.rc_refcount = 0;
1115                         ext.rc_domain = XFS_REFC_DOMAIN_SHARED;
1116                 }
1117
1118                 /*
1119                  * Deal with a hole in the refcount tree; if a file maps to
1120                  * these blocks and there's no refcountbt record, pretend that
1121                  * there is one with refcount == 1.
1122                  */
1123                 if (ext.rc_startblock != *agbno) {
1124                         tmp.rc_startblock = *agbno;
1125                         tmp.rc_blockcount = min(*aglen,
1126                                         ext.rc_startblock - *agbno);
1127                         tmp.rc_refcount = 1 + adj;
1128                         tmp.rc_domain = XFS_REFC_DOMAIN_SHARED;
1129
1130                         trace_xfs_refcount_modify_extent(cur->bc_mp,
1131                                         cur->bc_ag.pag->pag_agno, &tmp);
1132
1133                         /*
1134                          * Either cover the hole (increment) or
1135                          * delete the range (decrement).
1136                          */
1137                         cur->bc_ag.refc.nr_ops++;
1138                         if (tmp.rc_refcount) {
1139                                 error = xfs_refcount_insert(cur, &tmp,
1140                                                 &found_tmp);
1141                                 if (error)
1142                                         goto out_error;
1143                                 if (XFS_IS_CORRUPT(cur->bc_mp,
1144                                                    found_tmp != 1)) {
1145                                         error = -EFSCORRUPTED;
1146                                         goto out_error;
1147                                 }
1148                         } else {
1149                                 fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
1150                                                 cur->bc_ag.pag->pag_agno,
1151                                                 tmp.rc_startblock);
1152                                 error = xfs_free_extent_later(cur->bc_tp, fsbno,
1153                                                   tmp.rc_blockcount, NULL,
1154                                                   XFS_AG_RESV_NONE, false);
1155                                 if (error)
1156                                         goto out_error;
1157                         }
1158
1159                         (*agbno) += tmp.rc_blockcount;
1160                         (*aglen) -= tmp.rc_blockcount;
1161
1162                         /* Stop if there's nothing left to modify */
1163                         if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
1164                                 break;
1165
1166                         /* Move the cursor to the start of ext. */
1167                         error = xfs_refcount_lookup_ge(cur,
1168                                         XFS_REFC_DOMAIN_SHARED, *agbno,
1169                                         &found_rec);
1170                         if (error)
1171                                 goto out_error;
1172                 }
1173
1174                 /*
1175                  * A previous step trimmed agbno/aglen such that the end of the
1176                  * range would not be in the middle of the record.  If this is
1177                  * no longer the case, something is seriously wrong with the
1178                  * btree.  Make sure we never feed the synthesized record into
1179                  * the processing loop below.
1180                  */
1181                 if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount == 0) ||
1182                     XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount > *aglen)) {
1183                         error = -EFSCORRUPTED;
1184                         goto out_error;
1185                 }
1186
1187                 /*
1188                  * Adjust the reference count and either update the tree
1189                  * (incr) or free the blocks (decr).
1190                  */
1191                 if (ext.rc_refcount == MAXREFCOUNT)
1192                         goto skip;
1193                 ext.rc_refcount += adj;
1194                 trace_xfs_refcount_modify_extent(cur->bc_mp,
1195                                 cur->bc_ag.pag->pag_agno, &ext);
1196                 cur->bc_ag.refc.nr_ops++;
1197                 if (ext.rc_refcount > 1) {
1198                         error = xfs_refcount_update(cur, &ext);
1199                         if (error)
1200                                 goto out_error;
1201                 } else if (ext.rc_refcount == 1) {
1202                         error = xfs_refcount_delete(cur, &found_rec);
1203                         if (error)
1204                                 goto out_error;
1205                         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
1206                                 error = -EFSCORRUPTED;
1207                                 goto out_error;
1208                         }
1209                         goto advloop;
1210                 } else {
1211                         fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
1212                                         cur->bc_ag.pag->pag_agno,
1213                                         ext.rc_startblock);
1214                         error = xfs_free_extent_later(cur->bc_tp, fsbno,
1215                                         ext.rc_blockcount, NULL,
1216                                         XFS_AG_RESV_NONE, false);
1217                         if (error)
1218                                 goto out_error;
1219                 }
1220
1221 skip:
1222                 error = xfs_btree_increment(cur, 0, &found_rec);
1223                 if (error)
1224                         goto out_error;
1225
1226 advloop:
1227                 (*agbno) += ext.rc_blockcount;
1228                 (*aglen) -= ext.rc_blockcount;
1229         }
1230
1231         return error;
1232 out_error:
1233         trace_xfs_refcount_modify_extent_error(cur->bc_mp,
1234                         cur->bc_ag.pag->pag_agno, error, _RET_IP_);
1235         return error;
1236 }
1237
1238 /* Adjust the reference count of a range of AG blocks. */
1239 STATIC int
1240 xfs_refcount_adjust(
1241         struct xfs_btree_cur    *cur,
1242         xfs_agblock_t           *agbno,
1243         xfs_extlen_t            *aglen,
1244         enum xfs_refc_adjust_op adj)
1245 {
1246         bool                    shape_changed;
1247         int                     shape_changes = 0;
1248         int                     error;
1249
1250         if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
1251                 trace_xfs_refcount_increase(cur->bc_mp,
1252                                 cur->bc_ag.pag->pag_agno, *agbno, *aglen);
1253         else
1254                 trace_xfs_refcount_decrease(cur->bc_mp,
1255                                 cur->bc_ag.pag->pag_agno, *agbno, *aglen);
1256
1257         /*
1258          * Ensure that no rcextents cross the boundary of the adjustment range.
1259          */
1260         error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
1261                         *agbno, &shape_changed);
1262         if (error)
1263                 goto out_error;
1264         if (shape_changed)
1265                 shape_changes++;
1266
1267         error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
1268                         *agbno + *aglen, &shape_changed);
1269         if (error)
1270                 goto out_error;
1271         if (shape_changed)
1272                 shape_changes++;
1273
1274         /*
1275          * Try to merge with the left or right extents of the range.
1276          */
1277         error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED,
1278                         agbno, aglen, adj, &shape_changed);
1279         if (error)
1280                 goto out_error;
1281         if (shape_changed)
1282                 shape_changes++;
1283         if (shape_changes)
1284                 cur->bc_ag.refc.shape_changes++;
1285
1286         /* Now that we've taken care of the ends, adjust the middle extents */
1287         error = xfs_refcount_adjust_extents(cur, agbno, aglen, adj);
1288         if (error)
1289                 goto out_error;
1290
1291         return 0;
1292
1293 out_error:
1294         trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1295                         error, _RET_IP_);
1296         return error;
1297 }
1298
1299 /* Clean up after calling xfs_refcount_finish_one. */
1300 void
1301 xfs_refcount_finish_one_cleanup(
1302         struct xfs_trans        *tp,
1303         struct xfs_btree_cur    *rcur,
1304         int                     error)
1305 {
1306         struct xfs_buf          *agbp;
1307
1308         if (rcur == NULL)
1309                 return;
1310         agbp = rcur->bc_ag.agbp;
1311         xfs_btree_del_cursor(rcur, error);
1312         if (error)
1313                 xfs_trans_brelse(tp, agbp);
1314 }
1315
1316 /*
1317  * Set up a continuation a deferred refcount operation by updating the intent.
1318  * Checks to make sure we're not going to run off the end of the AG.
1319  */
1320 static inline int
1321 xfs_refcount_continue_op(
1322         struct xfs_btree_cur            *cur,
1323         struct xfs_refcount_intent      *ri,
1324         xfs_agblock_t                   new_agbno)
1325 {
1326         struct xfs_mount                *mp = cur->bc_mp;
1327         struct xfs_perag                *pag = cur->bc_ag.pag;
1328
1329         if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno,
1330                                         ri->ri_blockcount)))
1331                 return -EFSCORRUPTED;
1332
1333         ri->ri_startblock = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
1334
1335         ASSERT(xfs_verify_fsbext(mp, ri->ri_startblock, ri->ri_blockcount));
1336         ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, ri->ri_startblock));
1337
1338         return 0;
1339 }
1340
1341 /*
1342  * Process one of the deferred refcount operations.  We pass back the
1343  * btree cursor to maintain our lock on the btree between calls.
1344  * This saves time and eliminates a buffer deadlock between the
1345  * superblock and the AGF because we'll always grab them in the same
1346  * order.
1347  */
1348 int
1349 xfs_refcount_finish_one(
1350         struct xfs_trans                *tp,
1351         struct xfs_refcount_intent      *ri,
1352         struct xfs_btree_cur            **pcur)
1353 {
1354         struct xfs_mount                *mp = tp->t_mountp;
1355         struct xfs_btree_cur            *rcur;
1356         struct xfs_buf                  *agbp = NULL;
1357         int                             error = 0;
1358         xfs_agblock_t                   bno;
1359         unsigned long                   nr_ops = 0;
1360         int                             shape_changes = 0;
1361
1362         bno = XFS_FSB_TO_AGBNO(mp, ri->ri_startblock);
1363
1364         trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock),
1365                         ri->ri_type, XFS_FSB_TO_AGBNO(mp, ri->ri_startblock),
1366                         ri->ri_blockcount);
1367
1368         if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
1369                 return -EIO;
1370
1371         /*
1372          * If we haven't gotten a cursor or the cursor AG doesn't match
1373          * the startblock, get one now.
1374          */
1375         rcur = *pcur;
1376         if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) {
1377                 nr_ops = rcur->bc_ag.refc.nr_ops;
1378                 shape_changes = rcur->bc_ag.refc.shape_changes;
1379                 xfs_refcount_finish_one_cleanup(tp, rcur, 0);
1380                 rcur = NULL;
1381                 *pcur = NULL;
1382         }
1383         if (rcur == NULL) {
1384                 error = xfs_alloc_read_agf(ri->ri_pag, tp,
1385                                 XFS_ALLOC_FLAG_FREEING, &agbp);
1386                 if (error)
1387                         return error;
1388
1389                 rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, ri->ri_pag);
1390                 rcur->bc_ag.refc.nr_ops = nr_ops;
1391                 rcur->bc_ag.refc.shape_changes = shape_changes;
1392         }
1393         *pcur = rcur;
1394
1395         switch (ri->ri_type) {
1396         case XFS_REFCOUNT_INCREASE:
1397                 error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
1398                                 XFS_REFCOUNT_ADJUST_INCREASE);
1399                 if (error)
1400                         return error;
1401                 if (ri->ri_blockcount > 0)
1402                         error = xfs_refcount_continue_op(rcur, ri, bno);
1403                 break;
1404         case XFS_REFCOUNT_DECREASE:
1405                 error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
1406                                 XFS_REFCOUNT_ADJUST_DECREASE);
1407                 if (error)
1408                         return error;
1409                 if (ri->ri_blockcount > 0)
1410                         error = xfs_refcount_continue_op(rcur, ri, bno);
1411                 break;
1412         case XFS_REFCOUNT_ALLOC_COW:
1413                 error = __xfs_refcount_cow_alloc(rcur, bno, ri->ri_blockcount);
1414                 if (error)
1415                         return error;
1416                 ri->ri_blockcount = 0;
1417                 break;
1418         case XFS_REFCOUNT_FREE_COW:
1419                 error = __xfs_refcount_cow_free(rcur, bno, ri->ri_blockcount);
1420                 if (error)
1421                         return error;
1422                 ri->ri_blockcount = 0;
1423                 break;
1424         default:
1425                 ASSERT(0);
1426                 return -EFSCORRUPTED;
1427         }
1428         if (!error && ri->ri_blockcount > 0)
1429                 trace_xfs_refcount_finish_one_leftover(mp, ri->ri_pag->pag_agno,
1430                                 ri->ri_type, bno, ri->ri_blockcount);
1431         return error;
1432 }
1433
1434 /*
1435  * Record a refcount intent for later processing.
1436  */
1437 static void
1438 __xfs_refcount_add(
1439         struct xfs_trans                *tp,
1440         enum xfs_refcount_intent_type   type,
1441         xfs_fsblock_t                   startblock,
1442         xfs_extlen_t                    blockcount)
1443 {
1444         struct xfs_refcount_intent      *ri;
1445
1446         trace_xfs_refcount_defer(tp->t_mountp,
1447                         XFS_FSB_TO_AGNO(tp->t_mountp, startblock),
1448                         type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
1449                         blockcount);
1450
1451         ri = kmem_cache_alloc(xfs_refcount_intent_cache,
1452                         GFP_NOFS | __GFP_NOFAIL);
1453         INIT_LIST_HEAD(&ri->ri_list);
1454         ri->ri_type = type;
1455         ri->ri_startblock = startblock;
1456         ri->ri_blockcount = blockcount;
1457
1458         xfs_refcount_update_get_group(tp->t_mountp, ri);
1459         xfs_defer_add(tp, &ri->ri_list, &xfs_refcount_update_defer_type);
1460 }
1461
1462 /*
1463  * Increase the reference count of the blocks backing a file's extent.
1464  */
1465 void
1466 xfs_refcount_increase_extent(
1467         struct xfs_trans                *tp,
1468         struct xfs_bmbt_irec            *PREV)
1469 {
1470         if (!xfs_has_reflink(tp->t_mountp))
1471                 return;
1472
1473         __xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock,
1474                         PREV->br_blockcount);
1475 }
1476
1477 /*
1478  * Decrease the reference count of the blocks backing a file's extent.
1479  */
1480 void
1481 xfs_refcount_decrease_extent(
1482         struct xfs_trans                *tp,
1483         struct xfs_bmbt_irec            *PREV)
1484 {
1485         if (!xfs_has_reflink(tp->t_mountp))
1486                 return;
1487
1488         __xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock,
1489                         PREV->br_blockcount);
1490 }
1491
1492 /*
1493  * Given an AG extent, find the lowest-numbered run of shared blocks
1494  * within that range and return the range in fbno/flen.  If
1495  * find_end_of_shared is set, return the longest contiguous extent of
1496  * shared blocks; if not, just return the first extent we find.  If no
1497  * shared blocks are found, fbno and flen will be set to NULLAGBLOCK
1498  * and 0, respectively.
1499  */
1500 int
1501 xfs_refcount_find_shared(
1502         struct xfs_btree_cur            *cur,
1503         xfs_agblock_t                   agbno,
1504         xfs_extlen_t                    aglen,
1505         xfs_agblock_t                   *fbno,
1506         xfs_extlen_t                    *flen,
1507         bool                            find_end_of_shared)
1508 {
1509         struct xfs_refcount_irec        tmp;
1510         int                             i;
1511         int                             have;
1512         int                             error;
1513
1514         trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1515                         agbno, aglen);
1516
1517         /* By default, skip the whole range */
1518         *fbno = NULLAGBLOCK;
1519         *flen = 0;
1520
1521         /* Try to find a refcount extent that crosses the start */
1522         error = xfs_refcount_lookup_le(cur, XFS_REFC_DOMAIN_SHARED, agbno,
1523                         &have);
1524         if (error)
1525                 goto out_error;
1526         if (!have) {
1527                 /* No left extent, look at the next one */
1528                 error = xfs_btree_increment(cur, 0, &have);
1529                 if (error)
1530                         goto out_error;
1531                 if (!have)
1532                         goto done;
1533         }
1534         error = xfs_refcount_get_rec(cur, &tmp, &i);
1535         if (error)
1536                 goto out_error;
1537         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
1538                 error = -EFSCORRUPTED;
1539                 goto out_error;
1540         }
1541         if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
1542                 goto done;
1543
1544         /* If the extent ends before the start, look at the next one */
1545         if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
1546                 error = xfs_btree_increment(cur, 0, &have);
1547                 if (error)
1548                         goto out_error;
1549                 if (!have)
1550                         goto done;
1551                 error = xfs_refcount_get_rec(cur, &tmp, &i);
1552                 if (error)
1553                         goto out_error;
1554                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
1555                         error = -EFSCORRUPTED;
1556                         goto out_error;
1557                 }
1558                 if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
1559                         goto done;
1560         }
1561
1562         /* If the extent starts after the range we want, bail out */
1563         if (tmp.rc_startblock >= agbno + aglen)
1564                 goto done;
1565
1566         /* We found the start of a shared extent! */
1567         if (tmp.rc_startblock < agbno) {
1568                 tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
1569                 tmp.rc_startblock = agbno;
1570         }
1571
1572         *fbno = tmp.rc_startblock;
1573         *flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
1574         if (!find_end_of_shared)
1575                 goto done;
1576
1577         /* Otherwise, find the end of this shared extent */
1578         while (*fbno + *flen < agbno + aglen) {
1579                 error = xfs_btree_increment(cur, 0, &have);
1580                 if (error)
1581                         goto out_error;
1582                 if (!have)
1583                         break;
1584                 error = xfs_refcount_get_rec(cur, &tmp, &i);
1585                 if (error)
1586                         goto out_error;
1587                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
1588                         error = -EFSCORRUPTED;
1589                         goto out_error;
1590                 }
1591                 if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED ||
1592                     tmp.rc_startblock >= agbno + aglen ||
1593                     tmp.rc_startblock != *fbno + *flen)
1594                         break;
1595                 *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
1596         }
1597
1598 done:
1599         trace_xfs_refcount_find_shared_result(cur->bc_mp,
1600                         cur->bc_ag.pag->pag_agno, *fbno, *flen);
1601
1602 out_error:
1603         if (error)
1604                 trace_xfs_refcount_find_shared_error(cur->bc_mp,
1605                                 cur->bc_ag.pag->pag_agno, error, _RET_IP_);
1606         return error;
1607 }
1608
1609 /*
1610  * Recovering CoW Blocks After a Crash
1611  *
1612  * Due to the way that the copy on write mechanism works, there's a window of
1613  * opportunity in which we can lose track of allocated blocks during a crash.
1614  * Because CoW uses delayed allocation in the in-core CoW fork, writeback
1615  * causes blocks to be allocated and stored in the CoW fork.  The blocks are
1616  * no longer in the free space btree but are not otherwise recorded anywhere
1617  * until the write completes and the blocks are mapped into the file.  A crash
1618  * in between allocation and remapping results in the replacement blocks being
1619  * lost.  This situation is exacerbated by the CoW extent size hint because
1620  * allocations can hang around for long time.
1621  *
1622  * However, there is a place where we can record these allocations before they
1623  * become mappings -- the reference count btree.  The btree does not record
1624  * extents with refcount == 1, so we can record allocations with a refcount of
1625  * 1.  Blocks being used for CoW writeout cannot be shared, so there should be
1626  * no conflict with shared block records.  These mappings should be created
1627  * when we allocate blocks to the CoW fork and deleted when they're removed
1628  * from the CoW fork.
1629  *
1630  * Minor nit: records for in-progress CoW allocations and records for shared
1631  * extents must never be merged, to preserve the property that (except for CoW
1632  * allocations) there are no refcount btree entries with refcount == 1.  The
1633  * only time this could potentially happen is when unsharing a block that's
1634  * adjacent to CoW allocations, so we must be careful to avoid this.
1635  *
1636  * At mount time we recover lost CoW allocations by searching the refcount
1637  * btree for these refcount == 1 mappings.  These represent CoW allocations
1638  * that were in progress at the time the filesystem went down, so we can free
1639  * them to get the space back.
1640  *
1641  * This mechanism is superior to creating EFIs for unmapped CoW extents for
1642  * several reasons -- first, EFIs pin the tail of the log and would have to be
1643  * periodically relogged to avoid filling up the log.  Second, CoW completions
1644  * will have to file an EFD and create new EFIs for whatever remains in the
1645  * CoW fork; this partially takes care of (1) but extent-size reservations
1646  * will have to periodically relog even if there's no writeout in progress.
1647  * This can happen if the CoW extent size hint is set, which you really want.
1648  * Third, EFIs cannot currently be automatically relogged into newer
1649  * transactions to advance the log tail.  Fourth, stuffing the log full of
1650  * EFIs places an upper bound on the number of CoW allocations that can be
1651  * held filesystem-wide at any given time.  Recording them in the refcount
1652  * btree doesn't require us to maintain any state in memory and doesn't pin
1653  * the log.
1654  */
1655 /*
1656  * Adjust the refcounts of CoW allocations.  These allocations are "magic"
1657  * in that they're not referenced anywhere else in the filesystem, so we
1658  * stash them in the refcount btree with a refcount of 1 until either file
1659  * remapping (or CoW cancellation) happens.
1660  */
1661 STATIC int
1662 xfs_refcount_adjust_cow_extents(
1663         struct xfs_btree_cur    *cur,
1664         xfs_agblock_t           agbno,
1665         xfs_extlen_t            aglen,
1666         enum xfs_refc_adjust_op adj)
1667 {
1668         struct xfs_refcount_irec        ext, tmp;
1669         int                             error;
1670         int                             found_rec, found_tmp;
1671
1672         if (aglen == 0)
1673                 return 0;
1674
1675         /* Find any overlapping refcount records */
1676         error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_COW, agbno,
1677                         &found_rec);
1678         if (error)
1679                 goto out_error;
1680         error = xfs_refcount_get_rec(cur, &ext, &found_rec);
1681         if (error)
1682                 goto out_error;
1683         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec &&
1684                                 ext.rc_domain != XFS_REFC_DOMAIN_COW)) {
1685                 error = -EFSCORRUPTED;
1686                 goto out_error;
1687         }
1688         if (!found_rec) {
1689                 ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
1690                 ext.rc_blockcount = 0;
1691                 ext.rc_refcount = 0;
1692                 ext.rc_domain = XFS_REFC_DOMAIN_COW;
1693         }
1694
1695         switch (adj) {
1696         case XFS_REFCOUNT_ADJUST_COW_ALLOC:
1697                 /* Adding a CoW reservation, there should be nothing here. */
1698                 if (XFS_IS_CORRUPT(cur->bc_mp,
1699                                    agbno + aglen > ext.rc_startblock)) {
1700                         error = -EFSCORRUPTED;
1701                         goto out_error;
1702                 }
1703
1704                 tmp.rc_startblock = agbno;
1705                 tmp.rc_blockcount = aglen;
1706                 tmp.rc_refcount = 1;
1707                 tmp.rc_domain = XFS_REFC_DOMAIN_COW;
1708
1709                 trace_xfs_refcount_modify_extent(cur->bc_mp,
1710                                 cur->bc_ag.pag->pag_agno, &tmp);
1711
1712                 error = xfs_refcount_insert(cur, &tmp,
1713                                 &found_tmp);
1714                 if (error)
1715                         goto out_error;
1716                 if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) {
1717                         error = -EFSCORRUPTED;
1718                         goto out_error;
1719                 }
1720                 break;
1721         case XFS_REFCOUNT_ADJUST_COW_FREE:
1722                 /* Removing a CoW reservation, there should be one extent. */
1723                 if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) {
1724                         error = -EFSCORRUPTED;
1725                         goto out_error;
1726                 }
1727                 if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) {
1728                         error = -EFSCORRUPTED;
1729                         goto out_error;
1730                 }
1731                 if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) {
1732                         error = -EFSCORRUPTED;
1733                         goto out_error;
1734                 }
1735
1736                 ext.rc_refcount = 0;
1737                 trace_xfs_refcount_modify_extent(cur->bc_mp,
1738                                 cur->bc_ag.pag->pag_agno, &ext);
1739                 error = xfs_refcount_delete(cur, &found_rec);
1740                 if (error)
1741                         goto out_error;
1742                 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
1743                         error = -EFSCORRUPTED;
1744                         goto out_error;
1745                 }
1746                 break;
1747         default:
1748                 ASSERT(0);
1749         }
1750
1751         return error;
1752 out_error:
1753         trace_xfs_refcount_modify_extent_error(cur->bc_mp,
1754                         cur->bc_ag.pag->pag_agno, error, _RET_IP_);
1755         return error;
1756 }
1757
1758 /*
1759  * Add or remove refcount btree entries for CoW reservations.
1760  */
1761 STATIC int
1762 xfs_refcount_adjust_cow(
1763         struct xfs_btree_cur    *cur,
1764         xfs_agblock_t           agbno,
1765         xfs_extlen_t            aglen,
1766         enum xfs_refc_adjust_op adj)
1767 {
1768         bool                    shape_changed;
1769         int                     error;
1770
1771         /*
1772          * Ensure that no rcextents cross the boundary of the adjustment range.
1773          */
1774         error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
1775                         agbno, &shape_changed);
1776         if (error)
1777                 goto out_error;
1778
1779         error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
1780                         agbno + aglen, &shape_changed);
1781         if (error)
1782                 goto out_error;
1783
1784         /*
1785          * Try to merge with the left or right extents of the range.
1786          */
1787         error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_COW, &agbno,
1788                         &aglen, adj, &shape_changed);
1789         if (error)
1790                 goto out_error;
1791
1792         /* Now that we've taken care of the ends, adjust the middle extents */
1793         error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj);
1794         if (error)
1795                 goto out_error;
1796
1797         return 0;
1798
1799 out_error:
1800         trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1801                         error, _RET_IP_);
1802         return error;
1803 }
1804
1805 /*
1806  * Record a CoW allocation in the refcount btree.
1807  */
1808 STATIC int
1809 __xfs_refcount_cow_alloc(
1810         struct xfs_btree_cur    *rcur,
1811         xfs_agblock_t           agbno,
1812         xfs_extlen_t            aglen)
1813 {
1814         trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
1815                         agbno, aglen);
1816
1817         /* Add refcount btree reservation */
1818         return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1819                         XFS_REFCOUNT_ADJUST_COW_ALLOC);
1820 }
1821
1822 /*
1823  * Remove a CoW allocation from the refcount btree.
1824  */
1825 STATIC int
1826 __xfs_refcount_cow_free(
1827         struct xfs_btree_cur    *rcur,
1828         xfs_agblock_t           agbno,
1829         xfs_extlen_t            aglen)
1830 {
1831         trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
1832                         agbno, aglen);
1833
1834         /* Remove refcount btree reservation */
1835         return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1836                         XFS_REFCOUNT_ADJUST_COW_FREE);
1837 }
1838
1839 /* Record a CoW staging extent in the refcount btree. */
1840 void
1841 xfs_refcount_alloc_cow_extent(
1842         struct xfs_trans                *tp,
1843         xfs_fsblock_t                   fsb,
1844         xfs_extlen_t                    len)
1845 {
1846         struct xfs_mount                *mp = tp->t_mountp;
1847
1848         if (!xfs_has_reflink(mp))
1849                 return;
1850
1851         __xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len);
1852
1853         /* Add rmap entry */
1854         xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
1855                         XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1856 }
1857
1858 /* Forget a CoW staging event in the refcount btree. */
1859 void
1860 xfs_refcount_free_cow_extent(
1861         struct xfs_trans                *tp,
1862         xfs_fsblock_t                   fsb,
1863         xfs_extlen_t                    len)
1864 {
1865         struct xfs_mount                *mp = tp->t_mountp;
1866
1867         if (!xfs_has_reflink(mp))
1868                 return;
1869
1870         /* Remove rmap entry */
1871         xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
1872                         XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1873         __xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len);
1874 }
1875
1876 struct xfs_refcount_recovery {
1877         struct list_head                rr_list;
1878         struct xfs_refcount_irec        rr_rrec;
1879 };
1880
1881 /* Stuff an extent on the recovery list. */
1882 STATIC int
1883 xfs_refcount_recover_extent(
1884         struct xfs_btree_cur            *cur,
1885         const union xfs_btree_rec       *rec,
1886         void                            *priv)
1887 {
1888         struct list_head                *debris = priv;
1889         struct xfs_refcount_recovery    *rr;
1890
1891         if (XFS_IS_CORRUPT(cur->bc_mp,
1892                            be32_to_cpu(rec->refc.rc_refcount) != 1))
1893                 return -EFSCORRUPTED;
1894
1895         rr = kmalloc(sizeof(struct xfs_refcount_recovery),
1896                         GFP_KERNEL | __GFP_NOFAIL);
1897         INIT_LIST_HEAD(&rr->rr_list);
1898         xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
1899
1900         if (xfs_refcount_check_irec(cur->bc_ag.pag, &rr->rr_rrec) != NULL ||
1901             XFS_IS_CORRUPT(cur->bc_mp,
1902                            rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) {
1903                 kfree(rr);
1904                 return -EFSCORRUPTED;
1905         }
1906
1907         list_add_tail(&rr->rr_list, debris);
1908         return 0;
1909 }
1910
1911 /* Find and remove leftover CoW reservations. */
1912 int
1913 xfs_refcount_recover_cow_leftovers(
1914         struct xfs_mount                *mp,
1915         struct xfs_perag                *pag)
1916 {
1917         struct xfs_trans                *tp;
1918         struct xfs_btree_cur            *cur;
1919         struct xfs_buf                  *agbp;
1920         struct xfs_refcount_recovery    *rr, *n;
1921         struct list_head                debris;
1922         union xfs_btree_irec            low = {
1923                 .rc.rc_domain           = XFS_REFC_DOMAIN_COW,
1924         };
1925         union xfs_btree_irec            high = {
1926                 .rc.rc_domain           = XFS_REFC_DOMAIN_COW,
1927                 .rc.rc_startblock       = -1U,
1928         };
1929         xfs_fsblock_t                   fsb;
1930         int                             error;
1931
1932         /* reflink filesystems mustn't have AGs larger than 2^31-1 blocks */
1933         BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COWFLAG);
1934         if (mp->m_sb.sb_agblocks > XFS_MAX_CRC_AG_BLOCKS)
1935                 return -EOPNOTSUPP;
1936
1937         INIT_LIST_HEAD(&debris);
1938
1939         /*
1940          * In this first part, we use an empty transaction to gather up
1941          * all the leftover CoW extents so that we can subsequently
1942          * delete them.  The empty transaction is used to avoid
1943          * a buffer lock deadlock if there happens to be a loop in the
1944          * refcountbt because we're allowed to re-grab a buffer that is
1945          * already attached to our transaction.  When we're done
1946          * recording the CoW debris we cancel the (empty) transaction
1947          * and everything goes away cleanly.
1948          */
1949         error = xfs_trans_alloc_empty(mp, &tp);
1950         if (error)
1951                 return error;
1952
1953         error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
1954         if (error)
1955                 goto out_trans;
1956         cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);
1957
1958         /* Find all the leftover CoW staging extents. */
1959         error = xfs_btree_query_range(cur, &low, &high,
1960                         xfs_refcount_recover_extent, &debris);
1961         xfs_btree_del_cursor(cur, error);
1962         xfs_trans_brelse(tp, agbp);
1963         xfs_trans_cancel(tp);
1964         if (error)
1965                 goto out_free;
1966
1967         /* Now iterate the list to free the leftovers */
1968         list_for_each_entry_safe(rr, n, &debris, rr_list) {
1969                 /* Set up transaction. */
1970                 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
1971                 if (error)
1972                         goto out_free;
1973
1974                 trace_xfs_refcount_recover_extent(mp, pag->pag_agno,
1975                                 &rr->rr_rrec);
1976
1977                 /* Free the orphan record */
1978                 fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno,
1979                                 rr->rr_rrec.rc_startblock);
1980                 xfs_refcount_free_cow_extent(tp, fsb,
1981                                 rr->rr_rrec.rc_blockcount);
1982
1983                 /* Free the block. */
1984                 error = xfs_free_extent_later(tp, fsb,
1985                                 rr->rr_rrec.rc_blockcount, NULL,
1986                                 XFS_AG_RESV_NONE, false);
1987                 if (error)
1988                         goto out_trans;
1989
1990                 error = xfs_trans_commit(tp);
1991                 if (error)
1992                         goto out_free;
1993
1994                 list_del(&rr->rr_list);
1995                 kfree(rr);
1996         }
1997
1998         return error;
1999 out_trans:
2000         xfs_trans_cancel(tp);
2001 out_free:
2002         /* Free the leftover list */
2003         list_for_each_entry_safe(rr, n, &debris, rr_list) {
2004                 list_del(&rr->rr_list);
2005                 kfree(rr);
2006         }
2007         return error;
2008 }
2009
2010 /*
2011  * Scan part of the keyspace of the refcount records and tell us if the area
2012  * has no records, is fully mapped by records, or is partially filled.
2013  */
2014 int
2015 xfs_refcount_has_records(
2016         struct xfs_btree_cur    *cur,
2017         enum xfs_refc_domain    domain,
2018         xfs_agblock_t           bno,
2019         xfs_extlen_t            len,
2020         enum xbtree_recpacking  *outcome)
2021 {
2022         union xfs_btree_irec    low;
2023         union xfs_btree_irec    high;
2024
2025         memset(&low, 0, sizeof(low));
2026         low.rc.rc_startblock = bno;
2027         memset(&high, 0xFF, sizeof(high));
2028         high.rc.rc_startblock = bno + len - 1;
2029         low.rc.rc_domain = high.rc.rc_domain = domain;
2030
2031         return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
2032 }
2033
2034 struct xfs_refcount_query_range_info {
2035         xfs_refcount_query_range_fn     fn;
2036         void                            *priv;
2037 };
2038
2039 /* Format btree record and pass to our callback. */
2040 STATIC int
2041 xfs_refcount_query_range_helper(
2042         struct xfs_btree_cur            *cur,
2043         const union xfs_btree_rec       *rec,
2044         void                            *priv)
2045 {
2046         struct xfs_refcount_query_range_info    *query = priv;
2047         struct xfs_refcount_irec        irec;
2048         xfs_failaddr_t                  fa;
2049
2050         xfs_refcount_btrec_to_irec(rec, &irec);
2051         fa = xfs_refcount_check_irec(cur->bc_ag.pag, &irec);
2052         if (fa)
2053                 return xfs_refcount_complain_bad_rec(cur, fa, &irec);
2054
2055         return query->fn(cur, &irec, query->priv);
2056 }
2057
2058 /* Find all refcount records between two keys. */
2059 int
2060 xfs_refcount_query_range(
2061         struct xfs_btree_cur            *cur,
2062         const struct xfs_refcount_irec  *low_rec,
2063         const struct xfs_refcount_irec  *high_rec,
2064         xfs_refcount_query_range_fn     fn,
2065         void                            *priv)
2066 {
2067         union xfs_btree_irec            low_brec = { .rc = *low_rec };
2068         union xfs_btree_irec            high_brec = { .rc = *high_rec };
2069         struct xfs_refcount_query_range_info query = { .priv = priv, .fn = fn };
2070
2071         return xfs_btree_query_range(cur, &low_brec, &high_brec,
2072                         xfs_refcount_query_range_helper, &query);
2073 }
2074
2075 int __init
2076 xfs_refcount_intent_init_cache(void)
2077 {
2078         xfs_refcount_intent_cache = kmem_cache_create("xfs_refc_intent",
2079                         sizeof(struct xfs_refcount_intent),
2080                         0, 0, NULL);
2081
2082         return xfs_refcount_intent_cache != NULL ? 0 : -ENOMEM;
2083 }
2084
2085 void
2086 xfs_refcount_intent_destroy_cache(void)
2087 {
2088         kmem_cache_destroy(xfs_refcount_intent_cache);
2089         xfs_refcount_intent_cache = NULL;
2090 }