Mention branches and keyring.
[releases.git] / libxfs / xfs_refcount.c
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2016 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_defer.h"
14 #include "xfs_btree.h"
15 #include "xfs_bmap.h"
16 #include "xfs_refcount_btree.h"
17 #include "xfs_alloc.h"
18 #include "xfs_errortag.h"
19 #include "xfs_error.h"
20 #include "xfs_trace.h"
21 #include "xfs_trans.h"
22 #include "xfs_bit.h"
23 #include "xfs_refcount.h"
24 #include "xfs_rmap.h"
25 #include "xfs_ag.h"
26
27 struct kmem_cache       *xfs_refcount_intent_cache;
28
29 /* Allowable refcount adjustment amounts. */
30 enum xfs_refc_adjust_op {
31         XFS_REFCOUNT_ADJUST_INCREASE    = 1,
32         XFS_REFCOUNT_ADJUST_DECREASE    = -1,
33         XFS_REFCOUNT_ADJUST_COW_ALLOC   = 0,
34         XFS_REFCOUNT_ADJUST_COW_FREE    = -1,
35 };
36
37 STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
38                 xfs_agblock_t agbno, xfs_extlen_t aglen);
39 STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
40                 xfs_agblock_t agbno, xfs_extlen_t aglen);
41
42 /*
43  * Look up the first record less than or equal to [bno, len] in the btree
44  * given by cur.
45  */
46 int
47 xfs_refcount_lookup_le(
48         struct xfs_btree_cur    *cur,
49         xfs_agblock_t           bno,
50         int                     *stat)
51 {
52         trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno,
53                         XFS_LOOKUP_LE);
54         cur->bc_rec.rc.rc_startblock = bno;
55         cur->bc_rec.rc.rc_blockcount = 0;
56         return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
57 }
58
59 /*
60  * Look up the first record greater than or equal to [bno, len] in the btree
61  * given by cur.
62  */
63 int
64 xfs_refcount_lookup_ge(
65         struct xfs_btree_cur    *cur,
66         xfs_agblock_t           bno,
67         int                     *stat)
68 {
69         trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno,
70                         XFS_LOOKUP_GE);
71         cur->bc_rec.rc.rc_startblock = bno;
72         cur->bc_rec.rc.rc_blockcount = 0;
73         return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
74 }
75
76 /*
77  * Look up the first record equal to [bno, len] in the btree
78  * given by cur.
79  */
80 int
81 xfs_refcount_lookup_eq(
82         struct xfs_btree_cur    *cur,
83         xfs_agblock_t           bno,
84         int                     *stat)
85 {
86         trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno,
87                         XFS_LOOKUP_LE);
88         cur->bc_rec.rc.rc_startblock = bno;
89         cur->bc_rec.rc.rc_blockcount = 0;
90         return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
91 }
92
93 /* Convert on-disk record to in-core format. */
94 void
95 xfs_refcount_btrec_to_irec(
96         const union xfs_btree_rec       *rec,
97         struct xfs_refcount_irec        *irec)
98 {
99         irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock);
100         irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
101         irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
102 }
103
104 /*
105  * Get the data from the pointed-to record.
106  */
107 int
108 xfs_refcount_get_rec(
109         struct xfs_btree_cur            *cur,
110         struct xfs_refcount_irec        *irec,
111         int                             *stat)
112 {
113         struct xfs_mount                *mp = cur->bc_mp;
114         struct xfs_perag                *pag = cur->bc_ag.pag;
115         union xfs_btree_rec             *rec;
116         int                             error;
117         xfs_agblock_t                   realstart;
118
119         error = xfs_btree_get_rec(cur, &rec, stat);
120         if (error || !*stat)
121                 return error;
122
123         xfs_refcount_btrec_to_irec(rec, irec);
124         if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN)
125                 goto out_bad_rec;
126
127         /* handle special COW-staging state */
128         realstart = irec->rc_startblock;
129         if (realstart & XFS_REFC_COW_START) {
130                 if (irec->rc_refcount != 1)
131                         goto out_bad_rec;
132                 realstart &= ~XFS_REFC_COW_START;
133         } else if (irec->rc_refcount < 2) {
134                 goto out_bad_rec;
135         }
136
137         /* check for valid extent range, including overflow */
138         if (!xfs_verify_agbno(pag, realstart))
139                 goto out_bad_rec;
140         if (realstart > realstart + irec->rc_blockcount)
141                 goto out_bad_rec;
142         if (!xfs_verify_agbno(pag, realstart + irec->rc_blockcount - 1))
143                 goto out_bad_rec;
144
145         if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT)
146                 goto out_bad_rec;
147
148         trace_xfs_refcount_get(cur->bc_mp, pag->pag_agno, irec);
149         return 0;
150
151 out_bad_rec:
152         xfs_warn(mp,
153                 "Refcount BTree record corruption in AG %d detected!",
154                 pag->pag_agno);
155         xfs_warn(mp,
156                 "Start block 0x%x, block count 0x%x, references 0x%x",
157                 irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
158         return -EFSCORRUPTED;
159 }
160
161 /*
162  * Update the record referred to by cur to the value given
163  * by [bno, len, refcount].
164  * This either works (return 0) or gets an EFSCORRUPTED error.
165  */
166 STATIC int
167 xfs_refcount_update(
168         struct xfs_btree_cur            *cur,
169         struct xfs_refcount_irec        *irec)
170 {
171         union xfs_btree_rec     rec;
172         int                     error;
173
174         trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
175         rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock);
176         rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
177         rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);
178         error = xfs_btree_update(cur, &rec);
179         if (error)
180                 trace_xfs_refcount_update_error(cur->bc_mp,
181                                 cur->bc_ag.pag->pag_agno, error, _RET_IP_);
182         return error;
183 }
184
185 /*
186  * Insert the record referred to by cur to the value given
187  * by [bno, len, refcount].
188  * This either works (return 0) or gets an EFSCORRUPTED error.
189  */
190 int
191 xfs_refcount_insert(
192         struct xfs_btree_cur            *cur,
193         struct xfs_refcount_irec        *irec,
194         int                             *i)
195 {
196         int                             error;
197
198         trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
199         cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
200         cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
201         cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
202         error = xfs_btree_insert(cur, i);
203         if (error)
204                 goto out_error;
205         if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
206                 error = -EFSCORRUPTED;
207                 goto out_error;
208         }
209
210 out_error:
211         if (error)
212                 trace_xfs_refcount_insert_error(cur->bc_mp,
213                                 cur->bc_ag.pag->pag_agno, error, _RET_IP_);
214         return error;
215 }
216
217 /*
218  * Remove the record referred to by cur, then set the pointer to the spot
219  * where the record could be re-inserted, in case we want to increment or
220  * decrement the cursor.
221  * This either works (return 0) or gets an EFSCORRUPTED error.
222  */
223 STATIC int
224 xfs_refcount_delete(
225         struct xfs_btree_cur    *cur,
226         int                     *i)
227 {
228         struct xfs_refcount_irec        irec;
229         int                     found_rec;
230         int                     error;
231
232         error = xfs_refcount_get_rec(cur, &irec, &found_rec);
233         if (error)
234                 goto out_error;
235         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
236                 error = -EFSCORRUPTED;
237                 goto out_error;
238         }
239         trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.pag->pag_agno, &irec);
240         error = xfs_btree_delete(cur, i);
241         if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
242                 error = -EFSCORRUPTED;
243                 goto out_error;
244         }
245         if (error)
246                 goto out_error;
247         error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec);
248 out_error:
249         if (error)
250                 trace_xfs_refcount_delete_error(cur->bc_mp,
251                                 cur->bc_ag.pag->pag_agno, error, _RET_IP_);
252         return error;
253 }
254
255 /*
256  * Adjusting the Reference Count
257  *
258  * As stated elsewhere, the reference count btree (refcbt) stores
259  * >1 reference counts for extents of physical blocks.  In this
260  * operation, we're either raising or lowering the reference count of
261  * some subrange stored in the tree:
262  *
263  *      <------ adjustment range ------>
264  * ----+   +---+-----+ +--+--------+---------
265  *  2  |   | 3 |  4  | |17|   55   |   10
266  * ----+   +---+-----+ +--+--------+---------
267  * X axis is physical blocks number;
268  * reference counts are the numbers inside the rectangles
269  *
270  * The first thing we need to do is to ensure that there are no
271  * refcount extents crossing either boundary of the range to be
272  * adjusted.  For any extent that does cross a boundary, split it into
273  * two extents so that we can increment the refcount of one of the
274  * pieces later:
275  *
276  *      <------ adjustment range ------>
277  * ----+   +---+-----+ +--+--------+----+----
278  *  2  |   | 3 |  2  | |17|   55   | 10 | 10
279  * ----+   +---+-----+ +--+--------+----+----
280  *
281  * For this next step, let's assume that all the physical blocks in
282  * the adjustment range are mapped to a file and are therefore in use
283  * at least once.  Therefore, we can infer that any gap in the
284  * refcount tree within the adjustment range represents a physical
285  * extent with refcount == 1:
286  *
287  *      <------ adjustment range ------>
288  * ----+---+---+-----+-+--+--------+----+----
289  *  2  |"1"| 3 |  2  |1|17|   55   | 10 | 10
290  * ----+---+---+-----+-+--+--------+----+----
291  *      ^
292  *
293  * For each extent that falls within the interval range, figure out
294  * which extent is to the left or the right of that extent.  Now we
295  * have a left, current, and right extent.  If the new reference count
296  * of the center extent enables us to merge left, center, and right
297  * into one record covering all three, do so.  If the center extent is
298  * at the left end of the range, abuts the left extent, and its new
299  * reference count matches the left extent's record, then merge them.
300  * If the center extent is at the right end of the range, abuts the
301  * right extent, and the reference counts match, merge those.  In the
302  * example, we can left merge (assuming an increment operation):
303  *
304  *      <------ adjustment range ------>
305  * --------+---+-----+-+--+--------+----+----
306  *    2    | 3 |  2  |1|17|   55   | 10 | 10
307  * --------+---+-----+-+--+--------+----+----
308  *          ^
309  *
310  * For all other extents within the range, adjust the reference count
311  * or delete it if the refcount falls below 2.  If we were
312  * incrementing, the end result looks like this:
313  *
314  *      <------ adjustment range ------>
315  * --------+---+-----+-+--+--------+----+----
316  *    2    | 4 |  3  |2|18|   56   | 11 | 10
317  * --------+---+-----+-+--+--------+----+----
318  *
319  * The result of a decrement operation looks as such:
320  *
321  *      <------ adjustment range ------>
322  * ----+   +---+       +--+--------+----+----
323  *  2  |   | 2 |       |16|   54   |  9 | 10
324  * ----+   +---+       +--+--------+----+----
325  *      DDDD    111111DD
326  *
327  * The blocks marked "D" are freed; the blocks marked "1" are only
328  * referenced once and therefore the record is removed from the
329  * refcount btree.
330  */
331
332 /* Next block after this extent. */
333 static inline xfs_agblock_t
334 xfs_refc_next(
335         struct xfs_refcount_irec        *rc)
336 {
337         return rc->rc_startblock + rc->rc_blockcount;
338 }
339
340 /*
341  * Split a refcount extent that crosses agbno.
342  */
343 STATIC int
344 xfs_refcount_split_extent(
345         struct xfs_btree_cur            *cur,
346         xfs_agblock_t                   agbno,
347         bool                            *shape_changed)
348 {
349         struct xfs_refcount_irec        rcext, tmp;
350         int                             found_rec;
351         int                             error;
352
353         *shape_changed = false;
354         error = xfs_refcount_lookup_le(cur, agbno, &found_rec);
355         if (error)
356                 goto out_error;
357         if (!found_rec)
358                 return 0;
359
360         error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
361         if (error)
362                 goto out_error;
363         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
364                 error = -EFSCORRUPTED;
365                 goto out_error;
366         }
367         if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
368                 return 0;
369
370         *shape_changed = true;
371         trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
372                         &rcext, agbno);
373
374         /* Establish the right extent. */
375         tmp = rcext;
376         tmp.rc_startblock = agbno;
377         tmp.rc_blockcount -= (agbno - rcext.rc_startblock);
378         error = xfs_refcount_update(cur, &tmp);
379         if (error)
380                 goto out_error;
381
382         /* Insert the left extent. */
383         tmp = rcext;
384         tmp.rc_blockcount = agbno - rcext.rc_startblock;
385         error = xfs_refcount_insert(cur, &tmp, &found_rec);
386         if (error)
387                 goto out_error;
388         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
389                 error = -EFSCORRUPTED;
390                 goto out_error;
391         }
392         return error;
393
394 out_error:
395         trace_xfs_refcount_split_extent_error(cur->bc_mp,
396                         cur->bc_ag.pag->pag_agno, error, _RET_IP_);
397         return error;
398 }
399
400 /*
401  * Merge the left, center, and right extents.
402  */
403 STATIC int
404 xfs_refcount_merge_center_extents(
405         struct xfs_btree_cur            *cur,
406         struct xfs_refcount_irec        *left,
407         struct xfs_refcount_irec        *center,
408         struct xfs_refcount_irec        *right,
409         unsigned long long              extlen,
410         xfs_extlen_t                    *aglen)
411 {
412         int                             error;
413         int                             found_rec;
414
415         trace_xfs_refcount_merge_center_extents(cur->bc_mp,
416                         cur->bc_ag.pag->pag_agno, left, center, right);
417
418         /*
419          * Make sure the center and right extents are not in the btree.
420          * If the center extent was synthesized, the first delete call
421          * removes the right extent and we skip the second deletion.
422          * If center and right were in the btree, then the first delete
423          * call removes the center and the second one removes the right
424          * extent.
425          */
426         error = xfs_refcount_lookup_ge(cur, center->rc_startblock,
427                         &found_rec);
428         if (error)
429                 goto out_error;
430         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
431                 error = -EFSCORRUPTED;
432                 goto out_error;
433         }
434
435         error = xfs_refcount_delete(cur, &found_rec);
436         if (error)
437                 goto out_error;
438         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
439                 error = -EFSCORRUPTED;
440                 goto out_error;
441         }
442
443         if (center->rc_refcount > 1) {
444                 error = xfs_refcount_delete(cur, &found_rec);
445                 if (error)
446                         goto out_error;
447                 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
448                         error = -EFSCORRUPTED;
449                         goto out_error;
450                 }
451         }
452
453         /* Enlarge the left extent. */
454         error = xfs_refcount_lookup_le(cur, left->rc_startblock,
455                         &found_rec);
456         if (error)
457                 goto out_error;
458         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
459                 error = -EFSCORRUPTED;
460                 goto out_error;
461         }
462
463         left->rc_blockcount = extlen;
464         error = xfs_refcount_update(cur, left);
465         if (error)
466                 goto out_error;
467
468         *aglen = 0;
469         return error;
470
471 out_error:
472         trace_xfs_refcount_merge_center_extents_error(cur->bc_mp,
473                         cur->bc_ag.pag->pag_agno, error, _RET_IP_);
474         return error;
475 }
476
477 /*
478  * Merge with the left extent.
479  */
480 STATIC int
481 xfs_refcount_merge_left_extent(
482         struct xfs_btree_cur            *cur,
483         struct xfs_refcount_irec        *left,
484         struct xfs_refcount_irec        *cleft,
485         xfs_agblock_t                   *agbno,
486         xfs_extlen_t                    *aglen)
487 {
488         int                             error;
489         int                             found_rec;
490
491         trace_xfs_refcount_merge_left_extent(cur->bc_mp,
492                         cur->bc_ag.pag->pag_agno, left, cleft);
493
494         /* If the extent at agbno (cleft) wasn't synthesized, remove it. */
495         if (cleft->rc_refcount > 1) {
496                 error = xfs_refcount_lookup_le(cur, cleft->rc_startblock,
497                                 &found_rec);
498                 if (error)
499                         goto out_error;
500                 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
501                         error = -EFSCORRUPTED;
502                         goto out_error;
503                 }
504
505                 error = xfs_refcount_delete(cur, &found_rec);
506                 if (error)
507                         goto out_error;
508                 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
509                         error = -EFSCORRUPTED;
510                         goto out_error;
511                 }
512         }
513
514         /* Enlarge the left extent. */
515         error = xfs_refcount_lookup_le(cur, left->rc_startblock,
516                         &found_rec);
517         if (error)
518                 goto out_error;
519         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
520                 error = -EFSCORRUPTED;
521                 goto out_error;
522         }
523
524         left->rc_blockcount += cleft->rc_blockcount;
525         error = xfs_refcount_update(cur, left);
526         if (error)
527                 goto out_error;
528
529         *agbno += cleft->rc_blockcount;
530         *aglen -= cleft->rc_blockcount;
531         return error;
532
533 out_error:
534         trace_xfs_refcount_merge_left_extent_error(cur->bc_mp,
535                         cur->bc_ag.pag->pag_agno, error, _RET_IP_);
536         return error;
537 }
538
539 /*
540  * Merge with the right extent.
541  */
542 STATIC int
543 xfs_refcount_merge_right_extent(
544         struct xfs_btree_cur            *cur,
545         struct xfs_refcount_irec        *right,
546         struct xfs_refcount_irec        *cright,
547         xfs_extlen_t                    *aglen)
548 {
549         int                             error;
550         int                             found_rec;
551
552         trace_xfs_refcount_merge_right_extent(cur->bc_mp,
553                         cur->bc_ag.pag->pag_agno, cright, right);
554
555         /*
556          * If the extent ending at agbno+aglen (cright) wasn't synthesized,
557          * remove it.
558          */
559         if (cright->rc_refcount > 1) {
560                 error = xfs_refcount_lookup_le(cur, cright->rc_startblock,
561                         &found_rec);
562                 if (error)
563                         goto out_error;
564                 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
565                         error = -EFSCORRUPTED;
566                         goto out_error;
567                 }
568
569                 error = xfs_refcount_delete(cur, &found_rec);
570                 if (error)
571                         goto out_error;
572                 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
573                         error = -EFSCORRUPTED;
574                         goto out_error;
575                 }
576         }
577
578         /* Enlarge the right extent. */
579         error = xfs_refcount_lookup_le(cur, right->rc_startblock,
580                         &found_rec);
581         if (error)
582                 goto out_error;
583         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
584                 error = -EFSCORRUPTED;
585                 goto out_error;
586         }
587
588         right->rc_startblock -= cright->rc_blockcount;
589         right->rc_blockcount += cright->rc_blockcount;
590         error = xfs_refcount_update(cur, right);
591         if (error)
592                 goto out_error;
593
594         *aglen -= cright->rc_blockcount;
595         return error;
596
597 out_error:
598         trace_xfs_refcount_merge_right_extent_error(cur->bc_mp,
599                         cur->bc_ag.pag->pag_agno, error, _RET_IP_);
600         return error;
601 }
602
603 #define XFS_FIND_RCEXT_SHARED   1
604 #define XFS_FIND_RCEXT_COW      2
605 /*
606  * Find the left extent and the one after it (cleft).  This function assumes
607  * that we've already split any extent crossing agbno.
608  */
609 STATIC int
610 xfs_refcount_find_left_extents(
611         struct xfs_btree_cur            *cur,
612         struct xfs_refcount_irec        *left,
613         struct xfs_refcount_irec        *cleft,
614         xfs_agblock_t                   agbno,
615         xfs_extlen_t                    aglen,
616         int                             flags)
617 {
618         struct xfs_refcount_irec        tmp;
619         int                             error;
620         int                             found_rec;
621
622         left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
623         error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec);
624         if (error)
625                 goto out_error;
626         if (!found_rec)
627                 return 0;
628
629         error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
630         if (error)
631                 goto out_error;
632         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
633                 error = -EFSCORRUPTED;
634                 goto out_error;
635         }
636
637         if (xfs_refc_next(&tmp) != agbno)
638                 return 0;
639         if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
640                 return 0;
641         if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
642                 return 0;
643         /* We have a left extent; retrieve (or invent) the next right one */
644         *left = tmp;
645
646         error = xfs_btree_increment(cur, 0, &found_rec);
647         if (error)
648                 goto out_error;
649         if (found_rec) {
650                 error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
651                 if (error)
652                         goto out_error;
653                 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
654                         error = -EFSCORRUPTED;
655                         goto out_error;
656                 }
657
658                 /* if tmp starts at the end of our range, just use that */
659                 if (tmp.rc_startblock == agbno)
660                         *cleft = tmp;
661                 else {
662                         /*
663                          * There's a gap in the refcntbt at the start of the
664                          * range we're interested in (refcount == 1) so
665                          * synthesize the implied extent and pass it back.
666                          * We assume here that the agbno/aglen range was
667                          * passed in from a data fork extent mapping and
668                          * therefore is allocated to exactly one owner.
669                          */
670                         cleft->rc_startblock = agbno;
671                         cleft->rc_blockcount = min(aglen,
672                                         tmp.rc_startblock - agbno);
673                         cleft->rc_refcount = 1;
674                 }
675         } else {
676                 /*
677                  * No extents, so pretend that there's one covering the whole
678                  * range.
679                  */
680                 cleft->rc_startblock = agbno;
681                 cleft->rc_blockcount = aglen;
682                 cleft->rc_refcount = 1;
683         }
684         trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
685                         left, cleft, agbno);
686         return error;
687
688 out_error:
689         trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
690                         cur->bc_ag.pag->pag_agno, error, _RET_IP_);
691         return error;
692 }
693
694 /*
695  * Find the right extent and the one before it (cright).  This function
696  * assumes that we've already split any extents crossing agbno + aglen.
697  */
698 STATIC int
699 xfs_refcount_find_right_extents(
700         struct xfs_btree_cur            *cur,
701         struct xfs_refcount_irec        *right,
702         struct xfs_refcount_irec        *cright,
703         xfs_agblock_t                   agbno,
704         xfs_extlen_t                    aglen,
705         int                             flags)
706 {
707         struct xfs_refcount_irec        tmp;
708         int                             error;
709         int                             found_rec;
710
711         right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
712         error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec);
713         if (error)
714                 goto out_error;
715         if (!found_rec)
716                 return 0;
717
718         error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
719         if (error)
720                 goto out_error;
721         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
722                 error = -EFSCORRUPTED;
723                 goto out_error;
724         }
725
726         if (tmp.rc_startblock != agbno + aglen)
727                 return 0;
728         if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
729                 return 0;
730         if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
731                 return 0;
732         /* We have a right extent; retrieve (or invent) the next left one */
733         *right = tmp;
734
735         error = xfs_btree_decrement(cur, 0, &found_rec);
736         if (error)
737                 goto out_error;
738         if (found_rec) {
739                 error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
740                 if (error)
741                         goto out_error;
742                 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
743                         error = -EFSCORRUPTED;
744                         goto out_error;
745                 }
746
747                 /* if tmp ends at the end of our range, just use that */
748                 if (xfs_refc_next(&tmp) == agbno + aglen)
749                         *cright = tmp;
750                 else {
751                         /*
752                          * There's a gap in the refcntbt at the end of the
753                          * range we're interested in (refcount == 1) so
754                          * create the implied extent and pass it back.
755                          * We assume here that the agbno/aglen range was
756                          * passed in from a data fork extent mapping and
757                          * therefore is allocated to exactly one owner.
758                          */
759                         cright->rc_startblock = max(agbno, xfs_refc_next(&tmp));
760                         cright->rc_blockcount = right->rc_startblock -
761                                         cright->rc_startblock;
762                         cright->rc_refcount = 1;
763                 }
764         } else {
765                 /*
766                  * No extents, so pretend that there's one covering the whole
767                  * range.
768                  */
769                 cright->rc_startblock = agbno;
770                 cright->rc_blockcount = aglen;
771                 cright->rc_refcount = 1;
772         }
773         trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
774                         cright, right, agbno + aglen);
775         return error;
776
777 out_error:
778         trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
779                         cur->bc_ag.pag->pag_agno, error, _RET_IP_);
780         return error;
781 }
782
783 /* Is this extent valid? */
784 static inline bool
785 xfs_refc_valid(
786         struct xfs_refcount_irec        *rc)
787 {
788         return rc->rc_startblock != NULLAGBLOCK;
789 }
790
791 /*
792  * Try to merge with any extents on the boundaries of the adjustment range.
793  */
794 STATIC int
795 xfs_refcount_merge_extents(
796         struct xfs_btree_cur    *cur,
797         xfs_agblock_t           *agbno,
798         xfs_extlen_t            *aglen,
799         enum xfs_refc_adjust_op adjust,
800         int                     flags,
801         bool                    *shape_changed)
802 {
803         struct xfs_refcount_irec        left = {0}, cleft = {0};
804         struct xfs_refcount_irec        cright = {0}, right = {0};
805         int                             error;
806         unsigned long long              ulen;
807         bool                            cequal;
808
809         *shape_changed = false;
810         /*
811          * Find the extent just below agbno [left], just above agbno [cleft],
812          * just below (agbno + aglen) [cright], and just above (agbno + aglen)
813          * [right].
814          */
815         error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno,
816                         *aglen, flags);
817         if (error)
818                 return error;
819         error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno,
820                         *aglen, flags);
821         if (error)
822                 return error;
823
824         /* No left or right extent to merge; exit. */
825         if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right))
826                 return 0;
827
828         cequal = (cleft.rc_startblock == cright.rc_startblock) &&
829                  (cleft.rc_blockcount == cright.rc_blockcount);
830
831         /* Try to merge left, cleft, and right.  cleft must == cright. */
832         ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount +
833                         right.rc_blockcount;
834         if (xfs_refc_valid(&left) && xfs_refc_valid(&right) &&
835             xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal &&
836             left.rc_refcount == cleft.rc_refcount + adjust &&
837             right.rc_refcount == cleft.rc_refcount + adjust &&
838             ulen < MAXREFCEXTLEN) {
839                 *shape_changed = true;
840                 return xfs_refcount_merge_center_extents(cur, &left, &cleft,
841                                 &right, ulen, aglen);
842         }
843
844         /* Try to merge left and cleft. */
845         ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount;
846         if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) &&
847             left.rc_refcount == cleft.rc_refcount + adjust &&
848             ulen < MAXREFCEXTLEN) {
849                 *shape_changed = true;
850                 error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
851                                 agbno, aglen);
852                 if (error)
853                         return error;
854
855                 /*
856                  * If we just merged left + cleft and cleft == cright,
857                  * we no longer have a cright to merge with right.  We're done.
858                  */
859                 if (cequal)
860                         return 0;
861         }
862
863         /* Try to merge cright and right. */
864         ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount;
865         if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) &&
866             right.rc_refcount == cright.rc_refcount + adjust &&
867             ulen < MAXREFCEXTLEN) {
868                 *shape_changed = true;
869                 return xfs_refcount_merge_right_extent(cur, &right, &cright,
870                                 aglen);
871         }
872
873         return error;
874 }
875
876 /*
877  * XXX: This is a pretty hand-wavy estimate.  The penalty for guessing
878  * true incorrectly is a shutdown FS; the penalty for guessing false
879  * incorrectly is more transaction rolls than might be necessary.
880  * Be conservative here.
881  */
882 static bool
883 xfs_refcount_still_have_space(
884         struct xfs_btree_cur            *cur)
885 {
886         unsigned long                   overhead;
887
888         /*
889          * Worst case estimate: full splits of the free space and rmap btrees
890          * to handle each of the shape changes to the refcount btree.
891          */
892         overhead = xfs_allocfree_block_count(cur->bc_mp,
893                                 cur->bc_ag.refc.shape_changes);
894         overhead += cur->bc_mp->m_refc_maxlevels;
895         overhead *= cur->bc_mp->m_sb.sb_blocksize;
896
897         /*
898          * Only allow 2 refcount extent updates per transaction if the
899          * refcount continue update "error" has been injected.
900          */
901         if (cur->bc_ag.refc.nr_ops > 2 &&
902             XFS_TEST_ERROR(false, cur->bc_mp,
903                         XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
904                 return false;
905
906         if (cur->bc_ag.refc.nr_ops == 0)
907                 return true;
908         else if (overhead > cur->bc_tp->t_log_res)
909                 return false;
910         return  cur->bc_tp->t_log_res - overhead >
911                 cur->bc_ag.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
912 }
913
914 /*
915  * Adjust the refcounts of middle extents.  At this point we should have
916  * split extents that crossed the adjustment range; merged with adjacent
917  * extents; and updated agbno/aglen to reflect the merges.  Therefore,
918  * all we have to do is update the extents inside [agbno, agbno + aglen].
919  */
920 STATIC int
921 xfs_refcount_adjust_extents(
922         struct xfs_btree_cur    *cur,
923         xfs_agblock_t           *agbno,
924         xfs_extlen_t            *aglen,
925         enum xfs_refc_adjust_op adj)
926 {
927         struct xfs_refcount_irec        ext, tmp;
928         int                             error;
929         int                             found_rec, found_tmp;
930         xfs_fsblock_t                   fsbno;
931
932         /* Merging did all the work already. */
933         if (*aglen == 0)
934                 return 0;
935
936         error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec);
937         if (error)
938                 goto out_error;
939
940         while (*aglen > 0 && xfs_refcount_still_have_space(cur)) {
941                 error = xfs_refcount_get_rec(cur, &ext, &found_rec);
942                 if (error)
943                         goto out_error;
944                 if (!found_rec) {
945                         ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
946                         ext.rc_blockcount = 0;
947                         ext.rc_refcount = 0;
948                 }
949
950                 /*
951                  * Deal with a hole in the refcount tree; if a file maps to
952                  * these blocks and there's no refcountbt record, pretend that
953                  * there is one with refcount == 1.
954                  */
955                 if (ext.rc_startblock != *agbno) {
956                         tmp.rc_startblock = *agbno;
957                         tmp.rc_blockcount = min(*aglen,
958                                         ext.rc_startblock - *agbno);
959                         tmp.rc_refcount = 1 + adj;
960                         trace_xfs_refcount_modify_extent(cur->bc_mp,
961                                         cur->bc_ag.pag->pag_agno, &tmp);
962
963                         /*
964                          * Either cover the hole (increment) or
965                          * delete the range (decrement).
966                          */
967                         cur->bc_ag.refc.nr_ops++;
968                         if (tmp.rc_refcount) {
969                                 error = xfs_refcount_insert(cur, &tmp,
970                                                 &found_tmp);
971                                 if (error)
972                                         goto out_error;
973                                 if (XFS_IS_CORRUPT(cur->bc_mp,
974                                                    found_tmp != 1)) {
975                                         error = -EFSCORRUPTED;
976                                         goto out_error;
977                                 }
978                         } else {
979                                 fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
980                                                 cur->bc_ag.pag->pag_agno,
981                                                 tmp.rc_startblock);
982                                 xfs_free_extent_later(cur->bc_tp, fsbno,
983                                                   tmp.rc_blockcount, NULL);
984                         }
985
986                         (*agbno) += tmp.rc_blockcount;
987                         (*aglen) -= tmp.rc_blockcount;
988
989                         error = xfs_refcount_lookup_ge(cur, *agbno,
990                                         &found_rec);
991                         if (error)
992                                 goto out_error;
993                 }
994
995                 /* Stop if there's nothing left to modify */
996                 if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
997                         break;
998
999                 /*
1000                  * Adjust the reference count and either update the tree
1001                  * (incr) or free the blocks (decr).
1002                  */
1003                 if (ext.rc_refcount == MAXREFCOUNT)
1004                         goto skip;
1005                 ext.rc_refcount += adj;
1006                 trace_xfs_refcount_modify_extent(cur->bc_mp,
1007                                 cur->bc_ag.pag->pag_agno, &ext);
1008                 cur->bc_ag.refc.nr_ops++;
1009                 if (ext.rc_refcount > 1) {
1010                         error = xfs_refcount_update(cur, &ext);
1011                         if (error)
1012                                 goto out_error;
1013                 } else if (ext.rc_refcount == 1) {
1014                         error = xfs_refcount_delete(cur, &found_rec);
1015                         if (error)
1016                                 goto out_error;
1017                         if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
1018                                 error = -EFSCORRUPTED;
1019                                 goto out_error;
1020                         }
1021                         goto advloop;
1022                 } else {
1023                         fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
1024                                         cur->bc_ag.pag->pag_agno,
1025                                         ext.rc_startblock);
1026                         xfs_free_extent_later(cur->bc_tp, fsbno,
1027                                         ext.rc_blockcount, NULL);
1028                 }
1029
1030 skip:
1031                 error = xfs_btree_increment(cur, 0, &found_rec);
1032                 if (error)
1033                         goto out_error;
1034
1035 advloop:
1036                 (*agbno) += ext.rc_blockcount;
1037                 (*aglen) -= ext.rc_blockcount;
1038         }
1039
1040         return error;
1041 out_error:
1042         trace_xfs_refcount_modify_extent_error(cur->bc_mp,
1043                         cur->bc_ag.pag->pag_agno, error, _RET_IP_);
1044         return error;
1045 }
1046
1047 /* Adjust the reference count of a range of AG blocks. */
1048 STATIC int
1049 xfs_refcount_adjust(
1050         struct xfs_btree_cur    *cur,
1051         xfs_agblock_t           agbno,
1052         xfs_extlen_t            aglen,
1053         xfs_agblock_t           *new_agbno,
1054         xfs_extlen_t            *new_aglen,
1055         enum xfs_refc_adjust_op adj)
1056 {
1057         bool                    shape_changed;
1058         int                     shape_changes = 0;
1059         int                     error;
1060
1061         *new_agbno = agbno;
1062         *new_aglen = aglen;
1063         if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
1064                 trace_xfs_refcount_increase(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1065                                 agbno, aglen);
1066         else
1067                 trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1068                                 agbno, aglen);
1069
1070         /*
1071          * Ensure that no rcextents cross the boundary of the adjustment range.
1072          */
1073         error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
1074         if (error)
1075                 goto out_error;
1076         if (shape_changed)
1077                 shape_changes++;
1078
1079         error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
1080         if (error)
1081                 goto out_error;
1082         if (shape_changed)
1083                 shape_changes++;
1084
1085         /*
1086          * Try to merge with the left or right extents of the range.
1087          */
1088         error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj,
1089                         XFS_FIND_RCEXT_SHARED, &shape_changed);
1090         if (error)
1091                 goto out_error;
1092         if (shape_changed)
1093                 shape_changes++;
1094         if (shape_changes)
1095                 cur->bc_ag.refc.shape_changes++;
1096
1097         /* Now that we've taken care of the ends, adjust the middle extents */
1098         error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, adj);
1099         if (error)
1100                 goto out_error;
1101
1102         return 0;
1103
1104 out_error:
1105         trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1106                         error, _RET_IP_);
1107         return error;
1108 }
1109
1110 /* Clean up after calling xfs_refcount_finish_one. */
1111 void
1112 xfs_refcount_finish_one_cleanup(
1113         struct xfs_trans        *tp,
1114         struct xfs_btree_cur    *rcur,
1115         int                     error)
1116 {
1117         struct xfs_buf          *agbp;
1118
1119         if (rcur == NULL)
1120                 return;
1121         agbp = rcur->bc_ag.agbp;
1122         xfs_btree_del_cursor(rcur, error);
1123         if (error)
1124                 xfs_trans_brelse(tp, agbp);
1125 }
1126
1127 /*
1128  * Process one of the deferred refcount operations.  We pass back the
1129  * btree cursor to maintain our lock on the btree between calls.
1130  * This saves time and eliminates a buffer deadlock between the
1131  * superblock and the AGF because we'll always grab them in the same
1132  * order.
1133  */
1134 int
1135 xfs_refcount_finish_one(
1136         struct xfs_trans                *tp,
1137         enum xfs_refcount_intent_type   type,
1138         xfs_fsblock_t                   startblock,
1139         xfs_extlen_t                    blockcount,
1140         xfs_fsblock_t                   *new_fsb,
1141         xfs_extlen_t                    *new_len,
1142         struct xfs_btree_cur            **pcur)
1143 {
1144         struct xfs_mount                *mp = tp->t_mountp;
1145         struct xfs_btree_cur            *rcur;
1146         struct xfs_buf                  *agbp = NULL;
1147         int                             error = 0;
1148         xfs_agblock_t                   bno;
1149         xfs_agblock_t                   new_agbno;
1150         unsigned long                   nr_ops = 0;
1151         int                             shape_changes = 0;
1152         struct xfs_perag                *pag;
1153
1154         pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, startblock));
1155         bno = XFS_FSB_TO_AGBNO(mp, startblock);
1156
1157         trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock),
1158                         type, XFS_FSB_TO_AGBNO(mp, startblock),
1159                         blockcount);
1160
1161         if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) {
1162                 error = -EIO;
1163                 goto out_drop;
1164         }
1165
1166         /*
1167          * If we haven't gotten a cursor or the cursor AG doesn't match
1168          * the startblock, get one now.
1169          */
1170         rcur = *pcur;
1171         if (rcur != NULL && rcur->bc_ag.pag != pag) {
1172                 nr_ops = rcur->bc_ag.refc.nr_ops;
1173                 shape_changes = rcur->bc_ag.refc.shape_changes;
1174                 xfs_refcount_finish_one_cleanup(tp, rcur, 0);
1175                 rcur = NULL;
1176                 *pcur = NULL;
1177         }
1178         if (rcur == NULL) {
1179                 error = xfs_alloc_read_agf(pag, tp, XFS_ALLOC_FLAG_FREEING,
1180                                 &agbp);
1181                 if (error)
1182                         goto out_drop;
1183
1184                 rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);
1185                 rcur->bc_ag.refc.nr_ops = nr_ops;
1186                 rcur->bc_ag.refc.shape_changes = shape_changes;
1187         }
1188         *pcur = rcur;
1189
1190         switch (type) {
1191         case XFS_REFCOUNT_INCREASE:
1192                 error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
1193                                 new_len, XFS_REFCOUNT_ADJUST_INCREASE);
1194                 *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
1195                 break;
1196         case XFS_REFCOUNT_DECREASE:
1197                 error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
1198                                 new_len, XFS_REFCOUNT_ADJUST_DECREASE);
1199                 *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
1200                 break;
1201         case XFS_REFCOUNT_ALLOC_COW:
1202                 *new_fsb = startblock + blockcount;
1203                 *new_len = 0;
1204                 error = __xfs_refcount_cow_alloc(rcur, bno, blockcount);
1205                 break;
1206         case XFS_REFCOUNT_FREE_COW:
1207                 *new_fsb = startblock + blockcount;
1208                 *new_len = 0;
1209                 error = __xfs_refcount_cow_free(rcur, bno, blockcount);
1210                 break;
1211         default:
1212                 ASSERT(0);
1213                 error = -EFSCORRUPTED;
1214         }
1215         if (!error && *new_len > 0)
1216                 trace_xfs_refcount_finish_one_leftover(mp, pag->pag_agno, type,
1217                                 bno, blockcount, new_agbno, *new_len);
1218 out_drop:
1219         xfs_perag_put(pag);
1220         return error;
1221 }
1222
1223 /*
1224  * Record a refcount intent for later processing.
1225  */
1226 static void
1227 __xfs_refcount_add(
1228         struct xfs_trans                *tp,
1229         enum xfs_refcount_intent_type   type,
1230         xfs_fsblock_t                   startblock,
1231         xfs_extlen_t                    blockcount)
1232 {
1233         struct xfs_refcount_intent      *ri;
1234
1235         trace_xfs_refcount_defer(tp->t_mountp,
1236                         XFS_FSB_TO_AGNO(tp->t_mountp, startblock),
1237                         type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
1238                         blockcount);
1239
1240         ri = kmem_cache_alloc(xfs_refcount_intent_cache,
1241                         GFP_NOFS | __GFP_NOFAIL);
1242         INIT_LIST_HEAD(&ri->ri_list);
1243         ri->ri_type = type;
1244         ri->ri_startblock = startblock;
1245         ri->ri_blockcount = blockcount;
1246
1247         xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list);
1248 }
1249
1250 /*
1251  * Increase the reference count of the blocks backing a file's extent.
1252  */
1253 void
1254 xfs_refcount_increase_extent(
1255         struct xfs_trans                *tp,
1256         struct xfs_bmbt_irec            *PREV)
1257 {
1258         if (!xfs_has_reflink(tp->t_mountp))
1259                 return;
1260
1261         __xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock,
1262                         PREV->br_blockcount);
1263 }
1264
1265 /*
1266  * Decrease the reference count of the blocks backing a file's extent.
1267  */
1268 void
1269 xfs_refcount_decrease_extent(
1270         struct xfs_trans                *tp,
1271         struct xfs_bmbt_irec            *PREV)
1272 {
1273         if (!xfs_has_reflink(tp->t_mountp))
1274                 return;
1275
1276         __xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock,
1277                         PREV->br_blockcount);
1278 }
1279
1280 /*
1281  * Given an AG extent, find the lowest-numbered run of shared blocks
1282  * within that range and return the range in fbno/flen.  If
1283  * find_end_of_shared is set, return the longest contiguous extent of
1284  * shared blocks; if not, just return the first extent we find.  If no
1285  * shared blocks are found, fbno and flen will be set to NULLAGBLOCK
1286  * and 0, respectively.
1287  */
1288 int
1289 xfs_refcount_find_shared(
1290         struct xfs_btree_cur            *cur,
1291         xfs_agblock_t                   agbno,
1292         xfs_extlen_t                    aglen,
1293         xfs_agblock_t                   *fbno,
1294         xfs_extlen_t                    *flen,
1295         bool                            find_end_of_shared)
1296 {
1297         struct xfs_refcount_irec        tmp;
1298         int                             i;
1299         int                             have;
1300         int                             error;
1301
1302         trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1303                         agbno, aglen);
1304
1305         /* By default, skip the whole range */
1306         *fbno = NULLAGBLOCK;
1307         *flen = 0;
1308
1309         /* Try to find a refcount extent that crosses the start */
1310         error = xfs_refcount_lookup_le(cur, agbno, &have);
1311         if (error)
1312                 goto out_error;
1313         if (!have) {
1314                 /* No left extent, look at the next one */
1315                 error = xfs_btree_increment(cur, 0, &have);
1316                 if (error)
1317                         goto out_error;
1318                 if (!have)
1319                         goto done;
1320         }
1321         error = xfs_refcount_get_rec(cur, &tmp, &i);
1322         if (error)
1323                 goto out_error;
1324         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
1325                 error = -EFSCORRUPTED;
1326                 goto out_error;
1327         }
1328
1329         /* If the extent ends before the start, look at the next one */
1330         if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
1331                 error = xfs_btree_increment(cur, 0, &have);
1332                 if (error)
1333                         goto out_error;
1334                 if (!have)
1335                         goto done;
1336                 error = xfs_refcount_get_rec(cur, &tmp, &i);
1337                 if (error)
1338                         goto out_error;
1339                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
1340                         error = -EFSCORRUPTED;
1341                         goto out_error;
1342                 }
1343         }
1344
1345         /* If the extent starts after the range we want, bail out */
1346         if (tmp.rc_startblock >= agbno + aglen)
1347                 goto done;
1348
1349         /* We found the start of a shared extent! */
1350         if (tmp.rc_startblock < agbno) {
1351                 tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
1352                 tmp.rc_startblock = agbno;
1353         }
1354
1355         *fbno = tmp.rc_startblock;
1356         *flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
1357         if (!find_end_of_shared)
1358                 goto done;
1359
1360         /* Otherwise, find the end of this shared extent */
1361         while (*fbno + *flen < agbno + aglen) {
1362                 error = xfs_btree_increment(cur, 0, &have);
1363                 if (error)
1364                         goto out_error;
1365                 if (!have)
1366                         break;
1367                 error = xfs_refcount_get_rec(cur, &tmp, &i);
1368                 if (error)
1369                         goto out_error;
1370                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
1371                         error = -EFSCORRUPTED;
1372                         goto out_error;
1373                 }
1374                 if (tmp.rc_startblock >= agbno + aglen ||
1375                     tmp.rc_startblock != *fbno + *flen)
1376                         break;
1377                 *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
1378         }
1379
1380 done:
1381         trace_xfs_refcount_find_shared_result(cur->bc_mp,
1382                         cur->bc_ag.pag->pag_agno, *fbno, *flen);
1383
1384 out_error:
1385         if (error)
1386                 trace_xfs_refcount_find_shared_error(cur->bc_mp,
1387                                 cur->bc_ag.pag->pag_agno, error, _RET_IP_);
1388         return error;
1389 }
1390
1391 /*
1392  * Recovering CoW Blocks After a Crash
1393  *
1394  * Due to the way that the copy on write mechanism works, there's a window of
1395  * opportunity in which we can lose track of allocated blocks during a crash.
1396  * Because CoW uses delayed allocation in the in-core CoW fork, writeback
1397  * causes blocks to be allocated and stored in the CoW fork.  The blocks are
1398  * no longer in the free space btree but are not otherwise recorded anywhere
1399  * until the write completes and the blocks are mapped into the file.  A crash
1400  * in between allocation and remapping results in the replacement blocks being
1401  * lost.  This situation is exacerbated by the CoW extent size hint because
1402  * allocations can hang around for long time.
1403  *
1404  * However, there is a place where we can record these allocations before they
1405  * become mappings -- the reference count btree.  The btree does not record
1406  * extents with refcount == 1, so we can record allocations with a refcount of
1407  * 1.  Blocks being used for CoW writeout cannot be shared, so there should be
1408  * no conflict with shared block records.  These mappings should be created
1409  * when we allocate blocks to the CoW fork and deleted when they're removed
1410  * from the CoW fork.
1411  *
1412  * Minor nit: records for in-progress CoW allocations and records for shared
1413  * extents must never be merged, to preserve the property that (except for CoW
1414  * allocations) there are no refcount btree entries with refcount == 1.  The
1415  * only time this could potentially happen is when unsharing a block that's
1416  * adjacent to CoW allocations, so we must be careful to avoid this.
1417  *
1418  * At mount time we recover lost CoW allocations by searching the refcount
1419  * btree for these refcount == 1 mappings.  These represent CoW allocations
1420  * that were in progress at the time the filesystem went down, so we can free
1421  * them to get the space back.
1422  *
1423  * This mechanism is superior to creating EFIs for unmapped CoW extents for
1424  * several reasons -- first, EFIs pin the tail of the log and would have to be
1425  * periodically relogged to avoid filling up the log.  Second, CoW completions
1426  * will have to file an EFD and create new EFIs for whatever remains in the
1427  * CoW fork; this partially takes care of (1) but extent-size reservations
1428  * will have to periodically relog even if there's no writeout in progress.
1429  * This can happen if the CoW extent size hint is set, which you really want.
1430  * Third, EFIs cannot currently be automatically relogged into newer
1431  * transactions to advance the log tail.  Fourth, stuffing the log full of
1432  * EFIs places an upper bound on the number of CoW allocations that can be
1433  * held filesystem-wide at any given time.  Recording them in the refcount
1434  * btree doesn't require us to maintain any state in memory and doesn't pin
1435  * the log.
1436  */
1437 /*
1438  * Adjust the refcounts of CoW allocations.  These allocations are "magic"
1439  * in that they're not referenced anywhere else in the filesystem, so we
1440  * stash them in the refcount btree with a refcount of 1 until either file
1441  * remapping (or CoW cancellation) happens.
1442  */
1443 STATIC int
1444 xfs_refcount_adjust_cow_extents(
1445         struct xfs_btree_cur    *cur,
1446         xfs_agblock_t           agbno,
1447         xfs_extlen_t            aglen,
1448         enum xfs_refc_adjust_op adj)
1449 {
1450         struct xfs_refcount_irec        ext, tmp;
1451         int                             error;
1452         int                             found_rec, found_tmp;
1453
1454         if (aglen == 0)
1455                 return 0;
1456
1457         /* Find any overlapping refcount records */
1458         error = xfs_refcount_lookup_ge(cur, agbno, &found_rec);
1459         if (error)
1460                 goto out_error;
1461         error = xfs_refcount_get_rec(cur, &ext, &found_rec);
1462         if (error)
1463                 goto out_error;
1464         if (!found_rec) {
1465                 ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks +
1466                                 XFS_REFC_COW_START;
1467                 ext.rc_blockcount = 0;
1468                 ext.rc_refcount = 0;
1469         }
1470
1471         switch (adj) {
1472         case XFS_REFCOUNT_ADJUST_COW_ALLOC:
1473                 /* Adding a CoW reservation, there should be nothing here. */
1474                 if (XFS_IS_CORRUPT(cur->bc_mp,
1475                                    agbno + aglen > ext.rc_startblock)) {
1476                         error = -EFSCORRUPTED;
1477                         goto out_error;
1478                 }
1479
1480                 tmp.rc_startblock = agbno;
1481                 tmp.rc_blockcount = aglen;
1482                 tmp.rc_refcount = 1;
1483                 trace_xfs_refcount_modify_extent(cur->bc_mp,
1484                                 cur->bc_ag.pag->pag_agno, &tmp);
1485
1486                 error = xfs_refcount_insert(cur, &tmp,
1487                                 &found_tmp);
1488                 if (error)
1489                         goto out_error;
1490                 if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) {
1491                         error = -EFSCORRUPTED;
1492                         goto out_error;
1493                 }
1494                 break;
1495         case XFS_REFCOUNT_ADJUST_COW_FREE:
1496                 /* Removing a CoW reservation, there should be one extent. */
1497                 if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) {
1498                         error = -EFSCORRUPTED;
1499                         goto out_error;
1500                 }
1501                 if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) {
1502                         error = -EFSCORRUPTED;
1503                         goto out_error;
1504                 }
1505                 if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) {
1506                         error = -EFSCORRUPTED;
1507                         goto out_error;
1508                 }
1509
1510                 ext.rc_refcount = 0;
1511                 trace_xfs_refcount_modify_extent(cur->bc_mp,
1512                                 cur->bc_ag.pag->pag_agno, &ext);
1513                 error = xfs_refcount_delete(cur, &found_rec);
1514                 if (error)
1515                         goto out_error;
1516                 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
1517                         error = -EFSCORRUPTED;
1518                         goto out_error;
1519                 }
1520                 break;
1521         default:
1522                 ASSERT(0);
1523         }
1524
1525         return error;
1526 out_error:
1527         trace_xfs_refcount_modify_extent_error(cur->bc_mp,
1528                         cur->bc_ag.pag->pag_agno, error, _RET_IP_);
1529         return error;
1530 }
1531
1532 /*
1533  * Add or remove refcount btree entries for CoW reservations.
1534  */
1535 STATIC int
1536 xfs_refcount_adjust_cow(
1537         struct xfs_btree_cur    *cur,
1538         xfs_agblock_t           agbno,
1539         xfs_extlen_t            aglen,
1540         enum xfs_refc_adjust_op adj)
1541 {
1542         bool                    shape_changed;
1543         int                     error;
1544
1545         agbno += XFS_REFC_COW_START;
1546
1547         /*
1548          * Ensure that no rcextents cross the boundary of the adjustment range.
1549          */
1550         error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
1551         if (error)
1552                 goto out_error;
1553
1554         error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
1555         if (error)
1556                 goto out_error;
1557
1558         /*
1559          * Try to merge with the left or right extents of the range.
1560          */
1561         error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj,
1562                         XFS_FIND_RCEXT_COW, &shape_changed);
1563         if (error)
1564                 goto out_error;
1565
1566         /* Now that we've taken care of the ends, adjust the middle extents */
1567         error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj);
1568         if (error)
1569                 goto out_error;
1570
1571         return 0;
1572
1573 out_error:
1574         trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1575                         error, _RET_IP_);
1576         return error;
1577 }
1578
1579 /*
1580  * Record a CoW allocation in the refcount btree.
1581  */
1582 STATIC int
1583 __xfs_refcount_cow_alloc(
1584         struct xfs_btree_cur    *rcur,
1585         xfs_agblock_t           agbno,
1586         xfs_extlen_t            aglen)
1587 {
1588         trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
1589                         agbno, aglen);
1590
1591         /* Add refcount btree reservation */
1592         return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1593                         XFS_REFCOUNT_ADJUST_COW_ALLOC);
1594 }
1595
1596 /*
1597  * Remove a CoW allocation from the refcount btree.
1598  */
1599 STATIC int
1600 __xfs_refcount_cow_free(
1601         struct xfs_btree_cur    *rcur,
1602         xfs_agblock_t           agbno,
1603         xfs_extlen_t            aglen)
1604 {
1605         trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
1606                         agbno, aglen);
1607
1608         /* Remove refcount btree reservation */
1609         return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1610                         XFS_REFCOUNT_ADJUST_COW_FREE);
1611 }
1612
1613 /* Record a CoW staging extent in the refcount btree. */
1614 void
1615 xfs_refcount_alloc_cow_extent(
1616         struct xfs_trans                *tp,
1617         xfs_fsblock_t                   fsb,
1618         xfs_extlen_t                    len)
1619 {
1620         struct xfs_mount                *mp = tp->t_mountp;
1621
1622         if (!xfs_has_reflink(mp))
1623                 return;
1624
1625         __xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len);
1626
1627         /* Add rmap entry */
1628         xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
1629                         XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1630 }
1631
1632 /* Forget a CoW staging event in the refcount btree. */
1633 void
1634 xfs_refcount_free_cow_extent(
1635         struct xfs_trans                *tp,
1636         xfs_fsblock_t                   fsb,
1637         xfs_extlen_t                    len)
1638 {
1639         struct xfs_mount                *mp = tp->t_mountp;
1640
1641         if (!xfs_has_reflink(mp))
1642                 return;
1643
1644         /* Remove rmap entry */
1645         xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
1646                         XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1647         __xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len);
1648 }
1649
1650 struct xfs_refcount_recovery {
1651         struct list_head                rr_list;
1652         struct xfs_refcount_irec        rr_rrec;
1653 };
1654
1655 /* Stuff an extent on the recovery list. */
1656 STATIC int
1657 xfs_refcount_recover_extent(
1658         struct xfs_btree_cur            *cur,
1659         const union xfs_btree_rec       *rec,
1660         void                            *priv)
1661 {
1662         struct list_head                *debris = priv;
1663         struct xfs_refcount_recovery    *rr;
1664
1665         if (XFS_IS_CORRUPT(cur->bc_mp,
1666                            be32_to_cpu(rec->refc.rc_refcount) != 1))
1667                 return -EFSCORRUPTED;
1668
1669         rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0);
1670         xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
1671         list_add_tail(&rr->rr_list, debris);
1672
1673         return 0;
1674 }
1675
1676 /* Find and remove leftover CoW reservations. */
1677 int
1678 xfs_refcount_recover_cow_leftovers(
1679         struct xfs_mount                *mp,
1680         struct xfs_perag                *pag)
1681 {
1682         struct xfs_trans                *tp;
1683         struct xfs_btree_cur            *cur;
1684         struct xfs_buf                  *agbp;
1685         struct xfs_refcount_recovery    *rr, *n;
1686         struct list_head                debris;
1687         union xfs_btree_irec            low;
1688         union xfs_btree_irec            high;
1689         xfs_fsblock_t                   fsb;
1690         xfs_agblock_t                   agbno;
1691         int                             error;
1692
1693         if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
1694                 return -EOPNOTSUPP;
1695
1696         INIT_LIST_HEAD(&debris);
1697
1698         /*
1699          * In this first part, we use an empty transaction to gather up
1700          * all the leftover CoW extents so that we can subsequently
1701          * delete them.  The empty transaction is used to avoid
1702          * a buffer lock deadlock if there happens to be a loop in the
1703          * refcountbt because we're allowed to re-grab a buffer that is
1704          * already attached to our transaction.  When we're done
1705          * recording the CoW debris we cancel the (empty) transaction
1706          * and everything goes away cleanly.
1707          */
1708         error = xfs_trans_alloc_empty(mp, &tp);
1709         if (error)
1710                 return error;
1711
1712         error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
1713         if (error)
1714                 goto out_trans;
1715         cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);
1716
1717         /* Find all the leftover CoW staging extents. */
1718         memset(&low, 0, sizeof(low));
1719         memset(&high, 0, sizeof(high));
1720         low.rc.rc_startblock = XFS_REFC_COW_START;
1721         high.rc.rc_startblock = -1U;
1722         error = xfs_btree_query_range(cur, &low, &high,
1723                         xfs_refcount_recover_extent, &debris);
1724         xfs_btree_del_cursor(cur, error);
1725         xfs_trans_brelse(tp, agbp);
1726         xfs_trans_cancel(tp);
1727         if (error)
1728                 goto out_free;
1729
1730         /* Now iterate the list to free the leftovers */
1731         list_for_each_entry_safe(rr, n, &debris, rr_list) {
1732                 /* Set up transaction. */
1733                 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
1734                 if (error)
1735                         goto out_free;
1736
1737                 trace_xfs_refcount_recover_extent(mp, pag->pag_agno,
1738                                 &rr->rr_rrec);
1739
1740                 /* Free the orphan record */
1741                 agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
1742                 fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, agbno);
1743                 xfs_refcount_free_cow_extent(tp, fsb,
1744                                 rr->rr_rrec.rc_blockcount);
1745
1746                 /* Free the block. */
1747                 xfs_free_extent_later(tp, fsb, rr->rr_rrec.rc_blockcount, NULL);
1748
1749                 error = xfs_trans_commit(tp);
1750                 if (error)
1751                         goto out_free;
1752
1753                 list_del(&rr->rr_list);
1754                 kmem_free(rr);
1755         }
1756
1757         return error;
1758 out_trans:
1759         xfs_trans_cancel(tp);
1760 out_free:
1761         /* Free the leftover list */
1762         list_for_each_entry_safe(rr, n, &debris, rr_list) {
1763                 list_del(&rr->rr_list);
1764                 kmem_free(rr);
1765         }
1766         return error;
1767 }
1768
1769 /* Is there a record covering a given extent? */
1770 int
1771 xfs_refcount_has_record(
1772         struct xfs_btree_cur    *cur,
1773         xfs_agblock_t           bno,
1774         xfs_extlen_t            len,
1775         bool                    *exists)
1776 {
1777         union xfs_btree_irec    low;
1778         union xfs_btree_irec    high;
1779
1780         memset(&low, 0, sizeof(low));
1781         low.rc.rc_startblock = bno;
1782         memset(&high, 0xFF, sizeof(high));
1783         high.rc.rc_startblock = bno + len - 1;
1784
1785         return xfs_btree_has_record(cur, &low, &high, exists);
1786 }
1787
1788 int __init
1789 xfs_refcount_intent_init_cache(void)
1790 {
1791         xfs_refcount_intent_cache = kmem_cache_create("xfs_refc_intent",
1792                         sizeof(struct xfs_refcount_intent),
1793                         0, 0, NULL);
1794
1795         return xfs_refcount_intent_cache != NULL ? 0 : -ENOMEM;
1796 }
1797
1798 void
1799 xfs_refcount_intent_destroy_cache(void)
1800 {
1801         kmem_cache_destroy(xfs_refcount_intent_cache);
1802         xfs_refcount_intent_cache = NULL;
1803 }