GNU Linux-libre 5.19-rc6-gnu
[releases.git] / drivers / md / persistent-data / dm-space-map-common.c
1 /*
2  * Copyright (C) 2011 Red Hat, Inc.
3  *
4  * This file is released under the GPL.
5  */
6
7 #include "dm-space-map-common.h"
8 #include "dm-transaction-manager.h"
9 #include "dm-btree-internal.h"
10 #include "dm-persistent-data-internal.h"
11
12 #include <linux/bitops.h>
13 #include <linux/device-mapper.h>
14
15 #define DM_MSG_PREFIX "space map common"
16
17 /*----------------------------------------------------------------*/
18
19 /*
20  * Index validator.
21  */
22 #define INDEX_CSUM_XOR 160478
23
24 static void index_prepare_for_write(struct dm_block_validator *v,
25                                     struct dm_block *b,
26                                     size_t block_size)
27 {
28         struct disk_metadata_index *mi_le = dm_block_data(b);
29
30         mi_le->blocknr = cpu_to_le64(dm_block_location(b));
31         mi_le->csum = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
32                                                  block_size - sizeof(__le32),
33                                                  INDEX_CSUM_XOR));
34 }
35
36 static int index_check(struct dm_block_validator *v,
37                        struct dm_block *b,
38                        size_t block_size)
39 {
40         struct disk_metadata_index *mi_le = dm_block_data(b);
41         __le32 csum_disk;
42
43         if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) {
44                 DMERR_LIMIT("index_check failed: blocknr %llu != wanted %llu",
45                             le64_to_cpu(mi_le->blocknr), dm_block_location(b));
46                 return -ENOTBLK;
47         }
48
49         csum_disk = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
50                                                block_size - sizeof(__le32),
51                                                INDEX_CSUM_XOR));
52         if (csum_disk != mi_le->csum) {
53                 DMERR_LIMIT("index_check failed: csum %u != wanted %u",
54                             le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum));
55                 return -EILSEQ;
56         }
57
58         return 0;
59 }
60
61 static struct dm_block_validator index_validator = {
62         .name = "index",
63         .prepare_for_write = index_prepare_for_write,
64         .check = index_check
65 };
66
67 /*----------------------------------------------------------------*/
68
69 /*
70  * Bitmap validator
71  */
72 #define BITMAP_CSUM_XOR 240779
73
74 static void dm_bitmap_prepare_for_write(struct dm_block_validator *v,
75                                         struct dm_block *b,
76                                         size_t block_size)
77 {
78         struct disk_bitmap_header *disk_header = dm_block_data(b);
79
80         disk_header->blocknr = cpu_to_le64(dm_block_location(b));
81         disk_header->csum = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
82                                                        block_size - sizeof(__le32),
83                                                        BITMAP_CSUM_XOR));
84 }
85
86 static int dm_bitmap_check(struct dm_block_validator *v,
87                            struct dm_block *b,
88                            size_t block_size)
89 {
90         struct disk_bitmap_header *disk_header = dm_block_data(b);
91         __le32 csum_disk;
92
93         if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) {
94                 DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu",
95                             le64_to_cpu(disk_header->blocknr), dm_block_location(b));
96                 return -ENOTBLK;
97         }
98
99         csum_disk = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
100                                                block_size - sizeof(__le32),
101                                                BITMAP_CSUM_XOR));
102         if (csum_disk != disk_header->csum) {
103                 DMERR_LIMIT("bitmap check failed: csum %u != wanted %u",
104                             le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum));
105                 return -EILSEQ;
106         }
107
108         return 0;
109 }
110
111 static struct dm_block_validator dm_sm_bitmap_validator = {
112         .name = "sm_bitmap",
113         .prepare_for_write = dm_bitmap_prepare_for_write,
114         .check = dm_bitmap_check,
115 };
116
117 /*----------------------------------------------------------------*/
118
119 #define ENTRIES_PER_WORD 32
120 #define ENTRIES_SHIFT   5
121
122 static void *dm_bitmap_data(struct dm_block *b)
123 {
124         return dm_block_data(b) + sizeof(struct disk_bitmap_header);
125 }
126
127 #define WORD_MASK_HIGH 0xAAAAAAAAAAAAAAAAULL
128
129 static unsigned dm_bitmap_word_used(void *addr, unsigned b)
130 {
131         __le64 *words_le = addr;
132         __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
133
134         uint64_t bits = le64_to_cpu(*w_le);
135         uint64_t mask = (bits + WORD_MASK_HIGH + 1) & WORD_MASK_HIGH;
136
137         return !(~bits & mask);
138 }
139
140 static unsigned sm_lookup_bitmap(void *addr, unsigned b)
141 {
142         __le64 *words_le = addr;
143         __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
144         unsigned hi, lo;
145
146         b = (b & (ENTRIES_PER_WORD - 1)) << 1;
147         hi = !!test_bit_le(b, (void *) w_le);
148         lo = !!test_bit_le(b + 1, (void *) w_le);
149         return (hi << 1) | lo;
150 }
151
152 static void sm_set_bitmap(void *addr, unsigned b, unsigned val)
153 {
154         __le64 *words_le = addr;
155         __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
156
157         b = (b & (ENTRIES_PER_WORD - 1)) << 1;
158
159         if (val & 2)
160                 __set_bit_le(b, (void *) w_le);
161         else
162                 __clear_bit_le(b, (void *) w_le);
163
164         if (val & 1)
165                 __set_bit_le(b + 1, (void *) w_le);
166         else
167                 __clear_bit_le(b + 1, (void *) w_le);
168 }
169
170 static int sm_find_free(void *addr, unsigned begin, unsigned end,
171                         unsigned *result)
172 {
173         while (begin < end) {
174                 if (!(begin & (ENTRIES_PER_WORD - 1)) &&
175                     dm_bitmap_word_used(addr, begin)) {
176                         begin += ENTRIES_PER_WORD;
177                         continue;
178                 }
179
180                 if (!sm_lookup_bitmap(addr, begin)) {
181                         *result = begin;
182                         return 0;
183                 }
184
185                 begin++;
186         }
187
188         return -ENOSPC;
189 }
190
191 /*----------------------------------------------------------------*/
192
193 static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm)
194 {
195         memset(ll, 0, sizeof(struct ll_disk));
196
197         ll->tm = tm;
198
199         ll->bitmap_info.tm = tm;
200         ll->bitmap_info.levels = 1;
201
202         /*
203          * Because the new bitmap blocks are created via a shadow
204          * operation, the old entry has already had its reference count
205          * decremented and we don't need the btree to do any bookkeeping.
206          */
207         ll->bitmap_info.value_type.size = sizeof(struct disk_index_entry);
208         ll->bitmap_info.value_type.inc = NULL;
209         ll->bitmap_info.value_type.dec = NULL;
210         ll->bitmap_info.value_type.equal = NULL;
211
212         ll->ref_count_info.tm = tm;
213         ll->ref_count_info.levels = 1;
214         ll->ref_count_info.value_type.size = sizeof(uint32_t);
215         ll->ref_count_info.value_type.inc = NULL;
216         ll->ref_count_info.value_type.dec = NULL;
217         ll->ref_count_info.value_type.equal = NULL;
218
219         ll->block_size = dm_bm_block_size(dm_tm_get_bm(tm));
220
221         if (ll->block_size > (1 << 30)) {
222                 DMERR("block size too big to hold bitmaps");
223                 return -EINVAL;
224         }
225
226         ll->entries_per_block = (ll->block_size - sizeof(struct disk_bitmap_header)) *
227                 ENTRIES_PER_BYTE;
228         ll->nr_blocks = 0;
229         ll->bitmap_root = 0;
230         ll->ref_count_root = 0;
231         ll->bitmap_index_changed = false;
232
233         return 0;
234 }
235
236 int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
237 {
238         int r;
239         dm_block_t i, nr_blocks, nr_indexes;
240         unsigned old_blocks, blocks;
241
242         nr_blocks = ll->nr_blocks + extra_blocks;
243         old_blocks = dm_sector_div_up(ll->nr_blocks, ll->entries_per_block);
244         blocks = dm_sector_div_up(nr_blocks, ll->entries_per_block);
245
246         nr_indexes = dm_sector_div_up(nr_blocks, ll->entries_per_block);
247         if (nr_indexes > ll->max_entries(ll)) {
248                 DMERR("space map too large");
249                 return -EINVAL;
250         }
251
252         /*
253          * We need to set this before the dm_tm_new_block() call below.
254          */
255         ll->nr_blocks = nr_blocks;
256         for (i = old_blocks; i < blocks; i++) {
257                 struct dm_block *b;
258                 struct disk_index_entry idx;
259
260                 r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b);
261                 if (r < 0)
262                         return r;
263
264                 idx.blocknr = cpu_to_le64(dm_block_location(b));
265
266                 dm_tm_unlock(ll->tm, b);
267
268                 idx.nr_free = cpu_to_le32(ll->entries_per_block);
269                 idx.none_free_before = 0;
270
271                 r = ll->save_ie(ll, i, &idx);
272                 if (r < 0)
273                         return r;
274         }
275
276         return 0;
277 }
278
279 int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result)
280 {
281         int r;
282         dm_block_t index = b;
283         struct disk_index_entry ie_disk;
284         struct dm_block *blk;
285
286         if (b >= ll->nr_blocks) {
287                 DMERR_LIMIT("metadata block out of bounds");
288                 return -EINVAL;
289         }
290
291         b = do_div(index, ll->entries_per_block);
292         r = ll->load_ie(ll, index, &ie_disk);
293         if (r < 0)
294                 return r;
295
296         r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
297                             &dm_sm_bitmap_validator, &blk);
298         if (r < 0)
299                 return r;
300
301         *result = sm_lookup_bitmap(dm_bitmap_data(blk), b);
302
303         dm_tm_unlock(ll->tm, blk);
304
305         return 0;
306 }
307
308 static int sm_ll_lookup_big_ref_count(struct ll_disk *ll, dm_block_t b,
309                                       uint32_t *result)
310 {
311         __le32 le_rc;
312         int r;
313
314         r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc);
315         if (r < 0)
316                 return r;
317
318         *result = le32_to_cpu(le_rc);
319
320         return r;
321 }
322
323 int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
324 {
325         int r = sm_ll_lookup_bitmap(ll, b, result);
326
327         if (r)
328                 return r;
329
330         if (*result != 3)
331                 return r;
332
333         return sm_ll_lookup_big_ref_count(ll, b, result);
334 }
335
336 int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
337                           dm_block_t end, dm_block_t *result)
338 {
339         int r;
340         struct disk_index_entry ie_disk;
341         dm_block_t i, index_begin = begin;
342         dm_block_t index_end = dm_sector_div_up(end, ll->entries_per_block);
343
344         /*
345          * FIXME: Use shifts
346          */
347         begin = do_div(index_begin, ll->entries_per_block);
348         end = do_div(end, ll->entries_per_block);
349         if (end == 0)
350                 end = ll->entries_per_block;
351
352         for (i = index_begin; i < index_end; i++, begin = 0) {
353                 struct dm_block *blk;
354                 unsigned position;
355                 uint32_t bit_end;
356
357                 r = ll->load_ie(ll, i, &ie_disk);
358                 if (r < 0)
359                         return r;
360
361                 if (le32_to_cpu(ie_disk.nr_free) == 0)
362                         continue;
363
364                 r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
365                                     &dm_sm_bitmap_validator, &blk);
366                 if (r < 0)
367                         return r;
368
369                 bit_end = (i == index_end - 1) ?  end : ll->entries_per_block;
370
371                 r = sm_find_free(dm_bitmap_data(blk),
372                                  max_t(unsigned, begin, le32_to_cpu(ie_disk.none_free_before)),
373                                  bit_end, &position);
374                 if (r == -ENOSPC) {
375                         /*
376                          * This might happen because we started searching
377                          * part way through the bitmap.
378                          */
379                         dm_tm_unlock(ll->tm, blk);
380                         continue;
381                 }
382
383                 dm_tm_unlock(ll->tm, blk);
384
385                 *result = i * ll->entries_per_block + (dm_block_t) position;
386                 return 0;
387         }
388
389         return -ENOSPC;
390 }
391
392 int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
393                                  dm_block_t begin, dm_block_t end, dm_block_t *b)
394 {
395         int r;
396         uint32_t count;
397
398         do {
399                 r = sm_ll_find_free_block(new_ll, begin, new_ll->nr_blocks, b);
400                 if (r)
401                         break;
402
403                 /* double check this block wasn't used in the old transaction */
404                 if (*b >= old_ll->nr_blocks)
405                         count = 0;
406                 else {
407                         r = sm_ll_lookup(old_ll, *b, &count);
408                         if (r)
409                                 break;
410
411                         if (count)
412                                 begin = *b + 1;
413                 }
414         } while (count);
415
416         return r;
417 }
418
419 /*----------------------------------------------------------------*/
420
421 int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
422                  uint32_t ref_count, int32_t *nr_allocations)
423 {
424         int r;
425         uint32_t bit, old;
426         struct dm_block *nb;
427         dm_block_t index = b;
428         struct disk_index_entry ie_disk;
429         void *bm_le;
430         int inc;
431
432         bit = do_div(index, ll->entries_per_block);
433         r = ll->load_ie(ll, index, &ie_disk);
434         if (r < 0)
435                 return r;
436
437         r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ie_disk.blocknr),
438                                &dm_sm_bitmap_validator, &nb, &inc);
439         if (r < 0) {
440                 DMERR("dm_tm_shadow_block() failed");
441                 return r;
442         }
443         ie_disk.blocknr = cpu_to_le64(dm_block_location(nb));
444         bm_le = dm_bitmap_data(nb);
445
446         old = sm_lookup_bitmap(bm_le, bit);
447         if (old > 2) {
448                 r = sm_ll_lookup_big_ref_count(ll, b, &old);
449                 if (r < 0) {
450                         dm_tm_unlock(ll->tm, nb);
451                         return r;
452                 }
453         }
454
455         if (r) {
456                 dm_tm_unlock(ll->tm, nb);
457                 return r;
458         }
459
460         if (ref_count <= 2) {
461                 sm_set_bitmap(bm_le, bit, ref_count);
462                 dm_tm_unlock(ll->tm, nb);
463
464                 if (old > 2) {
465                         r = dm_btree_remove(&ll->ref_count_info,
466                                             ll->ref_count_root,
467                                             &b, &ll->ref_count_root);
468                         if (r)
469                                 return r;
470                 }
471
472         } else {
473                 __le32 le_rc = cpu_to_le32(ref_count);
474
475                 sm_set_bitmap(bm_le, bit, 3);
476                 dm_tm_unlock(ll->tm, nb);
477
478                 __dm_bless_for_disk(&le_rc);
479                 r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
480                                     &b, &le_rc, &ll->ref_count_root);
481                 if (r < 0) {
482                         DMERR("ref count insert failed");
483                         return r;
484                 }
485         }
486
487         if (ref_count && !old) {
488                 *nr_allocations = 1;
489                 ll->nr_allocated++;
490                 le32_add_cpu(&ie_disk.nr_free, -1);
491                 if (le32_to_cpu(ie_disk.none_free_before) == bit)
492                         ie_disk.none_free_before = cpu_to_le32(bit + 1);
493
494         } else if (old && !ref_count) {
495                 *nr_allocations = -1;
496                 ll->nr_allocated--;
497                 le32_add_cpu(&ie_disk.nr_free, 1);
498                 ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
499         } else
500                 *nr_allocations = 0;
501
502         return ll->save_ie(ll, index, &ie_disk);
503 }
504
505 /*----------------------------------------------------------------*/
506
507 /*
508  * Holds useful intermediate results for the range based inc and dec
509  * operations.
510  */
511 struct inc_context {
512         struct disk_index_entry ie_disk;
513         struct dm_block *bitmap_block;
514         void *bitmap;
515
516         struct dm_block *overflow_leaf;
517 };
518
519 static inline void init_inc_context(struct inc_context *ic)
520 {
521         ic->bitmap_block = NULL;
522         ic->bitmap = NULL;
523         ic->overflow_leaf = NULL;
524 }
525
526 static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic)
527 {
528         if (ic->bitmap_block)
529                 dm_tm_unlock(ll->tm, ic->bitmap_block);
530         if (ic->overflow_leaf)
531                 dm_tm_unlock(ll->tm, ic->overflow_leaf);
532 }
533
/*
 * Drops every block held by @ic and returns it to the "nothing held"
 * state, keeping ic->ie_disk as it was.
 */
static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic)
{
	/* Release first, then forget the stale pointers. */
	exit_inc_context(ll, ic);
	init_inc_context(ic);
}
539
540 /*
541  * Confirms a btree node contains a particular key at an index.
542  */
543 static bool contains_key(struct btree_node *n, uint64_t key, int index)
544 {
545         return index >= 0 &&
546                 index < le32_to_cpu(n->header.nr_entries) &&
547                 le64_to_cpu(n->keys[index]) == key;
548 }
549
550 static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
551 {
552         int r;
553         int index;
554         struct btree_node *n;
555         __le32 *v_ptr;
556         uint32_t rc;
557
558         /*
559          * bitmap_block needs to be unlocked because getting the
560          * overflow_leaf may need to allocate, and thus use the space map.
561          */
562         reset_inc_context(ll, ic);
563
564         r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
565                                      b, &index, &ll->ref_count_root, &ic->overflow_leaf);
566         if (r < 0)
567                 return r;
568
569         n = dm_block_data(ic->overflow_leaf);
570
571         if (!contains_key(n, b, index)) {
572                 DMERR("overflow btree is missing an entry");
573                 return -EINVAL;
574         }
575
576         v_ptr = value_ptr(n, index);
577         rc = le32_to_cpu(*v_ptr) + 1;
578         *v_ptr = cpu_to_le32(rc);
579
580         return 0;
581 }
582
583 static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
584 {
585         int index;
586         struct btree_node *n;
587         __le32 *v_ptr;
588         uint32_t rc;
589
590         /*
591          * Do we already have the correct overflow leaf?
592          */
593         if (ic->overflow_leaf) {
594                 n = dm_block_data(ic->overflow_leaf);
595                 index = lower_bound(n, b);
596                 if (contains_key(n, b, index)) {
597                         v_ptr = value_ptr(n, index);
598                         rc = le32_to_cpu(*v_ptr) + 1;
599                         *v_ptr = cpu_to_le32(rc);
600
601                         return 0;
602                 }
603         }
604
605         return __sm_ll_inc_overflow(ll, b, ic);
606 }
607
608 static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic)
609 {
610         int r, inc;
611         r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ic->ie_disk.blocknr),
612                                &dm_sm_bitmap_validator, &ic->bitmap_block, &inc);
613         if (r < 0) {
614                 DMERR("dm_tm_shadow_block() failed");
615                 return r;
616         }
617         ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block));
618         ic->bitmap = dm_bitmap_data(ic->bitmap_block);
619         return 0;
620 }
621
622 /*
623  * Once shadow_bitmap has been called, which always happens at the start of inc/dec,
624  * we can reopen the bitmap with a simple write lock, rather than re calling
625  * dm_tm_shadow_block().
626  */
627 static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic)
628 {
629         if (!ic->bitmap_block) {
630                 int r = dm_bm_write_lock(dm_tm_get_bm(ll->tm), le64_to_cpu(ic->ie_disk.blocknr),
631                                          &dm_sm_bitmap_validator, &ic->bitmap_block);
632                 if (r) {
633                         DMERR("unable to re-get write lock for bitmap");
634                         return r;
635                 }
636                 ic->bitmap = dm_bitmap_data(ic->bitmap_block);
637         }
638
639         return 0;
640 }
641
642 /*
643  * Loops round incrementing entries in a single bitmap.
644  */
645 static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b,
646                                    uint32_t bit, uint32_t bit_end,
647                                    int32_t *nr_allocations, dm_block_t *new_b,
648                                    struct inc_context *ic)
649 {
650         int r;
651         __le32 le_rc;
652         uint32_t old;
653
654         for (; bit != bit_end; bit++, b++) {
655                 /*
656                  * We only need to drop the bitmap if we need to find a new btree
657                  * leaf for the overflow.  So if it was dropped last iteration,
658                  * we now re-get it.
659                  */
660                 r = ensure_bitmap(ll, ic);
661                 if (r)
662                         return r;
663
664                 old = sm_lookup_bitmap(ic->bitmap, bit);
665                 switch (old) {
666                 case 0:
667                         /* inc bitmap, adjust nr_allocated */
668                         sm_set_bitmap(ic->bitmap, bit, 1);
669                         (*nr_allocations)++;
670                         ll->nr_allocated++;
671                         le32_add_cpu(&ic->ie_disk.nr_free, -1);
672                         if (le32_to_cpu(ic->ie_disk.none_free_before) == bit)
673                                 ic->ie_disk.none_free_before = cpu_to_le32(bit + 1);
674                         break;
675
676                 case 1:
677                         /* inc bitmap */
678                         sm_set_bitmap(ic->bitmap, bit, 2);
679                         break;
680
681                 case 2:
682                         /* inc bitmap and insert into overflow */
683                         sm_set_bitmap(ic->bitmap, bit, 3);
684                         reset_inc_context(ll, ic);
685
686                         le_rc = cpu_to_le32(3);
687                         __dm_bless_for_disk(&le_rc);
688                         r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
689                                             &b, &le_rc, &ll->ref_count_root);
690                         if (r < 0) {
691                                 DMERR("ref count insert failed");
692                                 return r;
693                         }
694                         break;
695
696                 default:
697                         /*
698                          * inc within the overflow tree only.
699                          */
700                         r = sm_ll_inc_overflow(ll, b, ic);
701                         if (r < 0)
702                                 return r;
703                 }
704         }
705
706         *new_b = b;
707         return 0;
708 }
709
710 /*
711  * Finds a bitmap that contains entries in the block range, and increments
712  * them.
713  */
714 static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
715                        int32_t *nr_allocations, dm_block_t *new_b)
716 {
717         int r;
718         struct inc_context ic;
719         uint32_t bit, bit_end;
720         dm_block_t index = b;
721
722         init_inc_context(&ic);
723
724         bit = do_div(index, ll->entries_per_block);
725         r = ll->load_ie(ll, index, &ic.ie_disk);
726         if (r < 0)
727                 return r;
728
729         r = shadow_bitmap(ll, &ic);
730         if (r)
731                 return r;
732
733         bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
734         r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, &ic);
735
736         exit_inc_context(ll, &ic);
737
738         if (r)
739                 return r;
740
741         return ll->save_ie(ll, index, &ic.ie_disk);
742 }
743
744 int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
745               int32_t *nr_allocations)
746 {
747         *nr_allocations = 0;
748         while (b != e) {
749                 int r = __sm_ll_inc(ll, b, e, nr_allocations, &b);
750                 if (r)
751                         return r;
752         }
753
754         return 0;
755 }
756
757 /*----------------------------------------------------------------*/
758
759 static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b,
760                                 struct inc_context *ic)
761 {
762         reset_inc_context(ll, ic);
763         return dm_btree_remove(&ll->ref_count_info, ll->ref_count_root,
764                                &b, &ll->ref_count_root);
765 }
766
767 static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
768                                 struct inc_context *ic, uint32_t *old_rc)
769 {
770         int r;
771         int index = -1;
772         struct btree_node *n;
773         __le32 *v_ptr;
774         uint32_t rc;
775
776         reset_inc_context(ll, ic);
777         r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
778                                      b, &index, &ll->ref_count_root, &ic->overflow_leaf);
779         if (r < 0)
780                 return r;
781
782         n = dm_block_data(ic->overflow_leaf);
783
784         if (!contains_key(n, b, index)) {
785                 DMERR("overflow btree is missing an entry");
786                 return -EINVAL;
787         }
788
789         v_ptr = value_ptr(n, index);
790         rc = le32_to_cpu(*v_ptr);
791         *old_rc = rc;
792
793         if (rc == 3) {
794                 return __sm_ll_del_overflow(ll, b, ic);
795         } else {
796                 rc--;
797                 *v_ptr = cpu_to_le32(rc);
798                 return 0;
799         }
800 }
801
802 static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
803                               struct inc_context *ic, uint32_t *old_rc)
804 {
805         /*
806          * Do we already have the correct overflow leaf?
807          */
808         if (ic->overflow_leaf) {
809                 int index;
810                 struct btree_node *n;
811                 __le32 *v_ptr;
812                 uint32_t rc;
813
814                 n = dm_block_data(ic->overflow_leaf);
815                 index = lower_bound(n, b);
816                 if (contains_key(n, b, index)) {
817                         v_ptr = value_ptr(n, index);
818                         rc = le32_to_cpu(*v_ptr);
819                         *old_rc = rc;
820
821                         if (rc > 3) {
822                                 rc--;
823                                 *v_ptr = cpu_to_le32(rc);
824                                 return 0;
825                         } else {
826                                 return __sm_ll_del_overflow(ll, b, ic);
827                         }
828
829                 }
830         }
831
832         return __sm_ll_dec_overflow(ll, b, ic, old_rc);
833 }
834
835 /*
836  * Loops round incrementing entries in a single bitmap.
837  */
838 static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b,
839                                    uint32_t bit, uint32_t bit_end,
840                                    struct inc_context *ic,
841                                    int32_t *nr_allocations, dm_block_t *new_b)
842 {
843         int r;
844         uint32_t old;
845
846         for (; bit != bit_end; bit++, b++) {
847                 /*
848                  * We only need to drop the bitmap if we need to find a new btree
849                  * leaf for the overflow.  So if it was dropped last iteration,
850                  * we now re-get it.
851                  */
852                 r = ensure_bitmap(ll, ic);
853                 if (r)
854                         return r;
855
856                 old = sm_lookup_bitmap(ic->bitmap, bit);
857                 switch (old) {
858                 case 0:
859                         DMERR("unable to decrement block");
860                         return -EINVAL;
861
862                 case 1:
863                         /* dec bitmap */
864                         sm_set_bitmap(ic->bitmap, bit, 0);
865                         (*nr_allocations)--;
866                         ll->nr_allocated--;
867                         le32_add_cpu(&ic->ie_disk.nr_free, 1);
868                         ic->ie_disk.none_free_before =
869                                 cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit));
870                         break;
871
872                 case 2:
873                         /* dec bitmap and insert into overflow */
874                         sm_set_bitmap(ic->bitmap, bit, 1);
875                         break;
876
877                 case 3:
878                         r = sm_ll_dec_overflow(ll, b, ic, &old);
879                         if (r < 0)
880                                 return r;
881
882                         if (old == 3) {
883                                 r = ensure_bitmap(ll, ic);
884                                 if (r)
885                                         return r;
886
887                                 sm_set_bitmap(ic->bitmap, bit, 2);
888                         }
889                         break;
890                 }
891         }
892
893         *new_b = b;
894         return 0;
895 }
896
897 static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
898                        int32_t *nr_allocations, dm_block_t *new_b)
899 {
900         int r;
901         uint32_t bit, bit_end;
902         struct inc_context ic;
903         dm_block_t index = b;
904
905         init_inc_context(&ic);
906
907         bit = do_div(index, ll->entries_per_block);
908         r = ll->load_ie(ll, index, &ic.ie_disk);
909         if (r < 0)
910                 return r;
911
912         r = shadow_bitmap(ll, &ic);
913         if (r)
914                 return r;
915
916         bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
917         r = sm_ll_dec_bitmap(ll, b, bit, bit_end, &ic, nr_allocations, new_b);
918         exit_inc_context(ll, &ic);
919
920         if (r)
921                 return r;
922
923         return ll->save_ie(ll, index, &ic.ie_disk);
924 }
925
926 int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
927               int32_t *nr_allocations)
928 {
929         *nr_allocations = 0;
930         while (b != e) {
931                 int r = __sm_ll_dec(ll, b, e, nr_allocations, &b);
932                 if (r)
933                         return r;
934         }
935
936         return 0;
937 }
938
939 /*----------------------------------------------------------------*/
940
941 int sm_ll_commit(struct ll_disk *ll)
942 {
943         int r = 0;
944
945         if (ll->bitmap_index_changed) {
946                 r = ll->commit(ll);
947                 if (!r)
948                         ll->bitmap_index_changed = false;
949         }
950
951         return r;
952 }
953
954 /*----------------------------------------------------------------*/
955
956 static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index,
957                                struct disk_index_entry *ie)
958 {
959         memcpy(ie, ll->mi_le.index + index, sizeof(*ie));
960         return 0;
961 }
962
963 static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index,
964                                struct disk_index_entry *ie)
965 {
966         ll->bitmap_index_changed = true;
967         memcpy(ll->mi_le.index + index, ie, sizeof(*ie));
968         return 0;
969 }
970
971 static int metadata_ll_init_index(struct ll_disk *ll)
972 {
973         int r;
974         struct dm_block *b;
975
976         r = dm_tm_new_block(ll->tm, &index_validator, &b);
977         if (r < 0)
978                 return r;
979
980         ll->bitmap_root = dm_block_location(b);
981
982         dm_tm_unlock(ll->tm, b);
983
984         return 0;
985 }
986
987 static int metadata_ll_open(struct ll_disk *ll)
988 {
989         int r;
990         struct dm_block *block;
991
992         r = dm_tm_read_lock(ll->tm, ll->bitmap_root,
993                             &index_validator, &block);
994         if (r)
995                 return r;
996
997         memcpy(&ll->mi_le, dm_block_data(block), sizeof(ll->mi_le));
998         dm_tm_unlock(ll->tm, block);
999
1000         return 0;
1001 }
1002
1003 static dm_block_t metadata_ll_max_entries(struct ll_disk *ll)
1004 {
1005         return MAX_METADATA_BITMAPS;
1006 }
1007
1008 static int metadata_ll_commit(struct ll_disk *ll)
1009 {
1010         int r, inc;
1011         struct dm_block *b;
1012
1013         r = dm_tm_shadow_block(ll->tm, ll->bitmap_root, &index_validator, &b, &inc);
1014         if (r)
1015                 return r;
1016
1017         memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le));
1018         ll->bitmap_root = dm_block_location(b);
1019
1020         dm_tm_unlock(ll->tm, b);
1021
1022         return 0;
1023 }
1024
1025 int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm)
1026 {
1027         int r;
1028
1029         r = sm_ll_init(ll, tm);
1030         if (r < 0)
1031                 return r;
1032
1033         ll->load_ie = metadata_ll_load_ie;
1034         ll->save_ie = metadata_ll_save_ie;
1035         ll->init_index = metadata_ll_init_index;
1036         ll->open_index = metadata_ll_open;
1037         ll->max_entries = metadata_ll_max_entries;
1038         ll->commit = metadata_ll_commit;
1039
1040         ll->nr_blocks = 0;
1041         ll->nr_allocated = 0;
1042
1043         r = ll->init_index(ll);
1044         if (r < 0)
1045                 return r;
1046
1047         r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
1048         if (r < 0)
1049                 return r;
1050
1051         return 0;
1052 }
1053
1054 int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm,
1055                         void *root_le, size_t len)
1056 {
1057         int r;
1058         struct disk_sm_root smr;
1059
1060         if (len < sizeof(struct disk_sm_root)) {
1061                 DMERR("sm_metadata root too small");
1062                 return -ENOMEM;
1063         }
1064
1065         /*
1066          * We don't know the alignment of the root_le buffer, so need to
1067          * copy into a new structure.
1068          */
1069         memcpy(&smr, root_le, sizeof(smr));
1070
1071         r = sm_ll_init(ll, tm);
1072         if (r < 0)
1073                 return r;
1074
1075         ll->load_ie = metadata_ll_load_ie;
1076         ll->save_ie = metadata_ll_save_ie;
1077         ll->init_index = metadata_ll_init_index;
1078         ll->open_index = metadata_ll_open;
1079         ll->max_entries = metadata_ll_max_entries;
1080         ll->commit = metadata_ll_commit;
1081
1082         ll->nr_blocks = le64_to_cpu(smr.nr_blocks);
1083         ll->nr_allocated = le64_to_cpu(smr.nr_allocated);
1084         ll->bitmap_root = le64_to_cpu(smr.bitmap_root);
1085         ll->ref_count_root = le64_to_cpu(smr.ref_count_root);
1086
1087         return ll->open_index(ll);
1088 }
1089
1090 /*----------------------------------------------------------------*/
1091
1092 static inline int ie_cache_writeback(struct ll_disk *ll, struct ie_cache *iec)
1093 {
1094         iec->dirty = false;
1095         __dm_bless_for_disk(iec->ie);
1096         return dm_btree_insert(&ll->bitmap_info, ll->bitmap_root,
1097                                &iec->index, &iec->ie, &ll->bitmap_root);
1098 }
1099
1100 static inline unsigned hash_index(dm_block_t index)
1101 {
1102         return dm_hash_block(index, IE_CACHE_MASK);
1103 }
1104
1105 static int disk_ll_load_ie(struct ll_disk *ll, dm_block_t index,
1106                            struct disk_index_entry *ie)
1107 {
1108         int r;
1109         unsigned h = hash_index(index);
1110         struct ie_cache *iec = ll->ie_cache + h;
1111
1112         if (iec->valid) {
1113                 if (iec->index == index) {
1114                         memcpy(ie, &iec->ie, sizeof(*ie));
1115                         return 0;
1116                 }
1117
1118                 if (iec->dirty) {
1119                         r = ie_cache_writeback(ll, iec);
1120                         if (r)
1121                                 return r;
1122                 }
1123         }
1124
1125         r = dm_btree_lookup(&ll->bitmap_info, ll->bitmap_root, &index, ie);
1126         if (!r) {
1127                 iec->valid = true;
1128                 iec->dirty = false;
1129                 iec->index = index;
1130                 memcpy(&iec->ie, ie, sizeof(*ie));
1131         }
1132
1133         return r;
1134 }
1135
1136 static int disk_ll_save_ie(struct ll_disk *ll, dm_block_t index,
1137                            struct disk_index_entry *ie)
1138 {
1139         int r;
1140         unsigned h = hash_index(index);
1141         struct ie_cache *iec = ll->ie_cache + h;
1142
1143         ll->bitmap_index_changed = true;
1144         if (iec->valid) {
1145                 if (iec->index == index) {
1146                         memcpy(&iec->ie, ie, sizeof(*ie));
1147                         iec->dirty = true;
1148                         return 0;
1149                 }
1150
1151                 if (iec->dirty) {
1152                         r = ie_cache_writeback(ll, iec);
1153                         if (r)
1154                                 return r;
1155                 }
1156         }
1157
1158         iec->valid = true;
1159         iec->dirty = true;
1160         iec->index = index;
1161         memcpy(&iec->ie, ie, sizeof(*ie));
1162         return 0;
1163 }
1164
1165 static int disk_ll_init_index(struct ll_disk *ll)
1166 {
1167         unsigned i;
1168         for (i = 0; i < IE_CACHE_SIZE; i++) {
1169                 struct ie_cache *iec = ll->ie_cache + i;
1170                 iec->valid = false;
1171                 iec->dirty = false;
1172         }
1173         return dm_btree_empty(&ll->bitmap_info, &ll->bitmap_root);
1174 }
1175
/*
 * open_index callback for the disk space map: there is nothing to load,
 * so this does no work and always returns 0.
 */
static int disk_ll_open(struct ll_disk *ll)
{
	const int ok = 0;

	return ok;
}
1180
1181 static dm_block_t disk_ll_max_entries(struct ll_disk *ll)
1182 {
1183         return -1ULL;
1184 }
1185
1186 static int disk_ll_commit(struct ll_disk *ll)
1187 {
1188         int r = 0;
1189         unsigned i;
1190
1191         for (i = 0; i < IE_CACHE_SIZE; i++) {
1192                 struct ie_cache *iec = ll->ie_cache + i;
1193                 if (iec->valid && iec->dirty)
1194                         r = ie_cache_writeback(ll, iec);
1195         }
1196
1197         return r;
1198 }
1199
1200 int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm)
1201 {
1202         int r;
1203
1204         r = sm_ll_init(ll, tm);
1205         if (r < 0)
1206                 return r;
1207
1208         ll->load_ie = disk_ll_load_ie;
1209         ll->save_ie = disk_ll_save_ie;
1210         ll->init_index = disk_ll_init_index;
1211         ll->open_index = disk_ll_open;
1212         ll->max_entries = disk_ll_max_entries;
1213         ll->commit = disk_ll_commit;
1214
1215         ll->nr_blocks = 0;
1216         ll->nr_allocated = 0;
1217
1218         r = ll->init_index(ll);
1219         if (r < 0)
1220                 return r;
1221
1222         r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
1223         if (r < 0)
1224                 return r;
1225
1226         return 0;
1227 }
1228
1229 int sm_ll_open_disk(struct ll_disk *ll, struct dm_transaction_manager *tm,
1230                     void *root_le, size_t len)
1231 {
1232         int r;
1233         struct disk_sm_root *smr = root_le;
1234
1235         if (len < sizeof(struct disk_sm_root)) {
1236                 DMERR("sm_metadata root too small");
1237                 return -ENOMEM;
1238         }
1239
1240         r = sm_ll_init(ll, tm);
1241         if (r < 0)
1242                 return r;
1243
1244         ll->load_ie = disk_ll_load_ie;
1245         ll->save_ie = disk_ll_save_ie;
1246         ll->init_index = disk_ll_init_index;
1247         ll->open_index = disk_ll_open;
1248         ll->max_entries = disk_ll_max_entries;
1249         ll->commit = disk_ll_commit;
1250
1251         ll->nr_blocks = le64_to_cpu(smr->nr_blocks);
1252         ll->nr_allocated = le64_to_cpu(smr->nr_allocated);
1253         ll->bitmap_root = le64_to_cpu(smr->bitmap_root);
1254         ll->ref_count_root = le64_to_cpu(smr->ref_count_root);
1255
1256         return ll->open_index(ll);
1257 }
1258
1259 /*----------------------------------------------------------------*/