/*
 * Copyright (C) 2016-2017 Red Hat, Inc. All rights reserved.
 * Copyright (C) 2016-2017 Milan Broz
 * Copyright (C) 2016-2017 Mikulas Patocka
 *
 * This file is released under the GPL.
 */
#include "dm-bio-record.h"

#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/device-mapper.h>
#include <linux/dm-io.h>
#include <linux/vmalloc.h>
#include <linux/sort.h>
#include <linux/rbtree.h>
#include <linux/delay.h>
#include <linux/random.h>
#include <crypto/hash.h>
#include <crypto/skcipher.h>
#include <linux/async_tx.h>
#include <linux/dm-bufio.h>
#define DM_MSG_PREFIX "integrity"

#define DEFAULT_INTERLEAVE_SECTORS 32768
#define DEFAULT_JOURNAL_SIZE_FACTOR 7
#define DEFAULT_BUFFER_SECTORS 128
#define DEFAULT_JOURNAL_WATERMARK 50
#define DEFAULT_SYNC_MSEC 10000
#define DEFAULT_MAX_JOURNAL_SECTORS 131072
#define MIN_LOG2_INTERLEAVE_SECTORS 3
#define MAX_LOG2_INTERLEAVE_SECTORS 31
#define METADATA_WORKQUEUE_MAX_ACTIVE 16
#define RECALC_SECTORS 8192
#define RECALC_WRITE_SUPER 16
/*
 * Warning - DEBUG_PRINT prints security-sensitive data to the log,
 * so it should not be enabled in the official kernel.
 */
//#define INTERNAL_VERIFY
#define SB_MAGIC "integrt"
#define SB_VERSION_1 1
#define SB_VERSION_2 2

#define MAX_SECTORS_PER_BLOCK 8
	__u8 log2_interleave_sectors;
	__u16 integrity_tag_size;
	__u32 journal_sections;
	__u64 provided_data_sectors;	/* userspace uses this value */
	__u8 log2_sectors_per_block;
#define SB_FLAG_HAVE_JOURNAL_MAC 0x1
#define SB_FLAG_RECALCULATING 0x2

#define JOURNAL_ENTRY_ROUNDUP 8

typedef __u64 commit_id_t;
#define JOURNAL_MAC_PER_SECTOR 8
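/*
 * On-disk journal entry. The target sector number lives in a union so that
 * 32-bit builds can update it as two ordered halves; last_bytes[] preserves
 * the last 8 bytes of each data sector, whose place in the journal sector
 * is taken by the per-sector commit_id (see restore_last_bytes() below).
 */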
struct journal_entry {
	union {
		struct {
			__le32 sector_lo;
			__le32 sector_hi;
		} s;
		__le64 sector;
	} u;
	commit_id_t last_bytes[0];
};

#define journal_entry_tag(ic, je) ((__u8 *)&(je)->last_bytes[(ic)->sectors_per_block])
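/*
 * journal_entry_set_sector() publishes the sector number: the smp_wmb()
 * orders the preceding journal-data writes before the publishing store.
 * On the 32-bit variants sector_hi is the publishing store, because it
 * doubles as the "unused" / "in progress" marker tested below.
 */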
#if BITS_PER_LONG == 64
#define journal_entry_set_sector(je, x) do { smp_wmb(); WRITE_ONCE((je)->u.sector, cpu_to_le64(x)); } while (0)
#define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector)
#elif defined(CONFIG_LBDAF)
#define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); WRITE_ONCE((je)->u.s.sector_hi, cpu_to_le32((x) >> 32)); } while (0)
#define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector)
#else
#define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); WRITE_ONCE((je)->u.s.sector_hi, cpu_to_le32(0)); } while (0)
#define journal_entry_get_sector(je) le32_to_cpu((je)->u.s.sector_lo)
#endif
#define journal_entry_is_unused(je) ((je)->u.s.sector_hi == cpu_to_le32(-1))
#define journal_entry_set_unused(je) do { ((je)->u.s.sector_hi = cpu_to_le32(-1)); } while (0)
#define journal_entry_is_inprogress(je) ((je)->u.s.sector_hi == cpu_to_le32(-2))
#define journal_entry_set_inprogress(je) do { ((je)->u.s.sector_hi = cpu_to_le32(-2)); } while (0)

#define JOURNAL_BLOCK_SECTORS 8
#define JOURNAL_SECTOR_DATA ((1 << SECTOR_SHIFT) - sizeof(commit_id_t))
#define JOURNAL_MAC_SIZE (JOURNAL_MAC_PER_SECTOR * JOURNAL_BLOCK_SECTORS)
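/*
 * Overlay for one 512-byte journal sector: the payload, then
 * JOURNAL_MAC_PER_SECTOR bytes of the section MAC, then a trailing
 * commit_id that is checked on replay. A section's MAC is split across
 * its JOURNAL_BLOCK_SECTORS sectors by rw_section_mac() below.
 */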
struct journal_sector {
	__u8 entries[JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR];
	__u8 mac[JOURNAL_MAC_PER_SECTOR];
	commit_id_t commit_id;
};
#define MAX_TAG_SIZE (JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR - offsetof(struct journal_entry, last_bytes[MAX_SECTORS_PER_BLOCK]))

#define METADATA_PADDING_SECTORS 8

#define N_COMMIT_IDS 4
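/*
 * The journal cycles through N_COMMIT_IDS commit sequence numbers; a
 * sector whose commit_id does not match the value expected for its
 * position (see dm_integrity_commit_id() below) marks an incomplete,
 * torn journal write and is ignored on replay.
 */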
static unsigned char prev_commit_seq(unsigned char seq)
{
	return (seq + N_COMMIT_IDS - 1) % N_COMMIT_IDS;
}

static unsigned char next_commit_seq(unsigned char seq)
{
	return (seq + 1) % N_COMMIT_IDS;
}
/*
 * In-memory structures
 */

struct journal_node {
	struct rb_node node;
	sector_t sector;
};
struct dm_integrity_c {
	struct dm_dev *meta_dev;
	mempool_t journal_io_mempool;
	struct dm_io_client *io;
	struct dm_bufio_client *bufio;
	struct workqueue_struct *metadata_wq;
	struct superblock *sb;
	unsigned journal_pages;
	struct page_list *journal;
	struct page_list *journal_io;
	struct page_list *journal_xor;

	struct crypto_skcipher *journal_crypt;
	struct scatterlist **journal_scatterlist;
	struct scatterlist **journal_io_scatterlist;
	struct skcipher_request **sk_requests;

	struct crypto_shash *journal_mac;

	struct journal_node *journal_tree;
	struct rb_root journal_tree_root;

	sector_t provided_data_sectors;

	unsigned short journal_entry_size;
	unsigned char journal_entries_per_sector;
	unsigned char journal_section_entries;
	unsigned short journal_section_sectors;
	unsigned journal_sections;
	unsigned journal_entries;
	sector_t data_device_sectors;
	sector_t meta_device_sectors;
	unsigned initial_sectors;
	unsigned metadata_run;
	__s8 log2_metadata_run;
	__u8 log2_buffer_sectors;
	__u8 sectors_per_block;

	struct crypto_shash *internal_hash;

	struct dm_target *ti;

	/* these variables are locked with endio_wait.lock */
	struct rb_root in_progress;
	struct list_head wait_list;
	wait_queue_head_t endio_wait;
	struct workqueue_struct *wait_wq;
	struct workqueue_struct *offload_wq;

	unsigned char commit_seq;
	commit_id_t commit_ids[N_COMMIT_IDS];

	unsigned committed_section;
	unsigned n_committed_sections;

	unsigned uncommitted_section;
	unsigned n_uncommitted_sections;

	unsigned free_section;
	unsigned char free_section_entry;
	unsigned free_sectors;

	unsigned free_sectors_threshold;

	struct workqueue_struct *commit_wq;
	struct work_struct commit_work;

	struct workqueue_struct *writer_wq;
	struct work_struct writer_work;

	struct workqueue_struct *recalc_wq;
	struct work_struct recalc_work;

	struct bio_list flush_bio_list;

	unsigned long autocommit_jiffies;
	struct timer_list autocommit_timer;
	unsigned autocommit_msec;

	wait_queue_head_t copy_to_journal_wait;

	struct completion crypto_backoff;

	bool journal_uptodate;

	bool legacy_recalculate;

	struct alg_spec internal_hash_alg;
	struct alg_spec journal_crypt_alg;
	struct alg_spec journal_mac_alg;

	atomic64_t number_of_mismatches;
};
struct dm_integrity_range {
	sector_t logical_sector;
	sector_t n_sectors;
	bool waiting;
	union {
		struct rb_node node;
		struct {
			struct task_struct *task;
			struct list_head wait_entry;
		};
	};
};
struct dm_integrity_io {
	struct work_struct work;

	struct dm_integrity_c *ic;
	bool write;
	bool fua;

	struct dm_integrity_range range;

	sector_t metadata_block;
	unsigned metadata_offset;

	atomic_t in_flight;
	blk_status_t bi_status;

	struct completion *completion;

	struct dm_bio_details bio_details;
};
struct journal_completion {
	struct dm_integrity_c *ic;
	atomic_t in_flight;
	struct completion comp;
};

struct journal_io {
	struct dm_integrity_range range;
	struct journal_completion *comp;
};
static struct kmem_cache *journal_io_cache;

#define JOURNAL_IO_MEMPOOL 32
#ifdef DEBUG_PRINT
#define DEBUG_print(x, ...) printk(KERN_DEBUG x, ##__VA_ARGS__)
static void __DEBUG_bytes(__u8 *bytes, size_t len, const char *msg, ...)
{
	va_list args;
	va_start(args, msg);
	vprintk(msg, args);
	va_end(args);
	while (len) {
		pr_cont(" %02x", *bytes);
		bytes++;
		len--;
	}
	pr_cont("\n");
}
#define DEBUG_bytes(bytes, len, msg, ...) __DEBUG_bytes(bytes, len, KERN_DEBUG msg, ##__VA_ARGS__)
#else
#define DEBUG_print(x, ...) do { } while (0)
#define DEBUG_bytes(bytes, len, msg, ...) do { } while (0)
#endif
/*
 * DM Integrity profile, protection is performed a layer above (dm-crypt)
 */
static const struct blk_integrity_profile dm_integrity_profile = {
	.name = "DM-DIF-EXT-TAG",
};
static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map);
static void integrity_bio_wait(struct work_struct *w);
static void dm_integrity_dtr(struct dm_target *ti);
static void dm_integrity_io_error(struct dm_integrity_c *ic, const char *msg, int err)
{
	if (err == -EILSEQ)
		atomic64_inc(&ic->number_of_mismatches);
	if (!cmpxchg(&ic->failed, 0, err))
		DMERR("Error on %s: %d", msg, err);
}

static int dm_integrity_failed(struct dm_integrity_c *ic)
{
	return READ_ONCE(ic->failed);
}

static bool dm_integrity_disable_recalculate(struct dm_integrity_c *ic)
{
	if ((ic->internal_hash_alg.key || ic->journal_mac_alg.key) &&
	    !ic->legacy_recalculate)
		return true;
	return false;
}

static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned i,
					  unsigned j, unsigned char seq)
{
	/*
	 * XOR the number with the section and sector, so that if a piece of
	 * journal is written at the wrong place, it is detected.
	 */
	return ic->commit_ids[seq] ^ cpu_to_le64(((__u64)i << 32) ^ j);
}
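/*
 * With interleaved metadata, the device is split into "areas" of
 * 2^log2_interleave_sectors data sectors, each preceded by its metadata
 * run. E.g. with the default log2_interleave_sectors = 15 (32768 sectors),
 * data sector 100000 falls into area 100000 >> 15 = 3 at offset
 * 100000 - 3 * 32768 = 1696. With a separate metadata device there is a
 * single area and the offset equals the data sector.
 */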
static void get_area_and_offset(struct dm_integrity_c *ic, sector_t data_sector,
				sector_t *area, sector_t *offset)
{
	if (!ic->meta_dev) {
		__u8 log2_interleave_sectors = ic->sb->log2_interleave_sectors;
		*area = data_sector >> log2_interleave_sectors;
		*offset = (unsigned)data_sector & ((1U << log2_interleave_sectors) - 1);
	} else {
		*area = 0;
		*offset = data_sector;
	}
}

#define sector_to_block(ic, n)						\
do {									\
	BUG_ON((n) & (unsigned)((ic)->sectors_per_block - 1));		\
	(n) >>= (ic)->sb->log2_sectors_per_block;			\
} while (0)
static __u64 get_metadata_sector_and_offset(struct dm_integrity_c *ic, sector_t area,
					    sector_t offset, unsigned *metadata_offset)
{
	__u64 ms;
	unsigned mo;

	ms = area << ic->sb->log2_interleave_sectors;
	if (likely(ic->log2_metadata_run >= 0))
		ms += area << ic->log2_metadata_run;
	else
		ms += area * ic->metadata_run;
	ms >>= ic->log2_buffer_sectors;

	sector_to_block(ic, offset);

	if (likely(ic->log2_tag_size >= 0)) {
		ms += offset >> (SECTOR_SHIFT + ic->log2_buffer_sectors - ic->log2_tag_size);
		mo = (offset << ic->log2_tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1);
	} else {
		ms += (__u64)offset * ic->tag_size >> (SECTOR_SHIFT + ic->log2_buffer_sectors);
		mo = (offset * ic->tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1);
	}
	*metadata_offset = mo;
	return ms;
}
static sector_t get_data_sector(struct dm_integrity_c *ic, sector_t area, sector_t offset)
{
	sector_t result;

	result = area << ic->sb->log2_interleave_sectors;
	if (likely(ic->log2_metadata_run >= 0))
		result += (area + 1) << ic->log2_metadata_run;
	else
		result += (area + 1) * ic->metadata_run;

	result += (sector_t)ic->initial_sectors + offset;
	return result;
}
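/*
 * The journal is a circular buffer of journal_sections; the section
 * counters (free, uncommitted, committed) are advanced modulo
 * ic->journal_sections via wraparound_section().
 */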
static void wraparound_section(struct dm_integrity_c *ic, unsigned *sec_ptr)
{
	if (unlikely(*sec_ptr >= ic->journal_sections))
		*sec_ptr -= ic->journal_sections;
}

static void sb_set_version(struct dm_integrity_c *ic)
{
	if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
		ic->sb->version = SB_VERSION_2;
	else
		ic->sb->version = SB_VERSION_1;
}
static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags)
{
	struct dm_io_request io_req;
	struct dm_io_region io_loc;

	io_req.bi_op = op;
	io_req.bi_op_flags = op_flags;
	io_req.mem.type = DM_IO_KMEM;
	io_req.mem.ptr.addr = ic->sb;
	io_req.notify.fn = NULL;
	io_req.client = ic->io;
	io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev;
	io_loc.sector = ic->start;
	io_loc.count = SB_SECTORS;

	return dm_io(&io_req, 1, &io_loc, NULL);
}
static void access_journal_check(struct dm_integrity_c *ic, unsigned section, unsigned offset,
				 bool e, const char *function)
{
#if defined(CONFIG_DM_DEBUG) || defined(INTERNAL_VERIFY)
	unsigned limit = e ? ic->journal_section_entries : ic->journal_section_sectors;

	if (unlikely(section >= ic->journal_sections) ||
	    unlikely(offset >= limit)) {
		printk(KERN_CRIT "%s: invalid access at (%u,%u), limit (%u,%u)\n",
		       function, section, offset, ic->journal_sections, limit);
		BUG();
	}
#endif
}
static void page_list_location(struct dm_integrity_c *ic, unsigned section, unsigned offset,
			       unsigned *pl_index, unsigned *pl_offset)
{
	unsigned sector;

	access_journal_check(ic, section, offset, false, "page_list_location");

	sector = section * ic->journal_section_sectors + offset;

	*pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
	*pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
}
static struct journal_sector *access_page_list(struct dm_integrity_c *ic, struct page_list *pl,
					       unsigned section, unsigned offset, unsigned *n_sectors)
{
	unsigned pl_index, pl_offset;
	char *va;

	page_list_location(ic, section, offset, &pl_index, &pl_offset);

	if (n_sectors)
		*n_sectors = (PAGE_SIZE - pl_offset) >> SECTOR_SHIFT;

	va = lowmem_page_address(pl[pl_index].page);

	return (struct journal_sector *)(va + pl_offset);
}

static struct journal_sector *access_journal(struct dm_integrity_c *ic, unsigned section, unsigned offset)
{
	return access_page_list(ic, ic->journal, section, offset, NULL);
}
static struct journal_entry *access_journal_entry(struct dm_integrity_c *ic, unsigned section, unsigned n)
{
	unsigned rel_sector, offset;
	struct journal_sector *js;

	access_journal_check(ic, section, n, true, "access_journal_entry");

	rel_sector = n % JOURNAL_BLOCK_SECTORS;
	offset = n / JOURNAL_BLOCK_SECTORS;

	js = access_journal(ic, section, rel_sector);
	return (struct journal_entry *)((char *)js + offset * ic->journal_entry_size);
}

static struct journal_sector *access_journal_data(struct dm_integrity_c *ic, unsigned section, unsigned n)
{
	n <<= ic->sb->log2_sectors_per_block;

	n += JOURNAL_BLOCK_SECTORS;

	access_journal_check(ic, section, n, false, "access_journal_data");

	return access_journal(ic, section, n);
}
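/*
 * section_mac() computes the journal MAC over the sector numbers of all
 * entries in one section; rw_section_mac() then either stores the result
 * into the per-sector mac[] slots (wr == true) or verifies them on read.
 */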
static void section_mac(struct dm_integrity_c *ic, unsigned section, __u8 result[JOURNAL_MAC_SIZE])
{
	SHASH_DESC_ON_STACK(desc, ic->journal_mac);
	int r;
	unsigned j, size;

	desc->tfm = ic->journal_mac;

	r = crypto_shash_init(desc);
	if (unlikely(r)) {
		dm_integrity_io_error(ic, "crypto_shash_init", r);
		goto err;
	}

	for (j = 0; j < ic->journal_section_entries; j++) {
		struct journal_entry *je = access_journal_entry(ic, section, j);
		r = crypto_shash_update(desc, (__u8 *)&je->u.sector, sizeof je->u.sector);
		if (unlikely(r)) {
			dm_integrity_io_error(ic, "crypto_shash_update", r);
			goto err;
		}
	}

	size = crypto_shash_digestsize(ic->journal_mac);

	if (likely(size <= JOURNAL_MAC_SIZE)) {
		r = crypto_shash_final(desc, result);
		if (unlikely(r)) {
			dm_integrity_io_error(ic, "crypto_shash_final", r);
			goto err;
		}
		memset(result + size, 0, JOURNAL_MAC_SIZE - size);
	} else {
		__u8 digest[size];
		r = crypto_shash_final(desc, digest);
		if (unlikely(r)) {
			dm_integrity_io_error(ic, "crypto_shash_final", r);
			goto err;
		}
		memcpy(result, digest, JOURNAL_MAC_SIZE);
	}

	return;
err:
	memset(result, 0, JOURNAL_MAC_SIZE);
}
static void rw_section_mac(struct dm_integrity_c *ic, unsigned section, bool wr)
{
	unsigned j;
	__u8 result[JOURNAL_MAC_SIZE];

	if (!ic->journal_mac)
		return;

	section_mac(ic, section, result);

	for (j = 0; j < JOURNAL_BLOCK_SECTORS; j++) {
		struct journal_sector *js = access_journal(ic, section, j);

		if (likely(wr))
			memcpy(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR);
		else {
			if (memcmp(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR))
				dm_integrity_io_error(ic, "journal mac", -EILSEQ);
		}
	}
}
static void complete_journal_op(void *context)
{
	struct journal_completion *comp = context;
	BUG_ON(!atomic_read(&comp->in_flight));
	if (likely(atomic_dec_and_test(&comp->in_flight)))
		complete(&comp->comp);
}
static void xor_journal(struct dm_integrity_c *ic, bool encrypt, unsigned section,
			unsigned n_sections, struct journal_completion *comp)
{
	struct async_submit_ctl submit;
	size_t n_bytes = (size_t)(n_sections * ic->journal_section_sectors) << SECTOR_SHIFT;
	unsigned pl_index, pl_offset, section_index;
	struct page_list *source_pl, *target_pl;

	if (likely(encrypt)) {
		source_pl = ic->journal;
		target_pl = ic->journal_io;
	} else {
		source_pl = ic->journal_io;
		target_pl = ic->journal;
	}

	page_list_location(ic, section, 0, &pl_index, &pl_offset);

	atomic_add(roundup(pl_offset + n_bytes, PAGE_SIZE) >> PAGE_SHIFT, &comp->in_flight);

	init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, complete_journal_op, comp, NULL);

	section_index = pl_index;

	do {
		size_t this_step;
		struct page *src_pages[2];
		struct page *dst_page;

		while (unlikely(pl_index == section_index)) {
			unsigned dummy;
			if (likely(encrypt))
				rw_section_mac(ic, section, true);
			section++;
			n_sections--;
			if (!n_sections)
				break;
			page_list_location(ic, section, 0, &section_index, &dummy);
		}

		this_step = min(n_bytes, (size_t)PAGE_SIZE - pl_offset);
		dst_page = target_pl[pl_index].page;
		src_pages[0] = source_pl[pl_index].page;
		src_pages[1] = ic->journal_xor[pl_index].page;

		async_xor(dst_page, src_pages, pl_offset, 2, this_step, &submit);

		pl_index++;
		pl_offset = 0;
		n_bytes -= this_step;
	} while (n_bytes);

	async_tx_issue_pending_all();
}
static void complete_journal_encrypt(struct crypto_async_request *req, int err)
{
	struct journal_completion *comp = req->data;
	if (unlikely(err)) {
		if (likely(err == -EINPROGRESS)) {
			complete(&comp->ic->crypto_backoff);
			return;
		}
		dm_integrity_io_error(comp->ic, "asynchronous encrypt", err);
	}
	complete_journal_op(comp);
}
static bool do_crypt(bool encrypt, struct skcipher_request *req, struct journal_completion *comp)
{
	int r;

	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				      complete_journal_encrypt, comp);
	if (likely(encrypt))
		r = crypto_skcipher_encrypt(req);
	else
		r = crypto_skcipher_decrypt(req);
	if (likely(!r))
		return false;
	if (likely(r == -EINPROGRESS))
		return true;
	if (likely(r == -EBUSY)) {
		wait_for_completion(&comp->ic->crypto_backoff);
		reinit_completion(&comp->ic->crypto_backoff);
		return true;
	}
	dm_integrity_io_error(comp->ic, "encrypt", r);
	return false;
}
static void crypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned section,
			  unsigned n_sections, struct journal_completion *comp)
{
	struct scatterlist **source_sg;
	struct scatterlist **target_sg;

	atomic_add(2, &comp->in_flight);

	if (likely(encrypt)) {
		source_sg = ic->journal_scatterlist;
		target_sg = ic->journal_io_scatterlist;
	} else {
		source_sg = ic->journal_io_scatterlist;
		target_sg = ic->journal_scatterlist;
	}

	do {
		struct skcipher_request *req;
		unsigned ivsize;
		char *iv;

		if (likely(encrypt))
			rw_section_mac(ic, section, true);

		req = ic->sk_requests[section];
		ivsize = crypto_skcipher_ivsize(ic->journal_crypt);
		iv = req->iv;

		memcpy(iv, iv + ivsize, ivsize);

		req->src = source_sg[section];
		req->dst = target_sg[section];

		if (unlikely(do_crypt(encrypt, req, comp)))
			atomic_inc(&comp->in_flight);

		section++;
		n_sections--;
	} while (n_sections);

	atomic_dec(&comp->in_flight);
	complete_journal_op(comp);
}
static void encrypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned section,
			    unsigned n_sections, struct journal_completion *comp)
{
	if (ic->journal_xor)
		return xor_journal(ic, encrypt, section, n_sections, comp);
	else
		return crypt_journal(ic, encrypt, section, n_sections, comp);
}
static void complete_journal_io(unsigned long error, void *context)
{
	struct journal_completion *comp = context;
	if (unlikely(error != 0))
		dm_integrity_io_error(comp->ic, "writing journal", -EIO);
	complete_journal_op(comp);
}
static void rw_journal(struct dm_integrity_c *ic, int op, int op_flags, unsigned section,
		       unsigned n_sections, struct journal_completion *comp)
{
	struct dm_io_request io_req;
	struct dm_io_region io_loc;
	unsigned sector, n_sectors, pl_index, pl_offset;
	int r;

	if (unlikely(dm_integrity_failed(ic))) {
		if (comp)
			complete_journal_io(-1UL, comp);
		return;
	}

	sector = section * ic->journal_section_sectors;
	n_sectors = n_sections * ic->journal_section_sectors;

	pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
	pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);

	io_req.bi_op = op;
	io_req.bi_op_flags = op_flags;
	io_req.mem.type = DM_IO_PAGE_LIST;
	if (ic->journal_io)
		io_req.mem.ptr.pl = &ic->journal_io[pl_index];
	else
		io_req.mem.ptr.pl = &ic->journal[pl_index];
	io_req.mem.offset = pl_offset;
	if (likely(comp != NULL)) {
		io_req.notify.fn = complete_journal_io;
		io_req.notify.context = comp;
	} else {
		io_req.notify.fn = NULL;
	}
	io_req.client = ic->io;
	io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev;
	io_loc.sector = ic->start + SB_SECTORS + sector;
	io_loc.count = n_sectors;

	r = dm_io(&io_req, 1, &io_loc, NULL);
	if (unlikely(r)) {
		dm_integrity_io_error(ic, op == REQ_OP_READ ? "reading journal" : "writing journal", r);
		if (comp) {
			WARN_ONCE(1, "asynchronous dm_io failed: %d", r);
			complete_journal_io(-1UL, comp);
		}
	}
}
static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsigned commit_sections)
{
	struct journal_completion io_comp;
	struct journal_completion crypt_comp_1;
	struct journal_completion crypt_comp_2;
	unsigned i;

	io_comp.ic = ic;
	init_completion(&io_comp.comp);

	if (commit_start + commit_sections <= ic->journal_sections) {
		io_comp.in_flight = (atomic_t)ATOMIC_INIT(1);
		if (ic->journal_io) {
			crypt_comp_1.ic = ic;
			init_completion(&crypt_comp_1.comp);
			crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0);
			encrypt_journal(ic, true, commit_start, commit_sections, &crypt_comp_1);
			wait_for_completion_io(&crypt_comp_1.comp);
		} else {
			for (i = 0; i < commit_sections; i++)
				rw_section_mac(ic, commit_start + i, true);
		}
		rw_journal(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, commit_start,
			   commit_sections, &io_comp);
	} else {
		unsigned to_end;
		io_comp.in_flight = (atomic_t)ATOMIC_INIT(2);
		to_end = ic->journal_sections - commit_start;
		if (ic->journal_io) {
			crypt_comp_1.ic = ic;
			init_completion(&crypt_comp_1.comp);
			crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0);
			encrypt_journal(ic, true, commit_start, to_end, &crypt_comp_1);
			if (try_wait_for_completion(&crypt_comp_1.comp)) {
				rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp);
				reinit_completion(&crypt_comp_1.comp);
				crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0);
				encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_1);
				wait_for_completion_io(&crypt_comp_1.comp);
			} else {
				crypt_comp_2.ic = ic;
				init_completion(&crypt_comp_2.comp);
				crypt_comp_2.in_flight = (atomic_t)ATOMIC_INIT(0);
				encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_2);
				wait_for_completion_io(&crypt_comp_1.comp);
				rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp);
				wait_for_completion_io(&crypt_comp_2.comp);
			}
		} else {
			for (i = 0; i < to_end; i++)
				rw_section_mac(ic, commit_start + i, true);
			rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp);
			for (i = 0; i < commit_sections - to_end; i++)
				rw_section_mac(ic, i, true);
		}
		rw_journal(ic, REQ_OP_WRITE, REQ_FUA, 0, commit_sections - to_end, &io_comp);
	}

	wait_for_completion_io(&io_comp.comp);
}
static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsigned offset,
			      unsigned n_sectors, sector_t target, io_notify_fn fn, void *data)
{
	struct dm_io_request io_req;
	struct dm_io_region io_loc;
	int r;
	unsigned sector, pl_index, pl_offset;

	BUG_ON((target | n_sectors | offset) & (unsigned)(ic->sectors_per_block - 1));

	if (unlikely(dm_integrity_failed(ic))) {
		fn(-1UL, data);
		return;
	}

	sector = section * ic->journal_section_sectors + JOURNAL_BLOCK_SECTORS + offset;

	pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
	pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);

	io_req.bi_op = REQ_OP_WRITE;
	io_req.bi_op_flags = 0;
	io_req.mem.type = DM_IO_PAGE_LIST;
	io_req.mem.ptr.pl = &ic->journal[pl_index];
	io_req.mem.offset = pl_offset;
	io_req.notify.fn = fn;
	io_req.notify.context = data;
	io_req.client = ic->io;
	io_loc.bdev = ic->dev->bdev;
	io_loc.sector = target;
	io_loc.count = n_sectors;

	r = dm_io(&io_req, 1, &io_loc, NULL);
	if (unlikely(r)) {
		WARN_ONCE(1, "asynchronous dm_io failed: %d", r);
		fn(-1UL, data);
	}
}
static bool ranges_overlap(struct dm_integrity_range *range1, struct dm_integrity_range *range2)
{
	return range1->logical_sector < range2->logical_sector + range2->n_sectors &&
	       range1->logical_sector + range1->n_sectors > range2->logical_sector;
}
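/*
 * In-flight I/O ranges are tracked in the ic->in_progress rb-tree, keyed
 * by logical sector; a bio whose range overlaps an existing one is parked
 * on ic->wait_list until remove_range_unlocked() re-admits it, so reads,
 * writes and journal write-back never touch the same blocks concurrently.
 */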
static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range, bool check_waiting)
{
	struct rb_node **n = &ic->in_progress.rb_node;
	struct rb_node *parent;

	BUG_ON((new_range->logical_sector | new_range->n_sectors) & (unsigned)(ic->sectors_per_block - 1));

	if (likely(check_waiting)) {
		struct dm_integrity_range *range;
		list_for_each_entry(range, &ic->wait_list, wait_entry) {
			if (unlikely(ranges_overlap(range, new_range)))
				return false;
		}
	}

	parent = NULL;

	while (*n) {
		struct dm_integrity_range *range = container_of(*n, struct dm_integrity_range, node);

		parent = *n;
		if (new_range->logical_sector + new_range->n_sectors <= range->logical_sector) {
			n = &range->node.rb_left;
		} else if (new_range->logical_sector >= range->logical_sector + range->n_sectors) {
			n = &range->node.rb_right;
		} else {
			return false;
		}
	}

	rb_link_node(&new_range->node, parent, n);
	rb_insert_color(&new_range->node, &ic->in_progress);

	return true;
}
static void remove_range_unlocked(struct dm_integrity_c *ic, struct dm_integrity_range *range)
{
	rb_erase(&range->node, &ic->in_progress);
	while (unlikely(!list_empty(&ic->wait_list))) {
		struct dm_integrity_range *last_range =
			list_first_entry(&ic->wait_list, struct dm_integrity_range, wait_entry);
		struct task_struct *last_range_task;
		last_range_task = last_range->task;
		list_del(&last_range->wait_entry);
		if (!add_new_range(ic, last_range, false)) {
			last_range->task = last_range_task;
			list_add(&last_range->wait_entry, &ic->wait_list);
			break;
		}
		last_range->waiting = false;
		wake_up_process(last_range_task);
	}
}
static void remove_range(struct dm_integrity_c *ic, struct dm_integrity_range *range)
{
	unsigned long flags;

	spin_lock_irqsave(&ic->endio_wait.lock, flags);
	remove_range_unlocked(ic, range);
	spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
}
static void wait_and_add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range)
{
	new_range->waiting = true;
	list_add_tail(&new_range->wait_entry, &ic->wait_list);
	new_range->task = current;
	do {
		__set_current_state(TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&ic->endio_wait.lock);
		io_schedule();
		spin_lock_irq(&ic->endio_wait.lock);
	} while (unlikely(new_range->waiting));
}
static void init_journal_node(struct journal_node *node)
{
	RB_CLEAR_NODE(&node->node);
	node->sector = (sector_t)-1;
}
static void add_journal_node(struct dm_integrity_c *ic, struct journal_node *node, sector_t sector)
{
	struct rb_node **link;
	struct rb_node *parent;

	node->sector = sector;
	BUG_ON(!RB_EMPTY_NODE(&node->node));

	link = &ic->journal_tree_root.rb_node;
	parent = NULL;

	while (*link) {
		struct journal_node *j;
		parent = *link;
		j = container_of(parent, struct journal_node, node);
		if (sector < j->sector)
			link = &j->node.rb_left;
		else
			link = &j->node.rb_right;
	}

	rb_link_node(&node->node, parent, link);
	rb_insert_color(&node->node, &ic->journal_tree_root);
}
static void remove_journal_node(struct dm_integrity_c *ic, struct journal_node *node)
{
	BUG_ON(RB_EMPTY_NODE(&node->node));
	rb_erase(&node->node, &ic->journal_tree_root);
	init_journal_node(node);
}

#define NOT_FOUND (-1U)
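/*
 * ic->journal_tree mirrors the journal entries in an rb-tree keyed by
 * logical sector, so reads can be serviced from not-yet-written-back
 * journal data; find_journal_node() also reports the next journaled
 * sector, which bounds how far a read may go straight to the device.
 */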
static unsigned find_journal_node(struct dm_integrity_c *ic, sector_t sector, sector_t *next_sector)
{
	struct rb_node *n = ic->journal_tree_root.rb_node;
	unsigned found = NOT_FOUND;
	*next_sector = (sector_t)-1;
	while (n) {
		struct journal_node *j = container_of(n, struct journal_node, node);
		if (sector == j->sector) {
			found = j - ic->journal_tree;
		}
		if (sector < j->sector) {
			*next_sector = j->sector;
			n = j->node.rb_left;
		} else {
			n = j->node.rb_right;
		}
	}
	return found;
}
static bool test_journal_node(struct dm_integrity_c *ic, unsigned pos, sector_t sector)
{
	struct journal_node *node, *next_node;
	struct rb_node *next;

	if (unlikely(pos >= ic->journal_entries))
		return false;
	node = &ic->journal_tree[pos];
	if (unlikely(RB_EMPTY_NODE(&node->node)))
		return false;
	if (unlikely(node->sector != sector))
		return false;

	next = rb_next(&node->node);
	if (unlikely(!next))
		return true;

	next_node = container_of(next, struct journal_node, node);
	return next_node->sector != sector;
}
static bool find_newer_committed_node(struct dm_integrity_c *ic, struct journal_node *node)
{
	struct rb_node *next;
	struct journal_node *next_node;
	unsigned next_section;

	BUG_ON(RB_EMPTY_NODE(&node->node));

	next = rb_next(&node->node);
	if (unlikely(!next))
		return false;

	next_node = container_of(next, struct journal_node, node);

	if (next_node->sector != node->sector)
		return false;

	next_section = (unsigned)(next_node - ic->journal_tree) / ic->journal_section_entries;
	if (next_section >= ic->committed_section &&
	    next_section < ic->committed_section + ic->n_committed_sections)
		return true;
	if (next_section + ic->journal_sections < ic->committed_section + ic->n_committed_sections)
		return true;

	return false;
}
#define TAG_READ 0
#define TAG_WRITE 1
#define TAG_CMP 2

static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, sector_t *metadata_block,
			       unsigned *metadata_offset, unsigned total_size, int op)
{
	do {
		unsigned char *data, *dp;
		struct dm_buffer *b;
		unsigned to_copy;
		int r;

		r = dm_integrity_failed(ic);
		if (unlikely(r))
			return r;

		data = dm_bufio_read(ic->bufio, *metadata_block, &b);
		if (unlikely(IS_ERR(data)))
			return PTR_ERR(data);

		to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
		dp = data + *metadata_offset;
		if (op == TAG_READ) {
			memcpy(tag, dp, to_copy);
		} else if (op == TAG_WRITE) {
			memcpy(dp, tag, to_copy);
			dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy);
		} else {
			/* e.g.: op == TAG_CMP */
			if (unlikely(memcmp(dp, tag, to_copy))) {
				unsigned i;

				for (i = 0; i < to_copy; i++) {
					if (dp[i] != tag[i])
						break;
					total_size--;
				}
				dm_bufio_release(b);
				return total_size;
			}
		}
		dm_bufio_release(b);

		tag += to_copy;
		*metadata_offset += to_copy;
		if (unlikely(*metadata_offset == 1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
			(*metadata_block)++;
			*metadata_offset = 0;
		}
		total_size -= to_copy;
	} while (unlikely(total_size));

	return 0;
}
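/*
 * Tag metadata is accessed through dm-bufio in buffers of
 * 2^log2_buffer_sectors sectors; dm_integrity_rw_tag() above walks as many
 * buffers as the tag run spans. For TAG_CMP it returns 0 on success, or
 * the number of tag bytes from the first mismatching byte to the end of
 * the run, which the callers use to locate the failing sector.
 */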
struct flush_request {
	struct dm_io_request io_req;
	struct dm_io_region io_reg;
	struct dm_integrity_c *ic;
	struct completion comp;
};
static void flush_notify(unsigned long error, void *fr_)
{
	struct flush_request *fr = fr_;
	if (unlikely(error != 0))
		dm_integrity_io_error(fr->ic, "flushing disk cache", -EIO);
	complete(&fr->comp);
}
static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_data)
{
	int r;
	struct flush_request fr;

	if (flush_data) {
		fr.io_req.bi_op = REQ_OP_WRITE,
		fr.io_req.bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
		fr.io_req.mem.type = DM_IO_KMEM,
		fr.io_req.mem.ptr.addr = NULL,
		fr.io_req.notify.fn = flush_notify,
		fr.io_req.notify.context = &fr;
		fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio),
		fr.io_reg.bdev = ic->dev->bdev,
		fr.io_reg.sector = 0,
		fr.io_reg.count = 0,
		fr.ic = ic;
		init_completion(&fr.comp);
		r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL);
	}

	r = dm_bufio_write_dirty_buffers(ic->bufio);
	if (unlikely(r))
		dm_integrity_io_error(ic, "writing tags", r);

	if (flush_data)
		wait_for_completion(&fr.comp);
}
static void sleep_on_endio_wait(struct dm_integrity_c *ic)
{
	DECLARE_WAITQUEUE(wait, current);
	__add_wait_queue(&ic->endio_wait, &wait);
	__set_current_state(TASK_UNINTERRUPTIBLE);
	spin_unlock_irq(&ic->endio_wait.lock);
	io_schedule();
	spin_lock_irq(&ic->endio_wait.lock);
	__remove_wait_queue(&ic->endio_wait, &wait);
}
static void autocommit_fn(struct timer_list *t)
{
	struct dm_integrity_c *ic = from_timer(ic, t, autocommit_timer);

	if (likely(!dm_integrity_failed(ic)))
		queue_work(ic->commit_wq, &ic->commit_work);
}

static void schedule_autocommit(struct dm_integrity_c *ic)
{
	if (!timer_pending(&ic->autocommit_timer))
		mod_timer(&ic->autocommit_timer, jiffies + ic->autocommit_jiffies);
}
static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
{
	struct bio *bio;
	unsigned long flags;

	spin_lock_irqsave(&ic->endio_wait.lock, flags);
	bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
	bio_list_add(&ic->flush_bio_list, bio);
	spin_unlock_irqrestore(&ic->endio_wait.lock, flags);

	queue_work(ic->commit_wq, &ic->commit_work);
}
static void do_endio(struct dm_integrity_c *ic, struct bio *bio)
{
	int r = dm_integrity_failed(ic);
	if (unlikely(r) && !bio->bi_status)
		bio->bi_status = errno_to_blk_status(r);
	bio_endio(bio);
}

static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
{
	struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));

	if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic)))
		submit_flush_bio(ic, dio);
	else
		do_endio(ic, bio);
}
static void dec_in_flight(struct dm_integrity_io *dio)
{
	if (atomic_dec_and_test(&dio->in_flight)) {
		struct dm_integrity_c *ic = dio->ic;
		struct bio *bio;

		remove_range(ic, &dio->range);

		if (unlikely(dio->write))
			schedule_autocommit(ic);

		bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));

		if (unlikely(dio->bi_status) && !bio->bi_status)
			bio->bi_status = dio->bi_status;
		if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
			dio->range.logical_sector += dio->range.n_sectors;
			bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
			INIT_WORK(&dio->work, integrity_bio_wait);
			queue_work(ic->offload_wq, &dio->work);
			return;
		}
		do_endio_flush(ic, dio);
	}
}
static void integrity_end_io(struct bio *bio)
{
	struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));

	dm_bio_restore(&dio->bio_details, bio);
	if (bio->bi_integrity)
		bio->bi_opf |= REQ_INTEGRITY;

	if (dio->completion)
		complete(dio->completion);

	dec_in_flight(dio);
}
static void integrity_sector_checksum(struct dm_integrity_c *ic, sector_t sector,
				      const char *data, char *result)
{
	__u64 sector_le = cpu_to_le64(sector);
	SHASH_DESC_ON_STACK(req, ic->internal_hash);
	int r;
	unsigned digest_size;

	req->tfm = ic->internal_hash;

	r = crypto_shash_init(req);
	if (unlikely(r < 0)) {
		dm_integrity_io_error(ic, "crypto_shash_init", r);
		goto failed;
	}

	r = crypto_shash_update(req, (const __u8 *)&sector_le, sizeof sector_le);
	if (unlikely(r < 0)) {
		dm_integrity_io_error(ic, "crypto_shash_update", r);
		goto failed;
	}

	r = crypto_shash_update(req, data, ic->sectors_per_block << SECTOR_SHIFT);
	if (unlikely(r < 0)) {
		dm_integrity_io_error(ic, "crypto_shash_update", r);
		goto failed;
	}

	r = crypto_shash_final(req, result);
	if (unlikely(r < 0)) {
		dm_integrity_io_error(ic, "crypto_shash_final", r);
		goto failed;
	}

	digest_size = crypto_shash_digestsize(ic->internal_hash);
	if (unlikely(digest_size < ic->tag_size))
		memset(result + digest_size, 0, ic->tag_size - digest_size);

	return;

failed:
	/* this shouldn't happen anyway, the hash functions have no reason to fail */
	get_random_bytes(result, ic->tag_size);
}
static void integrity_metadata(struct work_struct *w)
{
	struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
	struct dm_integrity_c *ic = dio->ic;
	int r;

	if (ic->internal_hash) {
		struct bvec_iter iter;
		struct bio_vec bv;
		unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
		struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
		char *checksums;
		unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
		char checksums_onstack[ic->tag_size + extra_space];
		unsigned sectors_to_process = dio->range.n_sectors;
		sector_t sector = dio->range.logical_sector;

		if (unlikely(ic->mode == 'R'))
			goto skip_io;

		checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
				    GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
		if (!checksums)
			checksums = checksums_onstack;

		__bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) {
			unsigned pos;
			char *mem, *checksums_ptr;

again:
			mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
			pos = 0;
			checksums_ptr = checksums;
			do {
				integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
				checksums_ptr += ic->tag_size;
				sectors_to_process -= ic->sectors_per_block;
				pos += ic->sectors_per_block << SECTOR_SHIFT;
				sector += ic->sectors_per_block;
			} while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
			kunmap_atomic(mem);

			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
						checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
			if (unlikely(r)) {
				if (r > 0) {
					DMERR_LIMIT("Checksum failed at sector 0x%llx",
						    (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
					r = -EILSEQ;
					atomic64_inc(&ic->number_of_mismatches);
				}
				if (likely(checksums != checksums_onstack))
					kfree(checksums);
				goto error;
			}

			if (!sectors_to_process)
				break;

			if (unlikely(pos < bv.bv_len)) {
				bv.bv_offset += pos;
				bv.bv_len -= pos;
				goto again;
			}
		}

		if (likely(checksums != checksums_onstack))
			kfree(checksums);
	} else {
		struct bio_integrity_payload *bip = dio->bio_details.bi_integrity;

		if (bip) {
			struct bio_vec biv;
			struct bvec_iter iter;
			unsigned data_to_process = dio->range.n_sectors;
			sector_to_block(ic, data_to_process);
			data_to_process *= ic->tag_size;

			bip_for_each_vec(biv, bip, iter) {
				unsigned char *tag;
				unsigned this_len;

				BUG_ON(PageHighMem(biv.bv_page));
				tag = lowmem_page_address(biv.bv_page) + biv.bv_offset;
				this_len = min(biv.bv_len, data_to_process);
				r = dm_integrity_rw_tag(ic, tag, &dio->metadata_block, &dio->metadata_offset,
							this_len, !dio->write ? TAG_READ : TAG_WRITE);
				if (unlikely(r))
					goto error;
				data_to_process -= this_len;
				if (!data_to_process)
					break;
			}
		}
	}
skip_io:
	dec_in_flight(dio);
	return;
error:
	dio->bi_status = errno_to_blk_status(r);
	dec_in_flight(dio);
}
static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
{
	struct dm_integrity_c *ic = ti->private;
	struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
	struct bio_integrity_payload *bip;

	sector_t area, offset;

	dio->ic = ic;
	dio->bi_status = 0;

	if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
		submit_flush_bio(ic, dio);
		return DM_MAPIO_SUBMITTED;
	}

	dio->range.logical_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
	dio->write = bio_op(bio) == REQ_OP_WRITE;
	dio->fua = dio->write && bio->bi_opf & REQ_FUA;
	if (unlikely(dio->fua)) {
		/*
		 * Don't pass down the FUA flag because we have to flush
		 * disk cache anyway.
		 */
		bio->bi_opf &= ~REQ_FUA;
	}
	if (unlikely(dio->range.logical_sector + bio_sectors(bio) > ic->provided_data_sectors)) {
		DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx",
		      (unsigned long long)dio->range.logical_sector, bio_sectors(bio),
		      (unsigned long long)ic->provided_data_sectors);
		return DM_MAPIO_KILL;
	}
	if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) {
		DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x",
		      ic->sectors_per_block,
		      (unsigned long long)dio->range.logical_sector, bio_sectors(bio));
		return DM_MAPIO_KILL;
	}

	if (ic->sectors_per_block > 1) {
		struct bvec_iter iter;
		struct bio_vec bv;
		bio_for_each_segment(bv, bio, iter) {
			if (unlikely(bv.bv_len & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) {
				DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary",
				      bv.bv_offset, bv.bv_len, ic->sectors_per_block);
				return DM_MAPIO_KILL;
			}
		}
	}

	bip = bio_integrity(bio);
	if (!ic->internal_hash) {
		if (bip) {
			unsigned wanted_tag_size = bio_sectors(bio) >> ic->sb->log2_sectors_per_block;
			if (ic->log2_tag_size >= 0)
				wanted_tag_size <<= ic->log2_tag_size;
			else
				wanted_tag_size *= ic->tag_size;
			if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) {
				DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size);
				return DM_MAPIO_KILL;
			}
		}
	} else {
		if (unlikely(bip != NULL)) {
			DMERR("Unexpected integrity data when using internal hash");
			return DM_MAPIO_KILL;
		}
	}

	if (unlikely(ic->mode == 'R') && unlikely(dio->write))
		return DM_MAPIO_KILL;

	get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
	dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset);
	bio->bi_iter.bi_sector = get_data_sector(ic, area, offset);

	dm_integrity_map_continue(dio, true);
	return DM_MAPIO_SUBMITTED;
}
static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio,
				 unsigned journal_section, unsigned journal_entry)
{
	struct dm_integrity_c *ic = dio->ic;
	sector_t logical_sector;
	unsigned n_sectors;

	logical_sector = dio->range.logical_sector;
	n_sectors = dio->range.n_sectors;
	do {
		struct bio_vec bv = bio_iovec(bio);
		char *mem;

		if (unlikely(bv.bv_len >> SECTOR_SHIFT > n_sectors))
			bv.bv_len = n_sectors << SECTOR_SHIFT;
		n_sectors -= bv.bv_len >> SECTOR_SHIFT;
		bio_advance_iter(bio, &bio->bi_iter, bv.bv_len);
retry_kmap:
		mem = kmap_atomic(bv.bv_page);
		if (likely(dio->write))
			flush_dcache_page(bv.bv_page);

		do {
			struct journal_entry *je = access_journal_entry(ic, journal_section, journal_entry);

			if (unlikely(!dio->write)) {
				struct journal_sector *js;
				char *mem_ptr;
				unsigned s;

				if (unlikely(journal_entry_is_inprogress(je))) {
					flush_dcache_page(bv.bv_page);
					kunmap_atomic(mem);

					__io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je));
					goto retry_kmap;
				}
				smp_rmb();
				BUG_ON(journal_entry_get_sector(je) != logical_sector);
				js = access_journal_data(ic, journal_section, journal_entry);
				mem_ptr = mem + bv.bv_offset;
				s = 0;
				do {
					memcpy(mem_ptr, js, JOURNAL_SECTOR_DATA);
					*(commit_id_t *)(mem_ptr + JOURNAL_SECTOR_DATA) = je->last_bytes[s];
					js++;
					mem_ptr += 1 << SECTOR_SHIFT;
				} while (++s < ic->sectors_per_block);
#ifdef INTERNAL_VERIFY
				if (ic->internal_hash) {
					char checksums_onstack[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)];

					integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack);
					if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) {
						DMERR_LIMIT("Checksum failed when reading from journal, at sector 0x%llx",
							    (unsigned long long)logical_sector);
					}
				}
#endif
			}

			if (!ic->internal_hash) {
				struct bio_integrity_payload *bip = bio_integrity(bio);
				unsigned tag_todo = ic->tag_size;
				char *tag_ptr = journal_entry_tag(ic, je);

				if (bip) do {
					struct bio_vec biv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter);
					unsigned tag_now = min(biv.bv_len, tag_todo);
					char *tag_addr;
					BUG_ON(PageHighMem(biv.bv_page));
					tag_addr = lowmem_page_address(biv.bv_page) + biv.bv_offset;
					if (likely(dio->write))
						memcpy(tag_ptr, tag_addr, tag_now);
					else
						memcpy(tag_addr, tag_ptr, tag_now);
					bvec_iter_advance(bip->bip_vec, &bip->bip_iter, tag_now);
					tag_ptr += tag_now;
					tag_todo -= tag_now;
				} while (unlikely(tag_todo)); else {
					if (likely(dio->write))
						memset(tag_ptr, 0, tag_todo);
				}
			}

			if (likely(dio->write)) {
				struct journal_sector *js;
				unsigned s;

				js = access_journal_data(ic, journal_section, journal_entry);
				memcpy(js, mem + bv.bv_offset, ic->sectors_per_block << SECTOR_SHIFT);

				s = 0;
				do {
					je->last_bytes[s] = js[s].commit_id;
				} while (++s < ic->sectors_per_block);

				if (ic->internal_hash) {
					unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
					if (unlikely(digest_size > ic->tag_size)) {
						char checksums_onstack[digest_size];
						integrity_sector_checksum(ic, logical_sector, (char *)js, checksums_onstack);
						memcpy(journal_entry_tag(ic, je), checksums_onstack, ic->tag_size);
					} else
						integrity_sector_checksum(ic, logical_sector, (char *)js, journal_entry_tag(ic, je));
				}

				journal_entry_set_sector(je, logical_sector);
			}
			logical_sector += ic->sectors_per_block;

			journal_entry++;
			if (unlikely(journal_entry == ic->journal_section_entries)) {
				journal_entry = 0;
				journal_section++;
				wraparound_section(ic, &journal_section);
			}

			bv.bv_offset += ic->sectors_per_block << SECTOR_SHIFT;
		} while (bv.bv_len -= ic->sectors_per_block << SECTOR_SHIFT);

		if (unlikely(!dio->write))
			flush_dcache_page(bv.bv_page);
		kunmap_atomic(mem);
	} while (n_sectors);

	if (likely(dio->write)) {
		smp_mb();
		if (unlikely(waitqueue_active(&ic->copy_to_journal_wait)))
			wake_up(&ic->copy_to_journal_wait);
		if (READ_ONCE(ic->free_sectors) <= ic->free_sectors_threshold)
			queue_work(ic->commit_wq, &ic->commit_work);
		else
			schedule_autocommit(ic);
	} else {
		remove_range(ic, &dio->range);
	}

	if (unlikely(bio->bi_iter.bi_size)) {
		sector_t area, offset;

		dio->range.logical_sector = logical_sector;
		get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
		dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset);
		return true;
	}

	return false;
}
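/*
 * dm_integrity_map_continue() finishes mapping under endio_wait.lock:
 * journaled writes allocate journal entries and go through
 * __journal_read_write(), reads that hit the journal tree are served from
 * the journal, and everything else is remapped to the data device with
 * integrity_metadata() run from a workqueue. It must not sleep when called
 * from the map path (from_map), so such bios are offloaded first.
 */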
static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map)
{
	struct dm_integrity_c *ic = dio->ic;
	struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
	unsigned journal_section, journal_entry;
	unsigned journal_read_pos;
	struct completion read_comp;
	bool need_sync_io = ic->internal_hash && !dio->write;

	if (need_sync_io && from_map) {
		INIT_WORK(&dio->work, integrity_bio_wait);
		queue_work(ic->offload_wq, &dio->work);
		return;
	}

lock_retry:
	spin_lock_irq(&ic->endio_wait.lock);
retry:
	if (unlikely(dm_integrity_failed(ic))) {
		spin_unlock_irq(&ic->endio_wait.lock);
		do_endio(ic, bio);
		return;
	}
	dio->range.n_sectors = bio_sectors(bio);
	journal_read_pos = NOT_FOUND;
	if (likely(ic->mode == 'J')) {
		if (dio->write) {
			unsigned next_entry, i, pos;
			unsigned ws, we, range_sectors;

			dio->range.n_sectors = min(dio->range.n_sectors,
						   ic->free_sectors << ic->sb->log2_sectors_per_block);
			if (unlikely(!dio->range.n_sectors)) {
				if (from_map)
					goto offload_to_thread;
				sleep_on_endio_wait(ic);
				goto retry;
			}
			range_sectors = dio->range.n_sectors >> ic->sb->log2_sectors_per_block;
			ic->free_sectors -= range_sectors;
			journal_section = ic->free_section;
			journal_entry = ic->free_section_entry;

			next_entry = ic->free_section_entry + range_sectors;
			ic->free_section_entry = next_entry % ic->journal_section_entries;
			ic->free_section += next_entry / ic->journal_section_entries;
			ic->n_uncommitted_sections += next_entry / ic->journal_section_entries;
			wraparound_section(ic, &ic->free_section);

			pos = journal_section * ic->journal_section_entries + journal_entry;
			ws = journal_section;
			we = journal_entry;
			i = 0;
			do {
				struct journal_entry *je;

				add_journal_node(ic, &ic->journal_tree[pos], dio->range.logical_sector + i);
				pos++;
				if (unlikely(pos >= ic->journal_entries))
					pos = 0;

				je = access_journal_entry(ic, ws, we);
				BUG_ON(!journal_entry_is_unused(je));
				journal_entry_set_inprogress(je);
				we++;
				if (unlikely(we == ic->journal_section_entries)) {
					we = 0;
					ws++;
					wraparound_section(ic, &ws);
				}
			} while ((i += ic->sectors_per_block) < dio->range.n_sectors);

			spin_unlock_irq(&ic->endio_wait.lock);
			goto journal_read_write;
		} else {
			sector_t next_sector;
			journal_read_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector);
			if (likely(journal_read_pos == NOT_FOUND)) {
				if (unlikely(dio->range.n_sectors > next_sector - dio->range.logical_sector))
					dio->range.n_sectors = next_sector - dio->range.logical_sector;
			} else {
				unsigned i;
				unsigned jp = journal_read_pos + 1;
				for (i = ic->sectors_per_block; i < dio->range.n_sectors; i += ic->sectors_per_block, jp++) {
					if (!test_journal_node(ic, jp, dio->range.logical_sector + i))
						break;
				}
				dio->range.n_sectors = i;
			}
		}
	}
	if (unlikely(!add_new_range(ic, &dio->range, true))) {
		/*
		 * We must not sleep in the request routine because it could
		 * stall bios on current->bio_list.
		 * So, we offload the bio to a workqueue if we have to sleep.
		 */
		if (from_map) {
offload_to_thread:
			spin_unlock_irq(&ic->endio_wait.lock);
			INIT_WORK(&dio->work, integrity_bio_wait);
			queue_work(ic->wait_wq, &dio->work);
			return;
		}
		if (journal_read_pos != NOT_FOUND)
			dio->range.n_sectors = ic->sectors_per_block;
		wait_and_add_new_range(ic, &dio->range);
		/*
		 * wait_and_add_new_range drops the spinlock, so the journal
		 * may have been changed arbitrarily. We need to recheck.
		 * To simplify the code, we restrict I/O size to just one block.
		 */
		if (journal_read_pos != NOT_FOUND) {
			sector_t next_sector;
			unsigned new_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector);
			if (unlikely(new_pos != journal_read_pos)) {
				remove_range_unlocked(ic, &dio->range);
				goto retry;
			}
		}
	}
	spin_unlock_irq(&ic->endio_wait.lock);

	if (unlikely(journal_read_pos != NOT_FOUND)) {
		journal_section = journal_read_pos / ic->journal_section_entries;
		journal_entry = journal_read_pos % ic->journal_section_entries;
		goto journal_read_write;
	}

	dio->in_flight = (atomic_t)ATOMIC_INIT(2);

	if (need_sync_io) {
		init_completion(&read_comp);
		dio->completion = &read_comp;
	} else
		dio->completion = NULL;

	dm_bio_record(&dio->bio_details, bio);
	bio_set_dev(bio, ic->dev->bdev);
	bio->bi_integrity = NULL;
	bio->bi_opf &= ~REQ_INTEGRITY;
	bio->bi_end_io = integrity_end_io;
	bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT;

	generic_make_request(bio);

	if (need_sync_io) {
		wait_for_completion_io(&read_comp);
		if (unlikely(ic->recalc_wq != NULL) &&
		    ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
		    dio->range.logical_sector + dio->range.n_sectors > le64_to_cpu(ic->sb->recalc_sector))
			goto skip_check;
		if (likely(!bio->bi_status))
			integrity_metadata(&dio->work);
		else
skip_check:
			dec_in_flight(dio);
	} else {
		INIT_WORK(&dio->work, integrity_metadata);
		queue_work(ic->metadata_wq, &dio->work);
	}

	return;

journal_read_write:
	if (unlikely(__journal_read_write(dio, bio, journal_section, journal_entry)))
		goto lock_retry;

	do_endio_flush(ic, dio);
}
static void integrity_bio_wait(struct work_struct *w)
{
	struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);

	dm_integrity_map_continue(dio, false);
}
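/*
 * pad_uncommitted() closes a partially filled journal section before a
 * commit, so commits always operate on whole sections; the unused entries
 * are simply given up from free_sectors, which the WARN_ON cross-checks.
 */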
static void pad_uncommitted(struct dm_integrity_c *ic)
{
	if (ic->free_section_entry) {
		ic->free_sectors -= ic->journal_section_entries - ic->free_section_entry;
		ic->free_section_entry = 0;
		ic->free_section++;
		wraparound_section(ic, &ic->free_section);
		ic->n_uncommitted_sections++;
	}
	WARN_ON(ic->journal_sections * ic->journal_section_entries !=
		(ic->n_uncommitted_sections + ic->n_committed_sections) * ic->journal_section_entries + ic->free_sectors);
}
static void integrity_commit(struct work_struct *w)
{
	struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, commit_work);
	unsigned commit_start, commit_sections;
	unsigned i, j, n;
	struct bio *flushes;

	del_timer(&ic->autocommit_timer);

	spin_lock_irq(&ic->endio_wait.lock);
	flushes = bio_list_get(&ic->flush_bio_list);
	if (unlikely(ic->mode != 'J')) {
		spin_unlock_irq(&ic->endio_wait.lock);
		dm_integrity_flush_buffers(ic, true);
		goto release_flush_bios;
	}

	pad_uncommitted(ic);
	commit_start = ic->uncommitted_section;
	commit_sections = ic->n_uncommitted_sections;
	spin_unlock_irq(&ic->endio_wait.lock);

	if (!commit_sections)
		goto release_flush_bios;

	i = commit_start;
	for (n = 0; n < commit_sections; n++) {
		for (j = 0; j < ic->journal_section_entries; j++) {
			struct journal_entry *je;
			je = access_journal_entry(ic, i, j);
			io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je));
		}
		for (j = 0; j < ic->journal_section_sectors; j++) {
			struct journal_sector *js;
			js = access_journal(ic, i, j);
			js->commit_id = dm_integrity_commit_id(ic, i, j, ic->commit_seq);
		}
		i++;
		if (unlikely(i >= ic->journal_sections))
			ic->commit_seq = next_commit_seq(ic->commit_seq);
		wraparound_section(ic, &i);
	}

	write_journal(ic, commit_start, commit_sections);

	spin_lock_irq(&ic->endio_wait.lock);
	ic->uncommitted_section += commit_sections;
	wraparound_section(ic, &ic->uncommitted_section);
	ic->n_uncommitted_sections -= commit_sections;
	ic->n_committed_sections += commit_sections;
	spin_unlock_irq(&ic->endio_wait.lock);

	if (READ_ONCE(ic->free_sectors) <= ic->free_sectors_threshold)
		queue_work(ic->writer_wq, &ic->writer_work);

release_flush_bios:
	while (flushes) {
		struct bio *next = flushes->bi_next;
		flushes->bi_next = NULL;
		do_endio(ic, flushes);
		flushes = next;
	}
}
static void complete_copy_from_journal(unsigned long error, void *context)
{
	struct journal_io *io = context;
	struct journal_completion *comp = io->comp;
	struct dm_integrity_c *ic = comp->ic;
	remove_range(ic, &io->range);
	mempool_free(io, &ic->journal_io_mempool);
	if (unlikely(error != 0))
		dm_integrity_io_error(ic, "copying from journal", -EIO);
	complete_journal_op(comp);
}
static void restore_last_bytes(struct dm_integrity_c *ic, struct journal_sector *js,
			       struct journal_entry *je)
{
	unsigned s = 0;
	do {
		js->commit_id = je->last_bytes[s];
		js++;
	} while (++s < ic->sectors_per_block);
}
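/*
 * do_journal_write() writes committed journal entries back to their final
 * location: consecutive entries are merged into one copy_from_journal()
 * request, entries superseded by a newer committed copy of the same sector
 * are dropped, and the tag area is updated before the data is copied.
 */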
static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
			     unsigned write_sections, bool from_replay)
{
	unsigned i, j, n;
	struct journal_completion comp;
	struct blk_plug plug;

	blk_start_plug(&plug);

	comp.ic = ic;
	comp.in_flight = (atomic_t)ATOMIC_INIT(1);
	init_completion(&comp.comp);

	i = write_start;
	for (n = 0; n < write_sections; n++, i++, wraparound_section(ic, &i)) {
#ifndef INTERNAL_VERIFY
		if (unlikely(from_replay))
#endif
			rw_section_mac(ic, i, false);
		for (j = 0; j < ic->journal_section_entries; j++) {
			struct journal_entry *je = access_journal_entry(ic, i, j);
			sector_t sec, area, offset;
			unsigned k, l, next_loop;
			sector_t metadata_block;
			unsigned metadata_offset;
			struct journal_io *io;

			if (journal_entry_is_unused(je))
				continue;
			BUG_ON(unlikely(journal_entry_is_inprogress(je)) && !from_replay);
			sec = journal_entry_get_sector(je);
			if (unlikely(from_replay)) {
				if (unlikely(sec & (unsigned)(ic->sectors_per_block - 1))) {
					dm_integrity_io_error(ic, "invalid sector in journal", -EIO);
					sec &= ~(sector_t)(ic->sectors_per_block - 1);
				}
			}
			get_area_and_offset(ic, sec, &area, &offset);
			restore_last_bytes(ic, access_journal_data(ic, i, j), je);
			for (k = j + 1; k < ic->journal_section_entries; k++) {
				struct journal_entry *je2 = access_journal_entry(ic, i, k);
				sector_t sec2, area2, offset2;
				if (journal_entry_is_unused(je2))
					break;
				BUG_ON(unlikely(journal_entry_is_inprogress(je2)) && !from_replay);
				sec2 = journal_entry_get_sector(je2);
				get_area_and_offset(ic, sec2, &area2, &offset2);
				if (area2 != area || offset2 != offset + ((k - j) << ic->sb->log2_sectors_per_block))
					break;
				restore_last_bytes(ic, access_journal_data(ic, i, k), je2);
			}
			next_loop = k - 1;

			io = mempool_alloc(&ic->journal_io_mempool, GFP_NOIO);
			io->comp = &comp;
			io->range.logical_sector = sec;
			io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block;

			spin_lock_irq(&ic->endio_wait.lock);
			if (unlikely(!add_new_range(ic, &io->range, true)))
				wait_and_add_new_range(ic, &io->range);

			if (likely(!from_replay)) {
				struct journal_node *section_node = &ic->journal_tree[i * ic->journal_section_entries];

				/* don't write if there is a newer committed sector */
				while (j < k && find_newer_committed_node(ic, &section_node[j])) {
					struct journal_entry *je2 = access_journal_entry(ic, i, j);

					journal_entry_set_unused(je2);
					remove_journal_node(ic, &section_node[j]);
					j++;
					sec += ic->sectors_per_block;
					offset += ic->sectors_per_block;
				}
				while (j < k && find_newer_committed_node(ic, &section_node[k - 1])) {
					struct journal_entry *je2 = access_journal_entry(ic, i, k - 1);

					journal_entry_set_unused(je2);
					remove_journal_node(ic, &section_node[k - 1]);
					k--;
				}
				if (j == k) {
					remove_range_unlocked(ic, &io->range);
					spin_unlock_irq(&ic->endio_wait.lock);
					mempool_free(io, &ic->journal_io_mempool);
					goto skip_io;
				}
				for (l = j; l < k; l++) {
					remove_journal_node(ic, &section_node[l]);
				}
			}
			spin_unlock_irq(&ic->endio_wait.lock);

			metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset);
			for (l = j; l < k; l++) {
				int r;
				struct journal_entry *je2 = access_journal_entry(ic, i, l);

				if (
#ifndef INTERNAL_VERIFY
				    unlikely(from_replay) &&
#endif
				    ic->internal_hash) {
					char test_tag[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)];

					integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block),
								  (char *)access_journal_data(ic, i, l), test_tag);
					if (unlikely(memcmp(test_tag, journal_entry_tag(ic, je2), ic->tag_size)))
						dm_integrity_io_error(ic, "tag mismatch when replaying journal", -EILSEQ);
				}

				journal_entry_set_unused(je2);
				r = dm_integrity_rw_tag(ic, journal_entry_tag(ic, je2), &metadata_block, &metadata_offset,
							ic->tag_size, TAG_WRITE);
				if (unlikely(r))
					dm_integrity_io_error(ic, "reading tags", r);
			}

			atomic_inc(&comp.in_flight);
			copy_from_journal(ic, i, j << ic->sb->log2_sectors_per_block,
					  (k - j) << ic->sb->log2_sectors_per_block,
					  get_data_sector(ic, area, offset),
					  complete_copy_from_journal, io);
skip_io:
			j = next_loop;
		}
	}

	dm_bufio_write_dirty_buffers_async(ic->bufio);

	blk_finish_plug(&plug);

	complete_journal_op(&comp);
	wait_for_completion_io(&comp.comp);

	dm_integrity_flush_buffers(ic, true);
}
2112 static void integrity_writer(struct work_struct *w)
2114 struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, writer_work);
2115 unsigned write_start, write_sections;
2117 unsigned prev_free_sectors;
2119 /* the following test is not needed, but it tests the replay code */
2120 if (unlikely(dm_post_suspending(ic->ti)) && !ic->meta_dev)
2123 spin_lock_irq(&ic->endio_wait.lock);
2124 write_start = ic->committed_section;
2125 write_sections = ic->n_committed_sections;
2126 spin_unlock_irq(&ic->endio_wait.lock);
2128 if (!write_sections)
2131 do_journal_write(ic, write_start, write_sections, false);
2133 spin_lock_irq(&ic->endio_wait.lock);
2135 ic->committed_section += write_sections;
2136 wraparound_section(ic, &ic->committed_section);
2137 ic->n_committed_sections -= write_sections;
2139 prev_free_sectors = ic->free_sectors;
2140 ic->free_sectors += write_sections * ic->journal_section_entries;
2141 if (unlikely(!prev_free_sectors))
2142 wake_up_locked(&ic->endio_wait);
2144 spin_unlock_irq(&ic->endio_wait.lock);
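/*
 * Persist recalculation progress: flush dirty metadata buffers and
 * rewrite the superblock (which carries sb->recalc_sector), so a
 * crash does not force the recalculation to restart from the
 * beginning.
 */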
2147 static void recalc_write_super(struct dm_integrity_c *ic)
2151 dm_integrity_flush_buffers(ic, false);
2152 if (dm_integrity_failed(ic))
2156 r = sync_rw_sb(ic, REQ_OP_WRITE, 0);
2158 dm_integrity_io_error(ic, "writing superblock", r);
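/*
 * Background recalculation worker: starting at sb->recalc_sector,
 * lock a range of up to RECALC_SECTORS, read the data, compute the
 * internal-hash checksum of every block and write the tags to the
 * metadata area. The superblock is rewritten every RECALC_WRITE_SUPER
 * iterations so that progress survives a crash.
 */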
2161 static void integrity_recalc(struct work_struct *w)
2163 struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, recalc_work);
2164 struct dm_integrity_range range;
2165 struct dm_io_request io_req;
2166 struct dm_io_region io_loc;
2167 sector_t area, offset;
2168 sector_t metadata_block;
2169 unsigned metadata_offset;
2173 unsigned super_counter = 0;
2175 spin_lock_irq(&ic->endio_wait.lock);
2179 if (unlikely(dm_post_suspending(ic->ti)))
2182 range.logical_sector = le64_to_cpu(ic->sb->recalc_sector);
2183 if (unlikely(range.logical_sector >= ic->provided_data_sectors))
2186 get_area_and_offset(ic, range.logical_sector, &area, &offset);
2187 range.n_sectors = min((sector_t)RECALC_SECTORS, ic->provided_data_sectors - range.logical_sector);
2189 range.n_sectors = min(range.n_sectors, (1U << ic->sb->log2_interleave_sectors) - (unsigned)offset);
2191 if (unlikely(!add_new_range(ic, &range, true)))
2192 wait_and_add_new_range(ic, &range);
2194 spin_unlock_irq(&ic->endio_wait.lock);
2196 if (unlikely(++super_counter == RECALC_WRITE_SUPER)) {
2197 recalc_write_super(ic);
2201 if (unlikely(dm_integrity_failed(ic)))
2204 io_req.bi_op = REQ_OP_READ;
2205 io_req.bi_op_flags = 0;
2206 io_req.mem.type = DM_IO_VMA;
2207 io_req.mem.ptr.addr = ic->recalc_buffer;
2208 io_req.notify.fn = NULL;
2209 io_req.client = ic->io;
2210 io_loc.bdev = ic->dev->bdev;
2211 io_loc.sector = get_data_sector(ic, area, offset);
2212 io_loc.count = range.n_sectors;
2214 r = dm_io(&io_req, 1, &io_loc, NULL);
2216 dm_integrity_io_error(ic, "reading data", r);
2220 t = ic->recalc_tags;
2221 for (i = 0; i < range.n_sectors; i += ic->sectors_per_block) {
2222 integrity_sector_checksum(ic, range.logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t);
2226 metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset);
2228 r = dm_integrity_rw_tag(ic, ic->recalc_tags, &metadata_block, &metadata_offset, t - ic->recalc_tags, TAG_WRITE);
2230 dm_integrity_io_error(ic, "writing tags", r);
2234 spin_lock_irq(&ic->endio_wait.lock);
2235 remove_range_unlocked(ic, &range);
2236 ic->sb->recalc_sector = cpu_to_le64(range.logical_sector + range.n_sectors);
2240 remove_range(ic, &range);
2244 spin_unlock_irq(&ic->endio_wait.lock);
2246 recalc_write_super(ic);
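/*
 * Format n_sections journal sections starting at start_section: zero
 * the entry space, stamp each sector with the commit id derived from
 * commit_seq, mark every journal entry unused and write the sections
 * out.
 */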
2249 static void init_journal(struct dm_integrity_c *ic, unsigned start_section,
2250 unsigned n_sections, unsigned char commit_seq)
2257 for (n = 0; n < n_sections; n++) {
2258 i = start_section + n;
2259 wraparound_section(ic, &i);
2260 for (j = 0; j < ic->journal_section_sectors; j++) {
2261 struct journal_sector *js = access_journal(ic, i, j);
2262 memset(&js->entries, 0, JOURNAL_SECTOR_DATA);
2263 js->commit_id = dm_integrity_commit_id(ic, i, j, commit_seq);
2265 for (j = 0; j < ic->journal_section_entries; j++) {
2266 struct journal_entry *je = access_journal_entry(ic, i, j);
2267 journal_entry_set_unused(je);
2271 write_journal(ic, start_section, n_sections);
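/*
 * Map a commit id read from disk back to its sequence number
 * (0 .. N_COMMIT_IDS - 1). If it matches none of the ids valid for
 * this sector position, the journal is corrupted and an I/O error is
 * reported.
 */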
2274 static int find_commit_seq(struct dm_integrity_c *ic, unsigned i, unsigned j, commit_id_t id)
2277 for (k = 0; k < N_COMMIT_IDS; k++) {
2278 if (dm_integrity_commit_id(ic, i, j, k) == id)
2281 dm_integrity_io_error(ic, "journal commit id", -EIO);
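/*
 * Journal replay, run on resume: read (and, when journal encryption
 * is configured, decrypt) the whole journal, determine the most
 * recent commit sequence from the per-sector commit ids, write back
 * any sections that were committed but not yet copied to their final
 * location, and then either continue using the surviving journal or
 * erase and reinitialize it.
 */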
2285 static void replay_journal(struct dm_integrity_c *ic)
2288 bool used_commit_ids[N_COMMIT_IDS];
2289 unsigned max_commit_id_sections[N_COMMIT_IDS];
2290 unsigned write_start, write_sections;
2291 unsigned continue_section;
2293 unsigned char unused, last_used, want_commit_seq;
2295 if (ic->mode == 'R')
2298 if (ic->journal_uptodate)
2304 if (!ic->just_formatted) {
2305 DEBUG_print("reading journal\n");
2306 rw_journal(ic, REQ_OP_READ, 0, 0, ic->journal_sections, NULL);
2308 DEBUG_bytes(lowmem_page_address(ic->journal_io[0].page), 64, "read journal");
2309 if (ic->journal_io) {
2310 struct journal_completion crypt_comp;
2312 init_completion(&crypt_comp.comp);
2313 crypt_comp.in_flight = (atomic_t)ATOMIC_INIT(0);
2314 encrypt_journal(ic, false, 0, ic->journal_sections, &crypt_comp);
2315 wait_for_completion(&crypt_comp.comp);
2317 DEBUG_bytes(lowmem_page_address(ic->journal[0].page), 64, "decrypted journal");
2320 if (dm_integrity_failed(ic))
2323 journal_empty = true;
2324 memset(used_commit_ids, 0, sizeof used_commit_ids);
2325 memset(max_commit_id_sections, 0, sizeof max_commit_id_sections);
2326 for (i = 0; i < ic->journal_sections; i++) {
2327 for (j = 0; j < ic->journal_section_sectors; j++) {
2329 struct journal_sector *js = access_journal(ic, i, j);
2330 k = find_commit_seq(ic, i, j, js->commit_id);
2333 used_commit_ids[k] = true;
2334 max_commit_id_sections[k] = i;
2336 if (journal_empty) {
2337 for (j = 0; j < ic->journal_section_entries; j++) {
2338 struct journal_entry *je = access_journal_entry(ic, i, j);
2339 if (!journal_entry_is_unused(je)) {
2340 journal_empty = false;
2347 if (!used_commit_ids[N_COMMIT_IDS - 1]) {
2348 unused = N_COMMIT_IDS - 1;
2349 while (unused && !used_commit_ids[unused - 1])
2352 for (unused = 0; unused < N_COMMIT_IDS; unused++)
2353 if (!used_commit_ids[unused])
2355 if (unused == N_COMMIT_IDS) {
2356 dm_integrity_io_error(ic, "journal commit ids", -EIO);
2360 DEBUG_print("first unused commit seq %d [%d,%d,%d,%d]\n",
2361 unused, used_commit_ids[0], used_commit_ids[1],
2362 used_commit_ids[2], used_commit_ids[3]);
2364 last_used = prev_commit_seq(unused);
2365 want_commit_seq = prev_commit_seq(last_used);
2367 if (!used_commit_ids[want_commit_seq] && used_commit_ids[prev_commit_seq(want_commit_seq)])
2368 journal_empty = true;
2370 write_start = max_commit_id_sections[last_used] + 1;
2371 if (unlikely(write_start >= ic->journal_sections))
2372 want_commit_seq = next_commit_seq(want_commit_seq);
2373 wraparound_section(ic, &write_start);
2376 for (write_sections = 0; write_sections < ic->journal_sections; write_sections++) {
2377 for (j = 0; j < ic->journal_section_sectors; j++) {
2378 struct journal_sector *js = access_journal(ic, i, j);
2380 if (js->commit_id != dm_integrity_commit_id(ic, i, j, want_commit_seq)) {
2382 * This could be caused by a crash during writing.
2383 * We won't replay the inconsistent part of the journal.
2386 DEBUG_print("commit id mismatch at position (%u, %u): %d != %d\n",
2387 i, j, find_commit_seq(ic, i, j, js->commit_id), want_commit_seq);
2392 if (unlikely(i >= ic->journal_sections))
2393 want_commit_seq = next_commit_seq(want_commit_seq);
2394 wraparound_section(ic, &i);
2398 if (!journal_empty) {
2399 DEBUG_print("replaying %u sections, starting at %u, commit seq %d\n",
2400 write_sections, write_start, want_commit_seq);
2401 do_journal_write(ic, write_start, write_sections, true);
2404 if (write_sections == ic->journal_sections && (ic->mode == 'J' || journal_empty)) {
2405 continue_section = write_start;
2406 ic->commit_seq = want_commit_seq;
2407 DEBUG_print("continuing from section %u, commit seq %d\n", write_start, ic->commit_seq);
2410 unsigned char erase_seq;
2412 DEBUG_print("clearing journal\n");
2414 erase_seq = prev_commit_seq(prev_commit_seq(last_used));
2416 init_journal(ic, s, 1, erase_seq);
2418 wraparound_section(ic, &s);
2419 if (ic->journal_sections >= 2) {
2420 init_journal(ic, s, ic->journal_sections - 2, erase_seq);
2421 s += ic->journal_sections - 2;
2422 wraparound_section(ic, &s);
2423 init_journal(ic, s, 1, erase_seq);
2426 continue_section = 0;
2427 ic->commit_seq = next_commit_seq(erase_seq);
2430 ic->committed_section = continue_section;
2431 ic->n_committed_sections = 0;
2433 ic->uncommitted_section = continue_section;
2434 ic->n_uncommitted_sections = 0;
2436 ic->free_section = continue_section;
2437 ic->free_section_entry = 0;
2438 ic->free_sectors = ic->journal_entries;
2440 ic->journal_tree_root = RB_ROOT;
2441 for (i = 0; i < ic->journal_entries; i++)
2442 init_journal_node(&ic->journal_tree[i]);
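/*
 * Postsuspend: stop the autocommit timer, drain the recalculation,
 * commit and (in journal mode) writer workqueues and flush the
 * buffers, so the journal is fully written back and no I/O is in
 * flight when the target is suspended.
 */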
2445 static void dm_integrity_postsuspend(struct dm_target *ti)
2447 struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private;
2449 del_timer_sync(&ic->autocommit_timer);
2452 drain_workqueue(ic->recalc_wq);
2454 queue_work(ic->commit_wq, &ic->commit_work);
2455 drain_workqueue(ic->commit_wq);
2457 if (ic->mode == 'J') {
2459 queue_work(ic->writer_wq, &ic->writer_work);
2460 drain_workqueue(ic->writer_wq);
2461 dm_integrity_flush_buffers(ic, true);
2464 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
2466 ic->journal_uptodate = true;
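/*
 * Resume: if an interrupted recalculation is recorded in the
 * superblock, restart it from the saved sb->recalc_sector; if the
 * saved position lies beyond provided_data_sectors, clamp it and
 * rewrite the superblock.
 */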
2469 static void dm_integrity_resume(struct dm_target *ti)
2471 struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private;
2475 if (ic->recalc_wq && ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
2476 __u64 recalc_pos = le64_to_cpu(ic->sb->recalc_sector);
2477 if (recalc_pos < ic->provided_data_sectors) {
2478 queue_work(ic->recalc_wq, &ic->recalc_work);
2479 } else if (recalc_pos > ic->provided_data_sectors) {
2480 ic->sb->recalc_sector = cpu_to_le64(ic->provided_data_sectors);
2481 recalc_write_super(ic);
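/*
 * Status: STATUSTYPE_INFO reports the mismatch count, the provided
 * data size and, while recalculating, the current position;
 * STATUSTYPE_TABLE reconstructs the constructor arguments, converting
 * free_sectors_threshold back to the journal_watermark percentage
 * (rounded to the nearest percent).
 */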
2486 static void dm_integrity_status(struct dm_target *ti, status_type_t type,
2487 unsigned status_flags, char *result, unsigned maxlen)
2489 struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private;
2494 case STATUSTYPE_INFO:
2496 (unsigned long long)atomic64_read(&ic->number_of_mismatches),
2497 (unsigned long long)ic->provided_data_sectors);
2498 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
2499 DMEMIT(" %llu", (unsigned long long)le64_to_cpu(ic->sb->recalc_sector));
2504 case STATUSTYPE_TABLE: {
2505 __u64 watermark_percentage = (__u64)(ic->journal_entries - ic->free_sectors_threshold) * 100;
2506 watermark_percentage += ic->journal_entries / 2;
2507 do_div(watermark_percentage, ic->journal_entries);
2509 arg_count += !!ic->meta_dev;
2510 arg_count += ic->sectors_per_block != 1;
2511 arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING));
2512 arg_count += !!ic->internal_hash_alg.alg_string;
2513 arg_count += !!ic->journal_crypt_alg.alg_string;
2514 arg_count += !!ic->journal_mac_alg.alg_string;
2515 arg_count += ic->legacy_recalculate;
2516 DMEMIT("%s %llu %u %c %u", ic->dev->name, (unsigned long long)ic->start,
2517 ic->tag_size, ic->mode, arg_count);
2519 DMEMIT(" meta_device:%s", ic->meta_dev->name);
2520 if (ic->sectors_per_block != 1)
2521 DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT);
2522 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
2523 DMEMIT(" recalculate");
2524 DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS);
2525 DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors);
2526 DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors);
2527 DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage);
2528 DMEMIT(" commit_time:%u", ic->autocommit_msec);
2529 if (ic->legacy_recalculate)
2530 DMEMIT(" legacy_recalculate");
2532 #define EMIT_ALG(a, n) \
2534 if (ic->a.alg_string) { \
2535 DMEMIT(" %s:%s", n, ic->a.alg_string); \
2536 if (ic->a.key_string) \
2537 DMEMIT(":%s", ic->a.key_string);\
2540 EMIT_ALG(internal_hash_alg, "internal_hash");
2541 EMIT_ALG(journal_crypt_alg, "journal_crypt");
2542 EMIT_ALG(journal_mac_alg, "journal_mac");
2548 static int dm_integrity_iterate_devices(struct dm_target *ti,
2549 iterate_devices_callout_fn fn, void *data)
2551 struct dm_integrity_c *ic = ti->private;
2554 return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data);
2556 return fn(ti, ic->dev, 0, ti->len, data);
2559 static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *limits)
2561 struct dm_integrity_c *ic = ti->private;
2563 if (ic->sectors_per_block > 1) {
2564 limits->logical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
2565 limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
2566 blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
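/*
 * Derive the journal geometry from the tag size and block size: each
 * section holds JOURNAL_BLOCK_SECTORS sectors of packed journal
 * entries and commit ids, plus one data block per entry.
 *
 * A worked example (assuming tag_size == 4, one sector per block and
 * a journal MAC): journal_entry_size = roundup(16 + 4, 8) = 24,
 * journal_entries_per_sector = (504 - 8) / 24 = 20,
 * journal_section_entries = 20 * 8 = 160 and
 * journal_section_sectors = 160 * 1 + 8 = 168.
 */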
2570 static void calculate_journal_section_size(struct dm_integrity_c *ic)
2572 unsigned sector_space = JOURNAL_SECTOR_DATA;
2574 ic->journal_sections = le32_to_cpu(ic->sb->journal_sections);
2575 ic->journal_entry_size = roundup(offsetof(struct journal_entry, last_bytes[ic->sectors_per_block]) + ic->tag_size,
2576 JOURNAL_ENTRY_ROUNDUP);
2578 if (ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC))
2579 sector_space -= JOURNAL_MAC_PER_SECTOR;
2580 ic->journal_entries_per_sector = sector_space / ic->journal_entry_size;
2581 ic->journal_section_entries = ic->journal_entries_per_sector * JOURNAL_BLOCK_SECTORS;
2582 ic->journal_section_sectors = (ic->journal_section_entries << ic->sb->log2_sectors_per_block) + JOURNAL_BLOCK_SECTORS;
2583 ic->journal_entries = ic->journal_section_entries * ic->journal_sections;
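/*
 * Verify that the chosen geometry fits the device(s). Computes
 * initial_sectors (superblock plus journal) and, for the interleaved
 * layout, metadata_run (the number of sectors of tag metadata between
 * successive data areas, padded to METADATA_PADDING_SECTORS);
 * log2_metadata_run is set when metadata_run is a power of two so
 * that offsets can be computed with shifts. With a separate metadata
 * device, the total tag area is checked against that device instead.
 * Returns nonzero if the device is too small.
 */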
2586 static int calculate_device_limits(struct dm_integrity_c *ic)
2588 __u64 initial_sectors;
2590 calculate_journal_section_size(ic);
2591 initial_sectors = SB_SECTORS + (__u64)ic->journal_section_sectors * ic->journal_sections;
2592 if (initial_sectors + METADATA_PADDING_SECTORS >= ic->meta_device_sectors || initial_sectors > UINT_MAX)
2594 ic->initial_sectors = initial_sectors;
2596 if (!ic->meta_dev) {
2597 sector_t last_sector, last_area, last_offset;
2599 ic->metadata_run = roundup((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block),
2600 (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT;
2601 if (!(ic->metadata_run & (ic->metadata_run - 1)))
2602 ic->log2_metadata_run = __ffs(ic->metadata_run);
2604 ic->log2_metadata_run = -1;
2606 get_area_and_offset(ic, ic->provided_data_sectors - 1, &last_area, &last_offset);
2607 last_sector = get_data_sector(ic, last_area, last_offset);
2608 if (last_sector < ic->start || last_sector >= ic->meta_device_sectors)
2611 __u64 meta_size = (ic->provided_data_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size;
2612 meta_size = (meta_size + ((1U << (ic->log2_buffer_sectors + SECTOR_SHIFT)) - 1))
2613 >> (ic->log2_buffer_sectors + SECTOR_SHIFT);
2614 meta_size <<= ic->log2_buffer_sectors;
2615 if (ic->initial_sectors + meta_size < ic->initial_sectors ||
2616 ic->initial_sectors + meta_size > ic->meta_device_sectors)
2618 ic->metadata_run = 1;
2619 ic->log2_metadata_run = 0;
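/*
 * Build a fresh superblock for an uninitialized device. In the
 * interleaved layout the usable data size is maximized by setting
 * provided_data_sectors bit by bit from the top and keeping each bit
 * only if calculate_device_limits() still succeeds; with a separate
 * metadata device the same probing is applied to journal_sections
 * instead.
 */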
2625 static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sectors, unsigned interleave_sectors)
2627 unsigned journal_sections;
2630 memset(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT);
2631 memcpy(ic->sb->magic, SB_MAGIC, 8);
2632 ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size);
2633 ic->sb->log2_sectors_per_block = __ffs(ic->sectors_per_block);
2634 if (ic->journal_mac_alg.alg_string)
2635 ic->sb->flags |= cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC);
2637 calculate_journal_section_size(ic);
2638 journal_sections = journal_sectors / ic->journal_section_sectors;
2639 if (!journal_sections)
2640 journal_sections = 1;
2642 if (!ic->meta_dev) {
2643 ic->sb->journal_sections = cpu_to_le32(journal_sections);
2644 if (!interleave_sectors)
2645 interleave_sectors = DEFAULT_INTERLEAVE_SECTORS;
2646 ic->sb->log2_interleave_sectors = __fls(interleave_sectors);
2647 ic->sb->log2_interleave_sectors = max((__u8)MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors);
2648 ic->sb->log2_interleave_sectors = min((__u8)MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors);
2650 ic->provided_data_sectors = 0;
2651 for (test_bit = fls64(ic->meta_device_sectors) - 1; test_bit >= 3; test_bit--) {
2652 __u64 prev_data_sectors = ic->provided_data_sectors;
2654 ic->provided_data_sectors |= (sector_t)1 << test_bit;
2655 if (calculate_device_limits(ic))
2656 ic->provided_data_sectors = prev_data_sectors;
2658 if (!ic->provided_data_sectors)
2661 ic->sb->log2_interleave_sectors = 0;
2662 ic->provided_data_sectors = ic->data_device_sectors;
2663 ic->provided_data_sectors &= ~(sector_t)(ic->sectors_per_block - 1);
2666 ic->sb->journal_sections = cpu_to_le32(0);
2667 for (test_bit = fls(journal_sections) - 1; test_bit >= 0; test_bit--) {
2668 __u32 prev_journal_sections = le32_to_cpu(ic->sb->journal_sections);
2669 __u32 test_journal_sections = prev_journal_sections | (1U << test_bit);
2670 if (test_journal_sections > journal_sections)
2672 ic->sb->journal_sections = cpu_to_le32(test_journal_sections);
2673 if (calculate_device_limits(ic))
2674 ic->sb->journal_sections = cpu_to_le32(prev_journal_sections);
2677 if (!le32_to_cpu(ic->sb->journal_sections)) {
2678 if (ic->log2_buffer_sectors > 3) {
2679 ic->log2_buffer_sectors--;
2680 goto try_smaller_buffer;
2686 ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors);
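/*
 * Register a blk_integrity profile for the mapped device, so that
 * when no internal hash is used the upper layer (e.g. a filesystem)
 * can attach and verify the integrity tags itself.
 */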
2693 static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic)
2695 struct gendisk *disk = dm_disk(dm_table_get_md(ti->table));
2696 struct blk_integrity bi;
2698 memset(&bi, 0, sizeof(bi));
2699 bi.profile = &dm_integrity_profile;
2700 bi.tuple_size = ic->tag_size;
2701 bi.tag_size = bi.tuple_size;
2702 bi.interval_exp = ic->sb->log2_sectors_per_block + SECTOR_SHIFT;
2704 blk_integrity_register(disk, &bi);
2705 blk_queue_max_integrity_segments(disk->queue, UINT_MAX);
2708 static void dm_integrity_free_page_list(struct dm_integrity_c *ic, struct page_list *pl)
2714 for (i = 0; i < ic->journal_pages; i++)
2716 __free_page(pl[i].page);
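/*
 * Allocate ic->journal_pages pages and link them into the page_list
 * chain format that dm-io accepts, so the journal can be read and
 * written through dm-io.
 */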
2720 static struct page_list *dm_integrity_alloc_page_list(struct dm_integrity_c *ic)
2722 size_t page_list_desc_size = ic->journal_pages * sizeof(struct page_list);
2723 struct page_list *pl;
2726 pl = kvmalloc(page_list_desc_size, GFP_KERNEL | __GFP_ZERO);
2730 for (i = 0; i < ic->journal_pages; i++) {
2731 pl[i].page = alloc_page(GFP_KERNEL);
2733 dm_integrity_free_page_list(ic, pl);
2737 pl[i - 1].next = &pl[i];
2743 static void dm_integrity_free_journal_scatterlist(struct dm_integrity_c *ic, struct scatterlist **sl)
2746 for (i = 0; i < ic->journal_sections; i++)
2751 static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_integrity_c *ic, struct page_list *pl)
2753 struct scatterlist **sl;
2756 sl = kvmalloc_array(ic->journal_sections,
2757 sizeof(struct scatterlist *),
2758 GFP_KERNEL | __GFP_ZERO);
2762 for (i = 0; i < ic->journal_sections; i++) {
2763 struct scatterlist *s;
2764 unsigned start_index, start_offset;
2765 unsigned end_index, end_offset;
2769 page_list_location(ic, i, 0, &start_index, &start_offset);
2770 page_list_location(ic, i, ic->journal_section_sectors - 1, &end_index, &end_offset);
2772 n_pages = (end_index - start_index + 1);
2774 s = kvmalloc_array(n_pages, sizeof(struct scatterlist),
2777 dm_integrity_free_journal_scatterlist(ic, sl);
2781 sg_init_table(s, n_pages);
2782 for (idx = start_index; idx <= end_index; idx++) {
2783 char *va = lowmem_page_address(pl[idx].page);
2784 unsigned start = 0, end = PAGE_SIZE;
2785 if (idx == start_index)
2786 start = start_offset;
2787 if (idx == end_index)
2788 end = end_offset + (1 << SECTOR_SHIFT);
2789 sg_set_buf(&s[idx - start_index], va + start, end - start);
2798 static void free_alg(struct alg_spec *a)
2800 kzfree(a->alg_string);
2802 memset(a, 0, sizeof *a);
2805 static int get_alg_and_key(const char *arg, struct alg_spec *a, char **error, char *error_inval)
2811 a->alg_string = kstrdup(strchr(arg, ':') + 1, GFP_KERNEL);
2815 k = strchr(a->alg_string, ':');
2818 a->key_string = k + 1;
2819 if (strlen(a->key_string) & 1)
2822 a->key_size = strlen(a->key_string) / 2;
2823 a->key = kmalloc(a->key_size, GFP_KERNEL);
2826 if (hex2bin(a->key, a->key_string, a->key_size))
2832 *error = error_inval;
2835 *error = "Out of memory for an argument";
2839 static int get_mac(struct crypto_shash **hash, struct alg_spec *a, char **error,
2840 char *error_alg, char *error_key)
2844 if (a->alg_string) {
2845 *hash = crypto_alloc_shash(a->alg_string, 0, CRYPTO_ALG_ASYNC);
2846 if (IS_ERR(*hash)) {
2854 r = crypto_shash_setkey(*hash, a->key, a->key_size);
2859 } else if (crypto_shash_get_flags(*hash) & CRYPTO_TFM_NEED_KEY) {
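/*
 * Allocate the in-core journal and set up optional journal
 * encryption. Two modes are supported: if the cipher has a block
 * size of 1 (a stream cipher), one long keystream is generated into
 * journal_xor and the journal is XORed with it; otherwise a
 * per-section IV is derived by encrypting the section number, and
 * each section gets a preallocated skcipher request so it can be
 * encrypted and decrypted on the fly.
 */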
2868 static int create_journal(struct dm_integrity_c *ic, char **error)
2872 __u64 journal_pages, journal_desc_size, journal_tree_size;
2873 unsigned char *crypt_data = NULL, *crypt_iv = NULL;
2874 struct skcipher_request *req = NULL;
2876 ic->commit_ids[0] = cpu_to_le64(0x1111111111111111ULL);
2877 ic->commit_ids[1] = cpu_to_le64(0x2222222222222222ULL);
2878 ic->commit_ids[2] = cpu_to_le64(0x3333333333333333ULL);
2879 ic->commit_ids[3] = cpu_to_le64(0x4444444444444444ULL);
2881 journal_pages = roundup((__u64)ic->journal_sections * ic->journal_section_sectors,
2882 PAGE_SIZE >> SECTOR_SHIFT) >> (PAGE_SHIFT - SECTOR_SHIFT);
2883 journal_desc_size = journal_pages * sizeof(struct page_list);
2884 if (journal_pages >= totalram_pages - totalhigh_pages || journal_desc_size > ULONG_MAX) {
2885 *error = "Journal doesn't fit into memory";
2889 ic->journal_pages = journal_pages;
2891 ic->journal = dm_integrity_alloc_page_list(ic);
2893 *error = "Could not allocate memory for journal";
2897 if (ic->journal_crypt_alg.alg_string) {
2898 unsigned ivsize, blocksize;
2899 struct journal_completion comp;
2902 ic->journal_crypt = crypto_alloc_skcipher(ic->journal_crypt_alg.alg_string, 0, 0);
2903 if (IS_ERR(ic->journal_crypt)) {
2904 *error = "Invalid journal cipher";
2905 r = PTR_ERR(ic->journal_crypt);
2906 ic->journal_crypt = NULL;
2909 ivsize = crypto_skcipher_ivsize(ic->journal_crypt);
2910 blocksize = crypto_skcipher_blocksize(ic->journal_crypt);
2912 if (ic->journal_crypt_alg.key) {
2913 r = crypto_skcipher_setkey(ic->journal_crypt, ic->journal_crypt_alg.key,
2914 ic->journal_crypt_alg.key_size);
2916 *error = "Error setting encryption key";
2920 DEBUG_print("cipher %s, block size %u iv size %u\n",
2921 ic->journal_crypt_alg.alg_string, blocksize, ivsize);
2923 ic->journal_io = dm_integrity_alloc_page_list(ic);
2924 if (!ic->journal_io) {
2925 *error = "Could not allocate memory for journal io";
2930 if (blocksize == 1) {
2931 struct scatterlist *sg;
2933 req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL);
2935 *error = "Could not allocate crypt request";
2940 crypt_iv = kmalloc(ivsize, GFP_KERNEL);
2942 *error = "Could not allocate iv";
2947 ic->journal_xor = dm_integrity_alloc_page_list(ic);
2948 if (!ic->journal_xor) {
2949 *error = "Could not allocate memory for journal xor";
2954 sg = kvmalloc_array(ic->journal_pages + 1,
2955 sizeof(struct scatterlist),
2958 *error = "Unable to allocate sg list";
2962 sg_init_table(sg, ic->journal_pages + 1);
2963 for (i = 0; i < ic->journal_pages; i++) {
2964 char *va = lowmem_page_address(ic->journal_xor[i].page);
2966 sg_set_buf(&sg[i], va, PAGE_SIZE);
2968 sg_set_buf(&sg[i], &ic->commit_ids, sizeof ic->commit_ids);
2969 memset(crypt_iv, 0x00, ivsize);
2971 skcipher_request_set_crypt(req, sg, sg, PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, crypt_iv);
2972 init_completion(&comp.comp);
2973 comp.in_flight = (atomic_t)ATOMIC_INIT(1);
2974 if (do_crypt(true, req, &comp))
2975 wait_for_completion(&comp.comp);
2977 r = dm_integrity_failed(ic);
2979 *error = "Unable to encrypt journal";
2982 DEBUG_bytes(lowmem_page_address(ic->journal_xor[0].page), 64, "xor data");
2984 crypto_free_skcipher(ic->journal_crypt);
2985 ic->journal_crypt = NULL;
2987 unsigned crypt_len = roundup(ivsize, blocksize);
2989 req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL);
2991 *error = "Could not allocate crypt request";
2996 crypt_iv = kmalloc(ivsize, GFP_KERNEL);
2998 *error = "Could not allocate iv";
3003 crypt_data = kmalloc(crypt_len, GFP_KERNEL);
3005 *error = "Unable to allocate crypt data";
3010 ic->journal_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal);
3011 if (!ic->journal_scatterlist) {
3012 *error = "Unable to allocate sg list";
3016 ic->journal_io_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal_io);
3017 if (!ic->journal_io_scatterlist) {
3018 *error = "Unable to allocate sg list";
3022 ic->sk_requests = kvmalloc_array(ic->journal_sections,
3023 sizeof(struct skcipher_request *),
3024 GFP_KERNEL | __GFP_ZERO);
3025 if (!ic->sk_requests) {
3026 *error = "Unable to allocate sk requests";
3030 for (i = 0; i < ic->journal_sections; i++) {
3031 struct scatterlist sg;
3032 struct skcipher_request *section_req;
3033 __u32 section_le = cpu_to_le32(i);
3035 memset(crypt_iv, 0x00, ivsize);
3036 memset(crypt_data, 0x00, crypt_len);
3037 memcpy(crypt_data, &section_le, min((size_t)crypt_len, sizeof(section_le)));
3039 sg_init_one(&sg, crypt_data, crypt_len);
3040 skcipher_request_set_crypt(req, &sg, &sg, crypt_len, crypt_iv);
3041 init_completion(&comp.comp);
3042 comp.in_flight = (atomic_t)ATOMIC_INIT(1);
3043 if (do_crypt(true, req, &comp))
3044 wait_for_completion(&comp.comp);
3046 r = dm_integrity_failed(ic);
3048 *error = "Unable to generate iv";
3052 section_req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL);
3054 *error = "Unable to allocate crypt request";
3058 section_req->iv = kmalloc_array(ivsize, 2,
3060 if (!section_req->iv) {
3061 skcipher_request_free(section_req);
3062 *error = "Unable to allocate iv";
3066 memcpy(section_req->iv + ivsize, crypt_data, ivsize);
3067 section_req->cryptlen = (size_t)ic->journal_section_sectors << SECTOR_SHIFT;
3068 ic->sk_requests[i] = section_req;
3069 DEBUG_bytes(crypt_data, ivsize, "iv(%u)", i);
3074 for (i = 0; i < N_COMMIT_IDS; i++) {
3077 for (j = 0; j < i; j++) {
3078 if (ic->commit_ids[j] == ic->commit_ids[i]) {
3079 ic->commit_ids[i] = cpu_to_le64(le64_to_cpu(ic->commit_ids[i]) + 1);
3080 goto retest_commit_id;
3083 DEBUG_print("commit id %u: %016llx\n", i, ic->commit_ids[i]);
3086 journal_tree_size = (__u64)ic->journal_entries * sizeof(struct journal_node);
3087 if (journal_tree_size > ULONG_MAX) {
3088 *error = "Journal doesn't fit into memory";
3092 ic->journal_tree = kvmalloc(journal_tree_size, GFP_KERNEL);
3093 if (!ic->journal_tree) {
3094 *error = "Could not allocate memory for journal tree";
3100 skcipher_request_free(req);
3106  * Construct an integrity mapping
3110 * offset from the start of the device
3112 * D - direct writes, J - journal writes, R - recovery mode
3113 * number of optional arguments
3114 * optional arguments:
3116 * interleave_sectors
3125 static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
3127 struct dm_integrity_c *ic;
3130 unsigned extra_args;
3131 struct dm_arg_set as;
3132 static const struct dm_arg _args[] = {
3133 {0, 12, "Invalid number of feature args"},
3135 unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec;
3137 bool should_write_sb;
3139 unsigned long long start;
3141 #define DIRECT_ARGUMENTS 4
3143 if (argc <= DIRECT_ARGUMENTS) {
3144 ti->error = "Invalid argument count";
3148 ic = kzalloc(sizeof(struct dm_integrity_c), GFP_KERNEL);
3150 ti->error = "Cannot allocate integrity context";
3154 ti->per_io_data_size = sizeof(struct dm_integrity_io);
3157 ic->in_progress = RB_ROOT;
3158 INIT_LIST_HEAD(&ic->wait_list);
3159 init_waitqueue_head(&ic->endio_wait);
3160 bio_list_init(&ic->flush_bio_list);
3161 init_waitqueue_head(&ic->copy_to_journal_wait);
3162 init_completion(&ic->crypto_backoff);
3163 atomic64_set(&ic->number_of_mismatches, 0);
3165 r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ic->dev);
3167 ti->error = "Device lookup failed";
3171 if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1 || start != (sector_t)start) {
3172 ti->error = "Invalid starting offset";
3178 if (strcmp(argv[2], "-")) {
3179 if (sscanf(argv[2], "%u%c", &ic->tag_size, &dummy) != 1 || !ic->tag_size) {
3180 ti->error = "Invalid tag size";
3186 if (!strcmp(argv[3], "J") || !strcmp(argv[3], "D") || !strcmp(argv[3], "R"))
3187 ic->mode = argv[3][0];
3189 ti->error = "Invalid mode (expecting J, D, R)";
3194 journal_sectors = 0;
3195 interleave_sectors = DEFAULT_INTERLEAVE_SECTORS;
3196 buffer_sectors = DEFAULT_BUFFER_SECTORS;
3197 journal_watermark = DEFAULT_JOURNAL_WATERMARK;
3198 sync_msec = DEFAULT_SYNC_MSEC;
3199 recalculate = false;
3200 ic->sectors_per_block = 1;
3202 as.argc = argc - DIRECT_ARGUMENTS;
3203 as.argv = argv + DIRECT_ARGUMENTS;
3204 r = dm_read_arg_group(_args, &as, &extra_args, &ti->error);
3208 while (extra_args--) {
3209 const char *opt_string;
3211 opt_string = dm_shift_arg(&as);
3214 ti->error = "Not enough feature arguments";
3217 if (sscanf(opt_string, "journal_sectors:%u%c", &val, &dummy) == 1)
3218 journal_sectors = val ? val : 1;
3219 else if (sscanf(opt_string, "interleave_sectors:%u%c", &val, &dummy) == 1)
3220 interleave_sectors = val;
3221 else if (sscanf(opt_string, "buffer_sectors:%u%c", &val, &dummy) == 1)
3222 buffer_sectors = val;
3223 else if (sscanf(opt_string, "journal_watermark:%u%c", &val, &dummy) == 1 && val <= 100)
3224 journal_watermark = val;
3225 else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1)
3227 else if (!strncmp(opt_string, "meta_device:", strlen("meta_device:"))) {
3229 dm_put_device(ti, ic->meta_dev);
3230 ic->meta_dev = NULL;
3232 r = dm_get_device(ti, strchr(opt_string, ':') + 1, dm_table_get_mode(ti->table), &ic->meta_dev);
3234 ti->error = "Device lookup failed";
3237 } else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) {
3238 if (val < 1 << SECTOR_SHIFT ||
3239 val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT ||
3242 ti->error = "Invalid block_size argument";
3245 ic->sectors_per_block = val >> SECTOR_SHIFT;
3246 } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) {
3247 r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error,
3248 "Invalid internal_hash argument");
3251 } else if (!strncmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) {
3252 r = get_alg_and_key(opt_string, &ic->journal_crypt_alg, &ti->error,
3253 "Invalid journal_crypt argument");
3256 } else if (!strncmp(opt_string, "journal_mac:", strlen("journal_mac:"))) {
3257 r = get_alg_and_key(opt_string, &ic->journal_mac_alg, &ti->error,
3258 "Invalid journal_mac argument");
3261 } else if (!strcmp(opt_string, "recalculate")) {
3263 } else if (!strcmp(opt_string, "legacy_recalculate")) {
3264 ic->legacy_recalculate = true;
3267 ti->error = "Invalid argument";
3272 ic->data_device_sectors = i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT;
3274 ic->meta_device_sectors = ic->data_device_sectors;
3276 ic->meta_device_sectors = i_size_read(ic->meta_dev->bdev->bd_inode) >> SECTOR_SHIFT;
3278 if (!journal_sectors) {
3279 journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS,
3280 ic->data_device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR);
3283 if (!buffer_sectors)
3285 ic->log2_buffer_sectors = min((int)__fls(buffer_sectors), 31 - SECTOR_SHIFT);
3287 r = get_mac(&ic->internal_hash, &ic->internal_hash_alg, &ti->error,
3288 "Invalid internal hash", "Error setting internal hash key");
3292 r = get_mac(&ic->journal_mac, &ic->journal_mac_alg, &ti->error,
3293 "Invalid journal mac", "Error setting journal mac key");
3297 if (!ic->tag_size) {
3298 if (!ic->internal_hash) {
3299 ti->error = "Unknown tag size";
3303 ic->tag_size = crypto_shash_digestsize(ic->internal_hash);
3305 if (ic->tag_size > MAX_TAG_SIZE) {
3306 ti->error = "Too big tag size";
3310 if (!(ic->tag_size & (ic->tag_size - 1)))
3311 ic->log2_tag_size = __ffs(ic->tag_size);
3313 ic->log2_tag_size = -1;
3315 ic->autocommit_jiffies = msecs_to_jiffies(sync_msec);
3316 ic->autocommit_msec = sync_msec;
3317 timer_setup(&ic->autocommit_timer, autocommit_fn, 0);
3319 ic->io = dm_io_client_create();
3320 if (IS_ERR(ic->io)) {
3321 r = PTR_ERR(ic->io);
3323 ti->error = "Cannot allocate dm io";
3327 r = mempool_init_slab_pool(&ic->journal_io_mempool, JOURNAL_IO_MEMPOOL, journal_io_cache);
3329 ti->error = "Cannot allocate mempool";
3333 ic->metadata_wq = alloc_workqueue("dm-integrity-metadata",
3334 WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE);
3335 if (!ic->metadata_wq) {
3336 ti->error = "Cannot allocate workqueue";
3342 * If this workqueue were percpu, it would cause bio reordering
3343 * and reduced performance.
3345 ic->wait_wq = alloc_workqueue("dm-integrity-wait", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
3347 ti->error = "Cannot allocate workqueue";
3352 ic->offload_wq = alloc_workqueue("dm-integrity-offload", WQ_MEM_RECLAIM,
3353 METADATA_WORKQUEUE_MAX_ACTIVE);
3354 if (!ic->offload_wq) {
3355 ti->error = "Cannot allocate workqueue";
3360 ic->commit_wq = alloc_workqueue("dm-integrity-commit", WQ_MEM_RECLAIM, 1);
3361 if (!ic->commit_wq) {
3362 ti->error = "Cannot allocate workqueue";
3366 INIT_WORK(&ic->commit_work, integrity_commit);
3368 if (ic->mode == 'J') {
3369 ic->writer_wq = alloc_workqueue("dm-integrity-writer", WQ_MEM_RECLAIM, 1);
3370 if (!ic->writer_wq) {
3371 ti->error = "Cannot allocate workqueue";
3375 INIT_WORK(&ic->writer_work, integrity_writer);
3378 ic->sb = alloc_pages_exact(SB_SECTORS << SECTOR_SHIFT, GFP_KERNEL);
3381 ti->error = "Cannot allocate superblock area";
3385 r = sync_rw_sb(ic, REQ_OP_READ, 0);
3387 ti->error = "Error reading superblock";
3390 should_write_sb = false;
3391 if (memcmp(ic->sb->magic, SB_MAGIC, 8)) {
3392 if (ic->mode != 'R') {
3393 if (memchr_inv(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT)) {
3395 ti->error = "The device is not initialized";
3400 r = initialize_superblock(ic, journal_sectors, interleave_sectors);
3402 ti->error = "Could not initialize superblock";
3405 if (ic->mode != 'R')
3406 should_write_sb = true;
3409 if (!ic->sb->version || ic->sb->version > SB_VERSION_2) {
3411 ti->error = "Unknown version";
3414 if (le16_to_cpu(ic->sb->integrity_tag_size) != ic->tag_size) {
3416 ti->error = "Tag size doesn't match the information in superblock";
3419 if (ic->sb->log2_sectors_per_block != __ffs(ic->sectors_per_block)) {
3421 ti->error = "Block size doesn't match the information in superblock";
3424 if (!le32_to_cpu(ic->sb->journal_sections)) {
3426 ti->error = "Corrupted superblock, journal_sections is 0";
3429 /* make sure that ti->max_io_len doesn't overflow */
3430 if (!ic->meta_dev) {
3431 if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS ||
3432 ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) {
3434 ti->error = "Invalid interleave_sectors in the superblock";
3438 if (ic->sb->log2_interleave_sectors) {
3440 ti->error = "Invalid interleave_sectors in the superblock";
3444 ic->provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors);
3445 if (ic->provided_data_sectors != le64_to_cpu(ic->sb->provided_data_sectors)) {
3446 /* test for overflow */
3448 ti->error = "The superblock has 64-bit device size, but the kernel was compiled with 32-bit sectors";
3451 if (!!(ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) != !!ic->journal_mac_alg.alg_string) {
3453 ti->error = "Journal mac mismatch";
3458 r = calculate_device_limits(ic);
3461 if (ic->log2_buffer_sectors > 3) {
3462 ic->log2_buffer_sectors--;
3463 goto try_smaller_buffer;
3466 ti->error = "The device is too small";
3470 ic->log2_buffer_sectors = min(ic->log2_buffer_sectors, (__u8)__ffs(ic->metadata_run));
3472 if (ti->len > ic->provided_data_sectors) {
3474 ti->error = "Not enough provided sectors for requested mapping size";
3479 threshold = (__u64)ic->journal_entries * (100 - journal_watermark);
3481 do_div(threshold, 100);
3482 ic->free_sectors_threshold = threshold;
3484 DEBUG_print("initialized:\n");
3485 DEBUG_print(" integrity_tag_size %u\n", le16_to_cpu(ic->sb->integrity_tag_size));
3486 DEBUG_print(" journal_entry_size %u\n", ic->journal_entry_size);
3487 DEBUG_print(" journal_entries_per_sector %u\n", ic->journal_entries_per_sector);
3488 DEBUG_print(" journal_section_entries %u\n", ic->journal_section_entries);
3489 DEBUG_print(" journal_section_sectors %u\n", ic->journal_section_sectors);
3490 DEBUG_print(" journal_sections %u\n", (unsigned)le32_to_cpu(ic->sb->journal_sections));
3491 DEBUG_print(" journal_entries %u\n", ic->journal_entries);
3492 DEBUG_print(" log2_interleave_sectors %d\n", ic->sb->log2_interleave_sectors);
3493 DEBUG_print(" data_device_sectors 0x%llx\n", (unsigned long long)ic->data_device_sectors);
3494 DEBUG_print(" initial_sectors 0x%x\n", ic->initial_sectors);
3495 DEBUG_print(" metadata_run 0x%x\n", ic->metadata_run);
3496 DEBUG_print(" log2_metadata_run %d\n", ic->log2_metadata_run);
3497 DEBUG_print(" provided_data_sectors 0x%llx (%llu)\n", (unsigned long long)ic->provided_data_sectors,
3498 (unsigned long long)ic->provided_data_sectors);
3499 DEBUG_print(" log2_buffer_sectors %u\n", ic->log2_buffer_sectors);
3501 if (recalculate && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) {
3502 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
3503 ic->sb->recalc_sector = cpu_to_le64(0);
3506 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
3507 size_t recalc_tags_size;
3508 if (!ic->internal_hash) {
3510 ti->error = "Recalculate is only valid with internal hash";
3513 ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1);
3514 if (!ic->recalc_wq) {
3515 ti->error = "Cannot allocate workqueue";
3519 INIT_WORK(&ic->recalc_work, integrity_recalc);
3520 ic->recalc_buffer = vmalloc(RECALC_SECTORS << SECTOR_SHIFT);
3521 if (!ic->recalc_buffer) {
3522 ti->error = "Cannot allocate buffer for recalculating";
3526 recalc_tags_size = (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size;
3527 if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size)
3528 recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size;
3529 ic->recalc_tags = kvmalloc(recalc_tags_size, GFP_KERNEL);
3530 if (!ic->recalc_tags) {
3531 ti->error = "Cannot allocate tags for recalculating";
3536 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
3537 ti->error = "Recalculate can only be specified with internal_hash";
3543 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
3544 le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors &&
3545 dm_integrity_disable_recalculate(ic)) {
3546 ti->error = "Recalculating with HMAC is disabled for security reasons - if you really need it, use the argument \"legacy_recalculate\"";
3551 ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev,
3552 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL);
3553 if (IS_ERR(ic->bufio)) {
3554 r = PTR_ERR(ic->bufio);
3555 ti->error = "Cannot initialize dm-bufio";
3559 dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors);
3561 if (ic->mode != 'R') {
3562 r = create_journal(ic, &ti->error);
3567 if (should_write_sb) {
3568 init_journal(ic, 0, ic->journal_sections, 0);
3569 r = dm_integrity_failed(ic);
3571 ti->error = "Error initializing journal";
3574 r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
3576 ti->error = "Error initializing superblock";
3579 ic->just_formatted = true;
3582 if (!ic->meta_dev) {
3583 r = dm_set_target_max_io_len(ti, 1U << ic->sb->log2_interleave_sectors);
3588 if (!ic->internal_hash)
3589 dm_integrity_set(ti, ic);
3591 ti->num_flush_bios = 1;
3592 ti->flush_supported = true;
3596 dm_integrity_dtr(ti);
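/*
 * Destructor. It is also called from the constructor's error path,
 * so every resource is checked before it is released.
 */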
3600 static void dm_integrity_dtr(struct dm_target *ti)
3602 struct dm_integrity_c *ic = ti->private;
3604 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
3605 BUG_ON(!list_empty(&ic->wait_list));
3607 if (ic->metadata_wq)
3608 destroy_workqueue(ic->metadata_wq);
3610 destroy_workqueue(ic->wait_wq);
3612 destroy_workqueue(ic->offload_wq);
3614 destroy_workqueue(ic->commit_wq);
3616 destroy_workqueue(ic->writer_wq);
3618 destroy_workqueue(ic->recalc_wq);
3619 if (ic->recalc_buffer)
3620 vfree(ic->recalc_buffer);
3621 if (ic->recalc_tags)
3622 kvfree(ic->recalc_tags);
3624 dm_bufio_client_destroy(ic->bufio);
3625 mempool_exit(&ic->journal_io_mempool);
3627 dm_io_client_destroy(ic->io);
3629 dm_put_device(ti, ic->dev);
3631 dm_put_device(ti, ic->meta_dev);
3632 dm_integrity_free_page_list(ic, ic->journal);
3633 dm_integrity_free_page_list(ic, ic->journal_io);
3634 dm_integrity_free_page_list(ic, ic->journal_xor);
3635 if (ic->journal_scatterlist)
3636 dm_integrity_free_journal_scatterlist(ic, ic->journal_scatterlist);
3637 if (ic->journal_io_scatterlist)
3638 dm_integrity_free_journal_scatterlist(ic, ic->journal_io_scatterlist);
3639 if (ic->sk_requests) {
3642 for (i = 0; i < ic->journal_sections; i++) {
3643 struct skcipher_request *req = ic->sk_requests[i];
3646 skcipher_request_free(req);
3649 kvfree(ic->sk_requests);
3651 kvfree(ic->journal_tree);
3653 free_pages_exact(ic->sb, SB_SECTORS << SECTOR_SHIFT);
3655 if (ic->internal_hash)
3656 crypto_free_shash(ic->internal_hash);
3657 free_alg(&ic->internal_hash_alg);
3659 if (ic->journal_crypt)
3660 crypto_free_skcipher(ic->journal_crypt);
3661 free_alg(&ic->journal_crypt_alg);
3663 if (ic->journal_mac)
3664 crypto_free_shash(ic->journal_mac);
3665 free_alg(&ic->journal_mac_alg);
3670 static struct target_type integrity_target = {
3671 .name = "integrity",
3672 .version = {1, 2, 0},
3673 .module = THIS_MODULE,
3674 .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
3675 .ctr = dm_integrity_ctr,
3676 .dtr = dm_integrity_dtr,
3677 .map = dm_integrity_map,
3678 .postsuspend = dm_integrity_postsuspend,
3679 .resume = dm_integrity_resume,
3680 .status = dm_integrity_status,
3681 .iterate_devices = dm_integrity_iterate_devices,
3682 .io_hints = dm_integrity_io_hints,
3685 int __init dm_integrity_init(void)
3689 journal_io_cache = kmem_cache_create("integrity_journal_io",
3690 sizeof(struct journal_io), 0, 0, NULL);
3691 if (!journal_io_cache) {
3692 DMERR("can't allocate journal io cache");
3696 r = dm_register_target(&integrity_target);
3699 DMERR("register failed %d", r);
3704 void dm_integrity_exit(void)
3706 dm_unregister_target(&integrity_target);
3707 kmem_cache_destroy(journal_io_cache);
3710 module_init(dm_integrity_init);
3711 module_exit(dm_integrity_exit);
3713 MODULE_AUTHOR("Milan Broz");
3714 MODULE_AUTHOR("Mikulas Patocka");
3715 MODULE_DESCRIPTION(DM_NAME " target for integrity tags extension");
3716 MODULE_LICENSE("GPL");