GNU Linux-libre 6.7.9-gnu
[releases.git] / drivers / md / dm-delay.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2005-2007 Red Hat GmbH
4  *
5  * A target that delays reads and/or writes and can send
6  * them to different devices.
7  *
8  * This file is released under the GPL.
9  */
10
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/blkdev.h>
14 #include <linux/bio.h>
15 #include <linux/slab.h>
16 #include <linux/kthread.h>
17
18 #include <linux/device-mapper.h>
19
20 #define DM_MSG_PREFIX "delay"
21
22 struct delay_class {
23         struct dm_dev *dev;
24         sector_t start;
25         unsigned int delay;
26         unsigned int ops;
27 };
28
29 struct delay_c {
30         struct timer_list delay_timer;
31         struct mutex timer_lock;
32         struct workqueue_struct *kdelayd_wq;
33         struct work_struct flush_expired_bios;
34         struct list_head delayed_bios;
35         struct task_struct *worker;
36         bool may_delay;
37
38         struct delay_class read;
39         struct delay_class write;
40         struct delay_class flush;
41
42         int argc;
43 };
44
45 struct dm_delay_info {
46         struct delay_c *context;
47         struct delay_class *class;
48         struct list_head list;
49         unsigned long expires;
50 };
51
/*
 * File-global lock: guards the delayed_bios list, the per-class ops
 * counters and the reads/writes of may_delay done in delay_bio() and
 * delay_presuspend(), for every delay target.
 */
static DEFINE_MUTEX(delayed_bios_lock);
53
54 static void handle_delayed_timer(struct timer_list *t)
55 {
56         struct delay_c *dc = from_timer(dc, t, delay_timer);
57
58         queue_work(dc->kdelayd_wq, &dc->flush_expired_bios);
59 }
60
61 static void queue_timeout(struct delay_c *dc, unsigned long expires)
62 {
63         mutex_lock(&dc->timer_lock);
64
65         if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires)
66                 mod_timer(&dc->delay_timer, expires);
67
68         mutex_unlock(&dc->timer_lock);
69 }
70
71 static inline bool delay_is_fast(struct delay_c *dc)
72 {
73         return !!dc->worker;
74 }
75
76 static void flush_bios(struct bio *bio)
77 {
78         struct bio *n;
79
80         while (bio) {
81                 n = bio->bi_next;
82                 bio->bi_next = NULL;
83                 dm_submit_bio_remap(bio, NULL);
84                 bio = n;
85         }
86 }
87
88 static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
89 {
90         struct dm_delay_info *delayed, *next;
91         struct bio_list flush_bio_list;
92         unsigned long next_expires = 0;
93         bool start_timer = false;
94         bio_list_init(&flush_bio_list);
95
96         mutex_lock(&delayed_bios_lock);
97         list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
98                 cond_resched();
99                 if (flush_all || time_after_eq(jiffies, delayed->expires)) {
100                         struct bio *bio = dm_bio_from_per_bio_data(delayed,
101                                                 sizeof(struct dm_delay_info));
102                         list_del(&delayed->list);
103                         bio_list_add(&flush_bio_list, bio);
104                         delayed->class->ops--;
105                         continue;
106                 }
107
108                 if (!delay_is_fast(dc)) {
109                         if (!start_timer) {
110                                 start_timer = true;
111                                 next_expires = delayed->expires;
112                         } else {
113                                 next_expires = min(next_expires, delayed->expires);
114                         }
115                 }
116         }
117         mutex_unlock(&delayed_bios_lock);
118
119         if (start_timer)
120                 queue_timeout(dc, next_expires);
121
122         flush_bios(bio_list_get(&flush_bio_list));
123 }
124
125 static int flush_worker_fn(void *data)
126 {
127         struct delay_c *dc = data;
128
129         while (!kthread_should_stop()) {
130                 flush_delayed_bios(dc, false);
131                 mutex_lock(&delayed_bios_lock);
132                 if (unlikely(list_empty(&dc->delayed_bios))) {
133                         set_current_state(TASK_INTERRUPTIBLE);
134                         mutex_unlock(&delayed_bios_lock);
135                         schedule();
136                 } else {
137                         mutex_unlock(&delayed_bios_lock);
138                         cond_resched();
139                 }
140         }
141
142         return 0;
143 }
144
145 static void flush_expired_bios(struct work_struct *work)
146 {
147         struct delay_c *dc;
148
149         dc = container_of(work, struct delay_c, flush_expired_bios);
150         flush_delayed_bios(dc, false);
151 }
152
153 static void delay_dtr(struct dm_target *ti)
154 {
155         struct delay_c *dc = ti->private;
156
157         if (dc->kdelayd_wq)
158                 destroy_workqueue(dc->kdelayd_wq);
159
160         if (dc->read.dev)
161                 dm_put_device(ti, dc->read.dev);
162         if (dc->write.dev)
163                 dm_put_device(ti, dc->write.dev);
164         if (dc->flush.dev)
165                 dm_put_device(ti, dc->flush.dev);
166         if (dc->worker)
167                 kthread_stop(dc->worker);
168
169         mutex_destroy(&dc->timer_lock);
170
171         kfree(dc);
172 }
173
174 static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **argv)
175 {
176         int ret;
177         unsigned long long tmpll;
178         char dummy;
179
180         if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) {
181                 ti->error = "Invalid device sector";
182                 return -EINVAL;
183         }
184         c->start = tmpll;
185
186         if (sscanf(argv[2], "%u%c", &c->delay, &dummy) != 1) {
187                 ti->error = "Invalid delay";
188                 return -EINVAL;
189         }
190
191         ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &c->dev);
192         if (ret) {
193                 ti->error = "Device lookup failed";
194                 return ret;
195         }
196
197         return 0;
198 }
199
200 /*
201  * Mapping parameters:
202  *    <device> <offset> <delay> [<write_device> <write_offset> <write_delay>]
203  *
204  * With separate write parameters, the first set is only used for reads.
205  * Offsets are specified in sectors.
206  * Delays are specified in milliseconds.
207  */
208 static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
209 {
210         struct delay_c *dc;
211         int ret;
212         unsigned int max_delay;
213
214         if (argc != 3 && argc != 6 && argc != 9) {
215                 ti->error = "Requires exactly 3, 6 or 9 arguments";
216                 return -EINVAL;
217         }
218
219         dc = kzalloc(sizeof(*dc), GFP_KERNEL);
220         if (!dc) {
221                 ti->error = "Cannot allocate context";
222                 return -ENOMEM;
223         }
224
225         ti->private = dc;
226         INIT_LIST_HEAD(&dc->delayed_bios);
227         mutex_init(&dc->timer_lock);
228         dc->may_delay = true;
229         dc->argc = argc;
230
231         ret = delay_class_ctr(ti, &dc->read, argv);
232         if (ret)
233                 goto bad;
234         max_delay = dc->read.delay;
235
236         if (argc == 3) {
237                 ret = delay_class_ctr(ti, &dc->write, argv);
238                 if (ret)
239                         goto bad;
240                 ret = delay_class_ctr(ti, &dc->flush, argv);
241                 if (ret)
242                         goto bad;
243                 max_delay = max(max_delay, dc->write.delay);
244                 max_delay = max(max_delay, dc->flush.delay);
245                 goto out;
246         }
247
248         ret = delay_class_ctr(ti, &dc->write, argv + 3);
249         if (ret)
250                 goto bad;
251         if (argc == 6) {
252                 ret = delay_class_ctr(ti, &dc->flush, argv + 3);
253                 if (ret)
254                         goto bad;
255                 max_delay = max(max_delay, dc->flush.delay);
256                 goto out;
257         }
258
259         ret = delay_class_ctr(ti, &dc->flush, argv + 6);
260         if (ret)
261                 goto bad;
262         max_delay = max(max_delay, dc->flush.delay);
263
264 out:
265         if (max_delay < 50) {
266                 /*
267                  * In case of small requested delays, use kthread instead of
268                  * timers and workqueue to achieve better latency.
269                  */
270                 dc->worker = kthread_create(&flush_worker_fn, dc,
271                                             "dm-delay-flush-worker");
272                 if (IS_ERR(dc->worker)) {
273                         ret = PTR_ERR(dc->worker);
274                         dc->worker = NULL;
275                         goto bad;
276                 }
277         } else {
278                 timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
279                 INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
280                 dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
281                 if (!dc->kdelayd_wq) {
282                         ret = -EINVAL;
283                         DMERR("Couldn't start kdelayd");
284                         goto bad;
285                 }
286         }
287
288         ti->num_flush_bios = 1;
289         ti->num_discard_bios = 1;
290         ti->accounts_remapped_io = true;
291         ti->per_io_data_size = sizeof(struct dm_delay_info);
292         return 0;
293
294 bad:
295         delay_dtr(ti);
296         return ret;
297 }
298
299 static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
300 {
301         struct dm_delay_info *delayed;
302         unsigned long expires = 0;
303
304         if (!c->delay)
305                 return DM_MAPIO_REMAPPED;
306
307         delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
308
309         delayed->context = dc;
310         delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);
311
312         mutex_lock(&delayed_bios_lock);
313         if (unlikely(!dc->may_delay)) {
314                 mutex_unlock(&delayed_bios_lock);
315                 return DM_MAPIO_REMAPPED;
316         }
317         c->ops++;
318         list_add_tail(&delayed->list, &dc->delayed_bios);
319         mutex_unlock(&delayed_bios_lock);
320
321         if (delay_is_fast(dc))
322                 wake_up_process(dc->worker);
323         else
324                 queue_timeout(dc, expires);
325
326         return DM_MAPIO_SUBMITTED;
327 }
328
329 static void delay_presuspend(struct dm_target *ti)
330 {
331         struct delay_c *dc = ti->private;
332
333         mutex_lock(&delayed_bios_lock);
334         dc->may_delay = false;
335         mutex_unlock(&delayed_bios_lock);
336
337         if (!delay_is_fast(dc))
338                 del_timer_sync(&dc->delay_timer);
339         flush_delayed_bios(dc, true);
340 }
341
342 static void delay_resume(struct dm_target *ti)
343 {
344         struct delay_c *dc = ti->private;
345
346         dc->may_delay = true;
347 }
348
349 static int delay_map(struct dm_target *ti, struct bio *bio)
350 {
351         struct delay_c *dc = ti->private;
352         struct delay_class *c;
353         struct dm_delay_info *delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
354
355         if (bio_data_dir(bio) == WRITE) {
356                 if (unlikely(bio->bi_opf & REQ_PREFLUSH))
357                         c = &dc->flush;
358                 else
359                         c = &dc->write;
360         } else {
361                 c = &dc->read;
362         }
363         delayed->class = c;
364         bio_set_dev(bio, c->dev->bdev);
365         bio->bi_iter.bi_sector = c->start + dm_target_offset(ti, bio->bi_iter.bi_sector);
366
367         return delay_bio(dc, c, bio);
368 }
369
370 #define DMEMIT_DELAY_CLASS(c) \
371         DMEMIT("%s %llu %u", (c)->dev->name, (unsigned long long)(c)->start, (c)->delay)
372
373 static void delay_status(struct dm_target *ti, status_type_t type,
374                          unsigned int status_flags, char *result, unsigned int maxlen)
375 {
376         struct delay_c *dc = ti->private;
377         int sz = 0;
378
379         switch (type) {
380         case STATUSTYPE_INFO:
381                 DMEMIT("%u %u %u", dc->read.ops, dc->write.ops, dc->flush.ops);
382                 break;
383
384         case STATUSTYPE_TABLE:
385                 DMEMIT_DELAY_CLASS(&dc->read);
386                 if (dc->argc >= 6) {
387                         DMEMIT(" ");
388                         DMEMIT_DELAY_CLASS(&dc->write);
389                 }
390                 if (dc->argc >= 9) {
391                         DMEMIT(" ");
392                         DMEMIT_DELAY_CLASS(&dc->flush);
393                 }
394                 break;
395
396         case STATUSTYPE_IMA:
397                 *result = '\0';
398                 break;
399         }
400 }
401
402 static int delay_iterate_devices(struct dm_target *ti,
403                                  iterate_devices_callout_fn fn, void *data)
404 {
405         struct delay_c *dc = ti->private;
406         int ret = 0;
407
408         ret = fn(ti, dc->read.dev, dc->read.start, ti->len, data);
409         if (ret)
410                 goto out;
411         ret = fn(ti, dc->write.dev, dc->write.start, ti->len, data);
412         if (ret)
413                 goto out;
414         ret = fn(ti, dc->flush.dev, dc->flush.start, ti->len, data);
415         if (ret)
416                 goto out;
417
418 out:
419         return ret;
420 }
421
422 static struct target_type delay_target = {
423         .name        = "delay",
424         .version     = {1, 4, 0},
425         .features    = DM_TARGET_PASSES_INTEGRITY,
426         .module      = THIS_MODULE,
427         .ctr         = delay_ctr,
428         .dtr         = delay_dtr,
429         .map         = delay_map,
430         .presuspend  = delay_presuspend,
431         .resume      = delay_resume,
432         .status      = delay_status,
433         .iterate_devices = delay_iterate_devices,
434 };
435 module_dm(delay);
436
437 MODULE_DESCRIPTION(DM_NAME " delay target");
438 MODULE_AUTHOR("Heinz Mauelshagen <mauelshagen@redhat.com>");
439 MODULE_LICENSE("GPL");