GNU Linux-libre 5.19-rc6-gnu
[releases.git] / fs / cachefiles / io.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* kiocb-using read/write
3  *
4  * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7
8 #include <linux/mount.h>
9 #include <linux/slab.h>
10 #include <linux/file.h>
11 #include <linux/uio.h>
12 #include <linux/falloc.h>
13 #include <linux/sched/mm.h>
14 #include <trace/events/fscache.h>
15 #include "internal.h"
16
17 struct cachefiles_kiocb {
18         struct kiocb            iocb;
19         refcount_t              ki_refcnt;
20         loff_t                  start;
21         union {
22                 size_t          skipped;
23                 size_t          len;
24         };
25         struct cachefiles_object *object;
26         netfs_io_terminated_t   term_func;
27         void                    *term_func_priv;
28         bool                    was_async;
29         unsigned int            inval_counter;  /* Copy of cookie->inval_counter */
30         u64                     b_writing;
31 };
32
33 static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki)
34 {
35         if (refcount_dec_and_test(&ki->ki_refcnt)) {
36                 cachefiles_put_object(ki->object, cachefiles_obj_put_ioreq);
37                 fput(ki->iocb.ki_filp);
38                 kfree(ki);
39         }
40 }
41
42 /*
43  * Handle completion of a read from the cache.
44  */
45 static void cachefiles_read_complete(struct kiocb *iocb, long ret)
46 {
47         struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
48         struct inode *inode = file_inode(ki->iocb.ki_filp);
49
50         _enter("%ld", ret);
51
52         if (ret < 0)
53                 trace_cachefiles_io_error(ki->object, inode, ret,
54                                           cachefiles_trace_read_error);
55
56         if (ki->term_func) {
57                 if (ret >= 0) {
58                         if (ki->object->cookie->inval_counter == ki->inval_counter)
59                                 ki->skipped += ret;
60                         else
61                                 ret = -ESTALE;
62                 }
63
64                 ki->term_func(ki->term_func_priv, ret, ki->was_async);
65         }
66
67         cachefiles_put_kiocb(ki);
68 }
69
70 /*
71  * Initiate a read from the cache.
72  */
73 static int cachefiles_read(struct netfs_cache_resources *cres,
74                            loff_t start_pos,
75                            struct iov_iter *iter,
76                            enum netfs_read_from_hole read_hole,
77                            netfs_io_terminated_t term_func,
78                            void *term_func_priv)
79 {
80         struct cachefiles_object *object;
81         struct cachefiles_kiocb *ki;
82         struct file *file;
83         unsigned int old_nofs;
84         ssize_t ret = -ENOBUFS;
85         size_t len = iov_iter_count(iter), skipped = 0;
86
87         if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
88                 goto presubmission_error;
89
90         fscache_count_read();
91         object = cachefiles_cres_object(cres);
92         file = cachefiles_cres_file(cres);
93
94         _enter("%pD,%li,%llx,%zx/%llx",
95                file, file_inode(file)->i_ino, start_pos, len,
96                i_size_read(file_inode(file)));
97
98         /* If the caller asked us to seek for data before doing the read, then
99          * we should do that now.  If we find a gap, we fill it with zeros.
100          */
101         if (read_hole != NETFS_READ_HOLE_IGNORE) {
102                 loff_t off = start_pos, off2;
103
104                 off2 = cachefiles_inject_read_error();
105                 if (off2 == 0)
106                         off2 = vfs_llseek(file, off, SEEK_DATA);
107                 if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) {
108                         skipped = 0;
109                         ret = off2;
110                         goto presubmission_error;
111                 }
112
113                 if (off2 == -ENXIO || off2 >= start_pos + len) {
114                         /* The region is beyond the EOF or there's no more data
115                          * in the region, so clear the rest of the buffer and
116                          * return success.
117                          */
118                         ret = -ENODATA;
119                         if (read_hole == NETFS_READ_HOLE_FAIL)
120                                 goto presubmission_error;
121
122                         iov_iter_zero(len, iter);
123                         skipped = len;
124                         ret = 0;
125                         goto presubmission_error;
126                 }
127
128                 skipped = off2 - off;
129                 iov_iter_zero(skipped, iter);
130         }
131
132         ret = -ENOMEM;
133         ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
134         if (!ki)
135                 goto presubmission_error;
136
137         refcount_set(&ki->ki_refcnt, 2);
138         ki->iocb.ki_filp        = file;
139         ki->iocb.ki_pos         = start_pos + skipped;
140         ki->iocb.ki_flags       = IOCB_DIRECT;
141         ki->iocb.ki_ioprio      = get_current_ioprio();
142         ki->skipped             = skipped;
143         ki->object              = object;
144         ki->inval_counter       = cres->inval_counter;
145         ki->term_func           = term_func;
146         ki->term_func_priv      = term_func_priv;
147         ki->was_async           = true;
148
149         if (ki->term_func)
150                 ki->iocb.ki_complete = cachefiles_read_complete;
151
152         get_file(ki->iocb.ki_filp);
153         cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
154
155         trace_cachefiles_read(object, file_inode(file), ki->iocb.ki_pos, len - skipped);
156         old_nofs = memalloc_nofs_save();
157         ret = cachefiles_inject_read_error();
158         if (ret == 0)
159                 ret = vfs_iocb_iter_read(file, &ki->iocb, iter);
160         memalloc_nofs_restore(old_nofs);
161         switch (ret) {
162         case -EIOCBQUEUED:
163                 goto in_progress;
164
165         case -ERESTARTSYS:
166         case -ERESTARTNOINTR:
167         case -ERESTARTNOHAND:
168         case -ERESTART_RESTARTBLOCK:
169                 /* There's no easy way to restart the syscall since other AIO's
170                  * may be already running. Just fail this IO with EINTR.
171                  */
172                 ret = -EINTR;
173                 fallthrough;
174         default:
175                 ki->was_async = false;
176                 cachefiles_read_complete(&ki->iocb, ret);
177                 if (ret > 0)
178                         ret = 0;
179                 break;
180         }
181
182 in_progress:
183         cachefiles_put_kiocb(ki);
184         _leave(" = %zd", ret);
185         return ret;
186
187 presubmission_error:
188         if (term_func)
189                 term_func(term_func_priv, ret < 0 ? ret : skipped, false);
190         return ret;
191 }
192
193 /*
194  * Query the occupancy of the cache in a region, returning where the next chunk
195  * of data starts and how long it is.
196  */
197 static int cachefiles_query_occupancy(struct netfs_cache_resources *cres,
198                                       loff_t start, size_t len, size_t granularity,
199                                       loff_t *_data_start, size_t *_data_len)
200 {
201         struct cachefiles_object *object;
202         struct file *file;
203         loff_t off, off2;
204
205         *_data_start = -1;
206         *_data_len = 0;
207
208         if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
209                 return -ENOBUFS;
210
211         object = cachefiles_cres_object(cres);
212         file = cachefiles_cres_file(cres);
213         granularity = max_t(size_t, object->volume->cache->bsize, granularity);
214
215         _enter("%pD,%li,%llx,%zx/%llx",
216                file, file_inode(file)->i_ino, start, len,
217                i_size_read(file_inode(file)));
218
219         off = cachefiles_inject_read_error();
220         if (off == 0)
221                 off = vfs_llseek(file, start, SEEK_DATA);
222         if (off == -ENXIO)
223                 return -ENODATA; /* Beyond EOF */
224         if (off < 0 && off >= (loff_t)-MAX_ERRNO)
225                 return -ENOBUFS; /* Error. */
226         if (round_up(off, granularity) >= start + len)
227                 return -ENODATA; /* No data in range */
228
229         off2 = cachefiles_inject_read_error();
230         if (off2 == 0)
231                 off2 = vfs_llseek(file, off, SEEK_HOLE);
232         if (off2 == -ENXIO)
233                 return -ENODATA; /* Beyond EOF */
234         if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO)
235                 return -ENOBUFS; /* Error. */
236
237         /* Round away partial blocks */
238         off = round_up(off, granularity);
239         off2 = round_down(off2, granularity);
240         if (off2 <= off)
241                 return -ENODATA;
242
243         *_data_start = off;
244         if (off2 > start + len)
245                 *_data_len = len;
246         else
247                 *_data_len = off2 - off;
248         return 0;
249 }
250
251 /*
252  * Handle completion of a write to the cache.
253  */
254 static void cachefiles_write_complete(struct kiocb *iocb, long ret)
255 {
256         struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
257         struct cachefiles_object *object = ki->object;
258         struct inode *inode = file_inode(ki->iocb.ki_filp);
259
260         _enter("%ld", ret);
261
262         /* Tell lockdep we inherited freeze protection from submission thread */
263         __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
264         __sb_end_write(inode->i_sb, SB_FREEZE_WRITE);
265
266         if (ret < 0)
267                 trace_cachefiles_io_error(object, inode, ret,
268                                           cachefiles_trace_write_error);
269
270         atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing);
271         set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags);
272         if (ki->term_func)
273                 ki->term_func(ki->term_func_priv, ret, ki->was_async);
274         cachefiles_put_kiocb(ki);
275 }
276
277 /*
278  * Initiate a write to the cache.
279  */
280 int __cachefiles_write(struct cachefiles_object *object,
281                        struct file *file,
282                        loff_t start_pos,
283                        struct iov_iter *iter,
284                        netfs_io_terminated_t term_func,
285                        void *term_func_priv)
286 {
287         struct cachefiles_cache *cache;
288         struct cachefiles_kiocb *ki;
289         struct inode *inode;
290         unsigned int old_nofs;
291         ssize_t ret;
292         size_t len = iov_iter_count(iter);
293
294         fscache_count_write();
295         cache = object->volume->cache;
296
297         _enter("%pD,%li,%llx,%zx/%llx",
298                file, file_inode(file)->i_ino, start_pos, len,
299                i_size_read(file_inode(file)));
300
301         ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
302         if (!ki) {
303                 if (term_func)
304                         term_func(term_func_priv, -ENOMEM, false);
305                 return -ENOMEM;
306         }
307
308         refcount_set(&ki->ki_refcnt, 2);
309         ki->iocb.ki_filp        = file;
310         ki->iocb.ki_pos         = start_pos;
311         ki->iocb.ki_flags       = IOCB_DIRECT | IOCB_WRITE;
312         ki->iocb.ki_ioprio      = get_current_ioprio();
313         ki->object              = object;
314         ki->start               = start_pos;
315         ki->len                 = len;
316         ki->term_func           = term_func;
317         ki->term_func_priv      = term_func_priv;
318         ki->was_async           = true;
319         ki->b_writing           = (len + (1 << cache->bshift) - 1) >> cache->bshift;
320
321         if (ki->term_func)
322                 ki->iocb.ki_complete = cachefiles_write_complete;
323         atomic_long_add(ki->b_writing, &cache->b_writing);
324
325         /* Open-code file_start_write here to grab freeze protection, which
326          * will be released by another thread in aio_complete_rw().  Fool
327          * lockdep by telling it the lock got released so that it doesn't
328          * complain about the held lock when we return to userspace.
329          */
330         inode = file_inode(file);
331         __sb_start_write(inode->i_sb, SB_FREEZE_WRITE);
332         __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
333
334         get_file(ki->iocb.ki_filp);
335         cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
336
337         trace_cachefiles_write(object, inode, ki->iocb.ki_pos, len);
338         old_nofs = memalloc_nofs_save();
339         ret = cachefiles_inject_write_error();
340         if (ret == 0)
341                 ret = vfs_iocb_iter_write(file, &ki->iocb, iter);
342         memalloc_nofs_restore(old_nofs);
343         switch (ret) {
344         case -EIOCBQUEUED:
345                 goto in_progress;
346
347         case -ERESTARTSYS:
348         case -ERESTARTNOINTR:
349         case -ERESTARTNOHAND:
350         case -ERESTART_RESTARTBLOCK:
351                 /* There's no easy way to restart the syscall since other AIO's
352                  * may be already running. Just fail this IO with EINTR.
353                  */
354                 ret = -EINTR;
355                 fallthrough;
356         default:
357                 ki->was_async = false;
358                 cachefiles_write_complete(&ki->iocb, ret);
359                 if (ret > 0)
360                         ret = 0;
361                 break;
362         }
363
364 in_progress:
365         cachefiles_put_kiocb(ki);
366         _leave(" = %zd", ret);
367         return ret;
368 }
369
370 static int cachefiles_write(struct netfs_cache_resources *cres,
371                             loff_t start_pos,
372                             struct iov_iter *iter,
373                             netfs_io_terminated_t term_func,
374                             void *term_func_priv)
375 {
376         if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) {
377                 if (term_func)
378                         term_func(term_func_priv, -ENOBUFS, false);
379                 return -ENOBUFS;
380         }
381
382         return __cachefiles_write(cachefiles_cres_object(cres),
383                                   cachefiles_cres_file(cres),
384                                   start_pos, iter,
385                                   term_func, term_func_priv);
386 }
387
388 /*
389  * Prepare a read operation, shortening it to a cached/uncached
390  * boundary as appropriate.
391  */
392 static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *subreq,
393                                                       loff_t i_size)
394 {
395         enum cachefiles_prepare_read_trace why;
396         struct netfs_io_request *rreq = subreq->rreq;
397         struct netfs_cache_resources *cres = &rreq->cache_resources;
398         struct cachefiles_object *object;
399         struct cachefiles_cache *cache;
400         struct fscache_cookie *cookie = fscache_cres_cookie(cres);
401         const struct cred *saved_cred;
402         struct file *file = cachefiles_cres_file(cres);
403         enum netfs_io_source ret = NETFS_DOWNLOAD_FROM_SERVER;
404         loff_t off, to;
405         ino_t ino = file ? file_inode(file)->i_ino : 0;
406         int rc;
407
408         _enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size);
409
410         if (subreq->start >= i_size) {
411                 ret = NETFS_FILL_WITH_ZEROES;
412                 why = cachefiles_trace_read_after_eof;
413                 goto out_no_object;
414         }
415
416         if (test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) {
417                 __set_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
418                 why = cachefiles_trace_read_no_data;
419                 if (!test_bit(NETFS_SREQ_ONDEMAND, &subreq->flags))
420                         goto out_no_object;
421         }
422
423         /* The object and the file may be being created in the background. */
424         if (!file) {
425                 why = cachefiles_trace_read_no_file;
426                 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
427                         goto out_no_object;
428                 file = cachefiles_cres_file(cres);
429                 if (!file)
430                         goto out_no_object;
431                 ino = file_inode(file)->i_ino;
432         }
433
434         object = cachefiles_cres_object(cres);
435         cache = object->volume->cache;
436         cachefiles_begin_secure(cache, &saved_cred);
437 retry:
438         off = cachefiles_inject_read_error();
439         if (off == 0)
440                 off = vfs_llseek(file, subreq->start, SEEK_DATA);
441         if (off < 0 && off >= (loff_t)-MAX_ERRNO) {
442                 if (off == (loff_t)-ENXIO) {
443                         why = cachefiles_trace_read_seek_nxio;
444                         goto download_and_store;
445                 }
446                 trace_cachefiles_io_error(object, file_inode(file), off,
447                                           cachefiles_trace_seek_error);
448                 why = cachefiles_trace_read_seek_error;
449                 goto out;
450         }
451
452         if (off >= subreq->start + subreq->len) {
453                 why = cachefiles_trace_read_found_hole;
454                 goto download_and_store;
455         }
456
457         if (off > subreq->start) {
458                 off = round_up(off, cache->bsize);
459                 subreq->len = off - subreq->start;
460                 why = cachefiles_trace_read_found_part;
461                 goto download_and_store;
462         }
463
464         to = cachefiles_inject_read_error();
465         if (to == 0)
466                 to = vfs_llseek(file, subreq->start, SEEK_HOLE);
467         if (to < 0 && to >= (loff_t)-MAX_ERRNO) {
468                 trace_cachefiles_io_error(object, file_inode(file), to,
469                                           cachefiles_trace_seek_error);
470                 why = cachefiles_trace_read_seek_error;
471                 goto out;
472         }
473
474         if (to < subreq->start + subreq->len) {
475                 if (subreq->start + subreq->len >= i_size)
476                         to = round_up(to, cache->bsize);
477                 else
478                         to = round_down(to, cache->bsize);
479                 subreq->len = to - subreq->start;
480         }
481
482         why = cachefiles_trace_read_have_data;
483         ret = NETFS_READ_FROM_CACHE;
484         goto out;
485
486 download_and_store:
487         __set_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
488         if (test_bit(NETFS_SREQ_ONDEMAND, &subreq->flags)) {
489                 rc = cachefiles_ondemand_read(object, subreq->start,
490                                               subreq->len);
491                 if (!rc) {
492                         __clear_bit(NETFS_SREQ_ONDEMAND, &subreq->flags);
493                         goto retry;
494                 }
495                 ret = NETFS_INVALID_READ;
496         }
497 out:
498         cachefiles_end_secure(cache, saved_cred);
499 out_no_object:
500         trace_cachefiles_prep_read(subreq, ret, why, ino);
501         return ret;
502 }
503
504 /*
505  * Prepare for a write to occur.
506  */
507 int __cachefiles_prepare_write(struct cachefiles_object *object,
508                                struct file *file,
509                                loff_t *_start, size_t *_len,
510                                bool no_space_allocated_yet)
511 {
512         struct cachefiles_cache *cache = object->volume->cache;
513         loff_t start = *_start, pos;
514         size_t len = *_len, down;
515         int ret;
516
517         /* Round to DIO size */
518         down = start - round_down(start, PAGE_SIZE);
519         *_start = start - down;
520         *_len = round_up(down + len, PAGE_SIZE);
521
522         /* We need to work out whether there's sufficient disk space to perform
523          * the write - but we can skip that check if we have space already
524          * allocated.
525          */
526         if (no_space_allocated_yet)
527                 goto check_space;
528
529         pos = cachefiles_inject_read_error();
530         if (pos == 0)
531                 pos = vfs_llseek(file, *_start, SEEK_DATA);
532         if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
533                 if (pos == -ENXIO)
534                         goto check_space; /* Unallocated tail */
535                 trace_cachefiles_io_error(object, file_inode(file), pos,
536                                           cachefiles_trace_seek_error);
537                 return pos;
538         }
539         if ((u64)pos >= (u64)*_start + *_len)
540                 goto check_space; /* Unallocated region */
541
542         /* We have a block that's at least partially filled - if we're low on
543          * space, we need to see if it's fully allocated.  If it's not, we may
544          * want to cull it.
545          */
546         if (cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
547                                  cachefiles_has_space_check) == 0)
548                 return 0; /* Enough space to simply overwrite the whole block */
549
550         pos = cachefiles_inject_read_error();
551         if (pos == 0)
552                 pos = vfs_llseek(file, *_start, SEEK_HOLE);
553         if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
554                 trace_cachefiles_io_error(object, file_inode(file), pos,
555                                           cachefiles_trace_seek_error);
556                 return pos;
557         }
558         if ((u64)pos >= (u64)*_start + *_len)
559                 return 0; /* Fully allocated */
560
561         /* Partially allocated, but insufficient space: cull. */
562         fscache_count_no_write_space();
563         ret = cachefiles_inject_remove_error();
564         if (ret == 0)
565                 ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
566                                     *_start, *_len);
567         if (ret < 0) {
568                 trace_cachefiles_io_error(object, file_inode(file), ret,
569                                           cachefiles_trace_fallocate_error);
570                 cachefiles_io_error_obj(object,
571                                         "CacheFiles: fallocate failed (%d)\n", ret);
572                 ret = -EIO;
573         }
574
575         return ret;
576
577 check_space:
578         return cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
579                                     cachefiles_has_space_for_write);
580 }
581
582 static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
583                                     loff_t *_start, size_t *_len, loff_t i_size,
584                                     bool no_space_allocated_yet)
585 {
586         struct cachefiles_object *object = cachefiles_cres_object(cres);
587         struct cachefiles_cache *cache = object->volume->cache;
588         const struct cred *saved_cred;
589         int ret;
590
591         if (!cachefiles_cres_file(cres)) {
592                 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE))
593                         return -ENOBUFS;
594                 if (!cachefiles_cres_file(cres))
595                         return -ENOBUFS;
596         }
597
598         cachefiles_begin_secure(cache, &saved_cred);
599         ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres),
600                                          _start, _len,
601                                          no_space_allocated_yet);
602         cachefiles_end_secure(cache, saved_cred);
603         return ret;
604 }
605
606 /*
607  * Clean up an operation.
608  */
609 static void cachefiles_end_operation(struct netfs_cache_resources *cres)
610 {
611         struct file *file = cachefiles_cres_file(cres);
612
613         if (file)
614                 fput(file);
615         fscache_end_cookie_access(fscache_cres_cookie(cres), fscache_access_io_end);
616 }
617
618 static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {
619         .end_operation          = cachefiles_end_operation,
620         .read                   = cachefiles_read,
621         .write                  = cachefiles_write,
622         .prepare_read           = cachefiles_prepare_read,
623         .prepare_write          = cachefiles_prepare_write,
624         .query_occupancy        = cachefiles_query_occupancy,
625 };
626
627 /*
628  * Open the cache file when beginning a cache operation.
629  */
630 bool cachefiles_begin_operation(struct netfs_cache_resources *cres,
631                                 enum fscache_want_state want_state)
632 {
633         struct cachefiles_object *object = cachefiles_cres_object(cres);
634
635         if (!cachefiles_cres_file(cres)) {
636                 cres->ops = &cachefiles_netfs_cache_ops;
637                 if (object->file) {
638                         spin_lock(&object->lock);
639                         if (!cres->cache_priv2 && object->file)
640                                 cres->cache_priv2 = get_file(object->file);
641                         spin_unlock(&object->lock);
642                 }
643         }
644
645         if (!cachefiles_cres_file(cres) && want_state != FSCACHE_WANT_PARAMS) {
646                 pr_err("failed to get cres->file\n");
647                 return false;
648         }
649
650         return true;
651 }