1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Copyright (C) 2022, Alibaba Cloud
4 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
6 #include <linux/pseudo_fs.h>
7 #include <linux/fscache.h>
/* Serializes erofs_domain_list and setup/teardown of erofs_pseudo_mnt. */
10 static DEFINE_MUTEX(erofs_domain_list_lock);
/* Serializes erofs_domain_cookies_list. */
11 static DEFINE_MUTEX(erofs_domain_cookies_lock);
/* All registered domains (see erofs_fscache_register_domain()). */
12 static LIST_HEAD(erofs_domain_list);
/* Cookies shared across fs instances within a domain. */
13 static LIST_HEAD(erofs_domain_cookies_list);
/* Pseudo mount hosting anon inodes for domain-shared blobs; NULL until the first domain is set up. */
14 static struct vfsmount *erofs_pseudo_mnt;
/*
 * Set up the fs_context for the anonymous "pseudo_erofs" mount.
 * Returns 0 on success, -ENOMEM if init_pseudo() fails.
 */
16 static int erofs_anon_init_fs_context(struct fs_context *fc)
18 return init_pseudo(fc, EROFS_SUPER_MAGIC) ? 0 : -ENOMEM;
/* Anonymous pseudo filesystem type backing erofs_pseudo_mnt. */
21 static struct file_system_type erofs_anon_fs_type = {
23 .name = "pseudo_erofs",
24 .init_fs_context = erofs_anon_init_fs_context,
25 .kill_sb = kill_anon_super,
/*
 * One in-flight fscache read operation.  Refcounted (see io->ref users);
 * the final put ends the underlying cache operation.
 */
28 struct erofs_fscache_io {
29 struct netfs_cache_resources cres;	/* backend cache resources */
31 netfs_io_terminated_t end_io;	/* invoked when the read terminates */
/*
 * A read request covering a contiguous byte range of one address_space.
 * Completion marks the covered folios uptodate (erofs_fscache_req_complete).
 */
36 struct erofs_fscache_rq {
37 struct address_space *mapping; /* The mapping being accessed */
38 loff_t start; /* Start position */
39 size_t len; /* Length of the request */
40 size_t submitted; /* Length of submitted */
41 short error; /* 0 or error that occurred */
/*
 * Drop a reference on @io; on the final put, end the cache operation.
 * NOTE(review): judging by the erofs_fscache_req_io_put() caller this
 * returns true only when the last reference was dropped -- the return
 * statements are not visible here, confirm against the full source.
 */
45 static bool erofs_fscache_io_put(struct erofs_fscache_io *io)
47 if (!refcount_dec_and_test(&io->ref))
50 io->cres.ops->end_operation(&io->cres);
/*
 * Complete @req: walk every folio of the mapping covering
 * [req->start, req->start + req->len) and mark it uptodate.
 * NOTE(review): @failed presumably suppresses folio_mark_uptodate()
 * when req->error is set -- that branch is not visible here; confirm.
 */
55 static void erofs_fscache_req_complete(struct erofs_fscache_rq *req)
58 bool failed = req->error;
59 pgoff_t start_page = req->start / PAGE_SIZE;
/* last covered page, inclusive */
60 pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;
62 XA_STATE(xas, &req->mapping->i_pages, start_page);
65 xas_for_each(&xas, folio, last_page) {
66 if (xas_retry(&xas, folio))
69 folio_mark_uptodate(folio);
/* Drop a reference on @req; the final put completes (and frees) it. */
75 static void erofs_fscache_req_put(struct erofs_fscache_rq *req)
77 if (!refcount_dec_and_test(&req->ref))
79 erofs_fscache_req_complete(req);
/*
 * Allocate a zeroed read request for @mapping with an initial refcount
 * of 1.  NOTE(review): the assignments of @start/@len to the request
 * are not visible in this view -- presumed to happen alongside
 * req->mapping; confirm against the full source.
 */
83 static struct erofs_fscache_rq *erofs_fscache_req_alloc(struct address_space *mapping,
84 loff_t start, size_t len)
86 struct erofs_fscache_rq *req = kzalloc(sizeof(*req), GFP_KERNEL);
90 req->mapping = mapping;
93 refcount_set(&req->ref, 1);
/*
 * Drop @io's reference; if that was the last one, also release the
 * request reference taken in erofs_fscache_req_io_alloc().
 */
97 static void erofs_fscache_req_io_put(struct erofs_fscache_io *io)
99 struct erofs_fscache_rq *req = io->private;
101 if (erofs_fscache_io_put(io))
102 erofs_fscache_req_put(req);
/*
 * netfs completion callback for request-backed reads: record a read
 * error on the owning request, then drop the io (and paired request)
 * references.
 */
105 static void erofs_fscache_req_end_io(void *priv,
106 ssize_t transferred_or_error, bool was_async)
108 struct erofs_fscache_io *io = priv;
109 struct erofs_fscache_rq *req = io->private;
/* negative errno is encoded in transferred_or_error on failure */
111 if (IS_ERR_VALUE(transferred_or_error))
112 req->error = transferred_or_error;
113 erofs_fscache_req_io_put(io);
/*
 * Allocate an io bound to @req.  Takes an extra reference on @req
 * (dropped in erofs_fscache_req_io_put()) and starts the io refcount
 * at 1.
 */
116 static struct erofs_fscache_io *erofs_fscache_req_io_alloc(struct erofs_fscache_rq *req)
118 struct erofs_fscache_io *io = kzalloc(sizeof(*io), GFP_KERNEL);
122 io->end_io = erofs_fscache_req_end_io;
124 refcount_inc(&req->ref);
125 refcount_set(&io->ref, 1);
130 * Read data from fscache described by cookie at pstart physical address
131 * offset, and fill the read data into buffer described by io->iter.
 *
 * The read is sliced by prepare_ondemand_read(); each slice is issued
 * via fscache_read().  -EIOCBQUEUED means the slice was queued
 * asynchronously and its completion drops the io reference taken just
 * before the call.
133 static int erofs_fscache_read_io_async(struct fscache_cookie *cookie,
134 loff_t pstart, struct erofs_fscache_io *io)
136 enum netfs_io_source source;
137 struct netfs_cache_resources *cres = &io->cres;
138 struct iov_iter *iter = &io->iter;
141 ret = fscache_begin_read_operation(cres, cookie);
145 while (iov_iter_count(iter)) {
146 size_t orig_count = iov_iter_count(iter), len = orig_count;
147 unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;
/* ask the backend how much of [pstart, pstart+len) is readable */
149 source = cres->ops->prepare_ondemand_read(cres,
150 pstart, &len, LLONG_MAX, &flags, 0);
151 if (WARN_ON(len == 0))
152 source = NETFS_INVALID_READ;
153 if (source != NETFS_READ_FROM_CACHE) {
154 erofs_err(NULL, "prepare_ondemand_read failed (source %d)", source);
/* restrict the iter to this slice before issuing the read */
158 iov_iter_truncate(iter, len);
/* extra ref for the in-flight read; dropped by its completion */
159 refcount_inc(&io->ref);
160 ret = fscache_read(cres, pstart, iter, NETFS_READ_HOLE_FAIL,
162 if (ret == -EIOCBQUEUED)
165 erofs_err(NULL, "fscache_read failed (ret %d)", ret);
168 if (WARN_ON(iov_iter_count(iter)))
/* restore the remaining bytes for the next slice */
171 iov_iter_reexpand(iter, orig_count - len);
/*
 * Pseudo-bio carrier for the fscache path: embeds an io so the generic
 * bio helpers can be reused without a real block device.  The io member
 * must stay first (see the BUILD_BUG_ON in erofs_fscache_bio_endio()).
 */
177 struct erofs_fscache_bio {
178 struct erofs_fscache_io io;
179 struct bio bio; /* w/o bdev to share bio_add_page/endio() */
180 struct bio_vec bvecs[BIO_MAX_VECS];
/*
 * netfs completion callback for the bio path: translate a failure into
 * bio->bi_status, run the bio's own end_io, then drop the io reference.
 */
183 static void erofs_fscache_bio_endio(void *priv,
184 ssize_t transferred_or_error, bool was_async)
186 struct erofs_fscache_bio *io = priv;
188 if (IS_ERR_VALUE(transferred_or_error))
189 io->bio.bi_status = errno_to_blk_status(transferred_or_error);
190 io->bio.bi_end_io(&io->bio);
/* pin the io at offset 0 so the container and its io share an address */
191 BUILD_BUG_ON(offsetof(struct erofs_fscache_bio, io) != 0);
192 erofs_fscache_io_put(&io->io);
/*
 * Allocate a pseudo bio targeting @mdev's fscache cookie.  __GFP_NOFAIL
 * means the allocation cannot fail, so there is no error path.  The
 * cookie is stashed in io.private for erofs_fscache_submit_bio().
 */
195 struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev)
197 struct erofs_fscache_bio *io;
199 io = kmalloc(sizeof(*io), GFP_KERNEL | __GFP_NOFAIL);
/* no bdev: this bio is only a page-vector container for fscache I/O */
200 bio_init(&io->bio, NULL, io->bvecs, BIO_MAX_VECS, REQ_OP_READ);
201 io->io.private = mdev->m_fscache->cookie;
202 io->io.end_io = erofs_fscache_bio_endio;
203 refcount_set(&io->io.ref, 1);
/*
 * Issue an accumulated pseudo bio as an async fscache read.  Builds a
 * bvec iterator over the bio's vectors and reads from the cookie stored
 * by erofs_fscache_bio_alloc(), starting at the byte offset encoded in
 * bi_sector.  Drops the submission reference afterwards; on synchronous
 * failure the error is reflected into bio->bi_status.
 */
207 void erofs_fscache_submit_bio(struct bio *bio)
209 struct erofs_fscache_bio *io = container_of(bio,
210 struct erofs_fscache_bio, bio);
213 iov_iter_bvec(&io->io.iter, ITER_DEST, io->bvecs, bio->bi_vcnt,
214 bio->bi_iter.bi_size);
/* sectors are 512 bytes, hence the << 9 to get a byte offset */
215 ret = erofs_fscache_read_io_async(io->io.private,
216 bio->bi_iter.bi_sector << 9, &io->io);
217 erofs_fscache_io_put(&io->io);
220 bio->bi_status = errno_to_blk_status(ret);
/*
 * ->read_folio for metadata inodes: read one folio straight from the
 * fscache cookie attached to the inode (inode->i_private, set in
 * erofs_fscache_acquire_cookie()).  The physical offset equals the
 * folio position since metadata blobs are mapped 1:1.
 */
224 static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
226 struct erofs_fscache *ctx = folio->mapping->host->i_private;
228 struct erofs_fscache_rq *req;
229 struct erofs_fscache_io *io;
231 req = erofs_fscache_req_alloc(folio->mapping,
232 folio_pos(folio), folio_size(folio));
238 io = erofs_fscache_req_io_alloc(req);
/* read directly into the page cache pages backing this folio */
243 iov_iter_xarray(&io->iter, ITER_DEST, &folio->mapping->i_pages,
244 folio_pos(folio), folio_size(folio));
246 ret = erofs_fscache_read_io_async(ctx->cookie, folio_pos(folio), io);
250 erofs_fscache_req_io_put(io);
252 erofs_fscache_req_put(req);
/*
 * Submit one contiguous slice of @req starting at req->start +
 * req->submitted.  Three cases by mapping type:
 *   1. inline (EROFS_MAP_META): copy tail-packed data out of the
 *      metabuf and zero-pad the rest of the page;
 *   2. hole (!EROFS_MAP_MAPPED): zero the whole extent;
 *   3. mapped: resolve the backing device/cookie and issue an async
 *      fscache read.
 * Advances req->submitted by the amount handled.
 */
256 static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
258 struct address_space *mapping = req->mapping;
259 struct inode *inode = mapping->host;
260 struct super_block *sb = inode->i_sb;
261 struct erofs_fscache_io *io;
262 struct erofs_map_blocks map;
263 struct erofs_map_dev mdev;
264 loff_t pos = req->start + req->submitted;
269 ret = erofs_map_blocks(inode, &map);
/* case 1: inline data stored in the metadata block */
273 if (map.m_flags & EROFS_MAP_META) {
274 struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
275 struct iov_iter iter;
280 /* For tail packing layout, the offset may be non-zero. */
281 offset = erofs_blkoff(sb, map.m_pa);
282 blknr = erofs_blknr(sb, map.m_pa);
285 src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
289 iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
290 if (copy_to_iter(src + offset, size, &iter) != size) {
291 erofs_put_metabuf(&buf);
/* zero the remainder of the page past the inline data */
294 iov_iter_zero(PAGE_SIZE - size, &iter);
295 erofs_put_metabuf(&buf);
296 req->submitted += PAGE_SIZE;
300 count = req->len - req->submitted;
/* case 2: unmapped extent (hole) -- just zero-fill */
301 if (!(map.m_flags & EROFS_MAP_MAPPED)) {
302 struct iov_iter iter;
304 iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
305 iov_iter_zero(count, &iter);
306 req->submitted += count;
/* case 3: clamp to the end of this logical extent */
310 count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
311 DBG_BUGON(!count || count % PAGE_SIZE);
313 mdev = (struct erofs_map_dev) {
314 .m_deviceid = map.m_deviceid,
317 ret = erofs_map_dev(sb, &mdev);
321 io = erofs_fscache_req_io_alloc(req);
324 iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
/* physical position = device offset + intra-extent offset */
325 ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie,
326 mdev.m_pa + (pos - map.m_la), io);
327 erofs_fscache_req_io_put(io);
329 req->submitted += count;
/* Submit slices until the whole request is covered or an error occurs. */
333 static int erofs_fscache_data_read(struct erofs_fscache_rq *req)
338 ret = erofs_fscache_data_read_slice(req);
341 } while (!ret && req->submitted < req->len);
/* ->read_folio for regular data inodes backed by fscache. */
345 static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
347 struct erofs_fscache_rq *req;
350 req = erofs_fscache_req_alloc(folio->mapping,
351 folio_pos(folio), folio_size(folio));
357 ret = erofs_fscache_data_read(req);
358 erofs_fscache_req_put(req);
/*
 * ->readahead: build one request covering the whole readahead window,
 * detach the folios from @rac (their references are dropped on request
 * completion), then submit.  Readahead errors are deliberately ignored.
 */
362 static void erofs_fscache_readahead(struct readahead_control *rac)
364 struct erofs_fscache_rq *req;
366 if (!readahead_count(rac))
369 req = erofs_fscache_req_alloc(rac->mapping,
370 readahead_pos(rac), readahead_length(rac));
374 /* The request completion will drop refs on the folios. */
375 while (readahead_folio(rac))
378 erofs_fscache_data_read(req);
379 erofs_fscache_req_put(req);
/* aops for the internal metadata pseudo inodes (read-only, no readahead). */
382 static const struct address_space_operations erofs_fscache_meta_aops = {
383 .read_folio = erofs_fscache_meta_read_folio,
/* aops installed on regular erofs data inodes in fscache mode. */
386 const struct address_space_operations erofs_fscache_access_aops = {
387 .read_folio = erofs_fscache_read_folio,
388 .readahead = erofs_fscache_readahead,
/*
 * Drop a reference on @domain under erofs_domain_list_lock.  The final
 * put unlinks the domain, tears down the pseudo mount once no domain
 * remains, relinquishes the fscache volume and frees the domain id.
 * Note the unlock happens before the volume relinquish/kfree on the
 * final-put path (L400-L402 ordering as written upstream).
 */
391 static void erofs_fscache_domain_put(struct erofs_domain *domain)
393 mutex_lock(&erofs_domain_list_lock);
394 if (refcount_dec_and_test(&domain->ref)) {
395 list_del(&domain->list);
396 if (list_empty(&erofs_domain_list)) {
/* last domain gone: the shared pseudo mount is no longer needed */
397 kern_unmount(erofs_pseudo_mnt);
398 erofs_pseudo_mnt = NULL;
400 fscache_relinquish_volume(domain->volume, NULL, false);
401 mutex_unlock(&erofs_domain_list_lock);
402 kfree(domain->domain_id);
406 mutex_unlock(&erofs_domain_list_lock);
/*
 * Acquire the fscache volume for this super block, named
 * "erofs,<domain_id>" (shared-domain mode) or "erofs,<fsid>".
 * fscache_acquire_volume() returning NULL (cache unavailable) is
 * mapped to -EOPNOTSUPP.
 */
409 static int erofs_fscache_register_volume(struct super_block *sb)
411 struct erofs_sb_info *sbi = EROFS_SB(sb);
412 char *domain_id = sbi->domain_id;
413 struct fscache_volume *volume;
417 name = kasprintf(GFP_KERNEL, "erofs,%s",
418 domain_id ? domain_id : sbi->fsid);
422 volume = fscache_acquire_volume(name, NULL, NULL, 0);
423 if (IS_ERR_OR_NULL(volume)) {
424 erofs_err(sb, "failed to register volume for %s", name);
425 ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
429 sbi->volume = volume;
/*
 * Create a new erofs_domain for sbi->domain_id: register the fscache
 * volume, lazily create the shared pseudo mount on first use, and link
 * the domain onto erofs_domain_list with an initial refcount of 1.
 * Caller holds erofs_domain_list_lock (see erofs_fscache_register_domain).
 */
434 static int erofs_fscache_init_domain(struct super_block *sb)
437 struct erofs_domain *domain;
438 struct erofs_sb_info *sbi = EROFS_SB(sb);
440 domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
444 domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
445 if (!domain->domain_id) {
450 err = erofs_fscache_register_volume(sb);
/* first domain: bring up the pseudo mount for shared anon inodes */
454 if (!erofs_pseudo_mnt) {
455 struct vfsmount *mnt = kern_mount(&erofs_anon_fs_type);
460 erofs_pseudo_mnt = mnt;
463 domain->volume = sbi->volume;
464 refcount_set(&domain->ref, 1);
465 list_add(&domain->list, &erofs_domain_list);
466 sbi->domain = domain;
/* error path: free the duplicated id (and, per elided lines, the domain) */
469 kfree(domain->domain_id);
/*
 * Attach this super block to the domain named sbi->domain_id: reuse an
 * existing domain (bumping its refcount and sharing its volume) or
 * initialize a fresh one.  All under erofs_domain_list_lock.
 */
474 static int erofs_fscache_register_domain(struct super_block *sb)
477 struct erofs_domain *domain;
478 struct erofs_sb_info *sbi = EROFS_SB(sb);
480 mutex_lock(&erofs_domain_list_lock);
481 list_for_each_entry(domain, &erofs_domain_list, list) {
482 if (!strcmp(domain->domain_id, sbi->domain_id)) {
483 sbi->domain = domain;
484 sbi->volume = domain->volume;
485 refcount_inc(&domain->ref);
486 mutex_unlock(&erofs_domain_list_lock);
490 err = erofs_fscache_init_domain(sb);
491 mutex_unlock(&erofs_domain_list_lock);
/*
 * Acquire an fscache cookie named @name on the sb's volume and build
 * the backing anonymous inode whose page cache holds the blob data.
 * With EROFS_REG_COOKIE_SHARE the inode lives on the global pseudo
 * mount so it can be shared between erofs instances; otherwise it is
 * private to @sb.  Returns the new ctx or an ERR_PTR (error unwinding
 * is partially elided in this view).
 */
495 static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
496 char *name, unsigned int flags)
498 struct fscache_volume *volume = EROFS_SB(sb)->volume;
499 struct erofs_fscache *ctx;
500 struct fscache_cookie *cookie;
501 struct super_block *isb;
505 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
507 return ERR_PTR(-ENOMEM);
508 INIT_LIST_HEAD(&ctx->node);
509 refcount_set(&ctx->ref, 1);
511 cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
512 name, strlen(name), NULL, 0, 0);
514 erofs_err(sb, "failed to get cookie for %s", name);
/* pin the cookie for use; unused again in the relinquish path */
518 fscache_use_cookie(cookie, false);
521 * Allocate anonymous inode in global pseudo mount for shareable blobs,
522 * so that they are accessible among erofs fs instances.
524 isb = flags & EROFS_REG_COOKIE_SHARE ? erofs_pseudo_mnt->mnt_sb : sb;
525 inode = new_inode(isb);
527 erofs_err(sb, "failed to get anon inode for %s", name);
/* blob size is unknown up front; reads are bounded by the cookie */
532 inode->i_size = OFFSET_MAX;
533 inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
534 mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
535 inode->i_blkbits = EROFS_SB(sb)->blkszbits;
/* let meta_read_folio find the ctx (and its cookie) via the inode */
536 inode->i_private = ctx;
538 ctx->cookie = cookie;
/* error path: undo the cookie use/acquire */
543 fscache_unuse_cookie(cookie, NULL, NULL);
544 fscache_relinquish_cookie(cookie, false);
/* Release ctx->cookie (unuse + relinquish); teardown counterpart of acquire. */
550 static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
552 fscache_unuse_cookie(ctx->cookie, NULL, NULL);
553 fscache_relinquish_cookie(ctx->cookie, false);
/*
 * Create a domain-shared cookie: acquire it, remember its @name for
 * later lookups, take a domain reference and link the ctx onto
 * erofs_domain_cookies_list.  Caller holds erofs_domain_cookies_lock.
 */
559 static struct erofs_fscache *erofs_domain_init_cookie(struct super_block *sb,
560 char *name, unsigned int flags)
562 struct erofs_fscache *ctx;
563 struct erofs_domain *domain = EROFS_SB(sb)->domain;
565 ctx = erofs_fscache_acquire_cookie(sb, name, flags);
569 ctx->name = kstrdup(name, GFP_KERNEL);
571 erofs_fscache_relinquish_cookie(ctx);
572 return ERR_PTR(-ENOMEM);
575 refcount_inc(&domain->ref);
576 ctx->domain = domain;
577 list_add(&ctx->node, &erofs_domain_cookies_list);
/*
 * Look up or create a shared cookie named @name within the sb's domain.
 * An existing match is reused with a refcount bump, unless
 * EROFS_REG_COOKIE_NEED_NOEXIST demands uniqueness, in which case a
 * duplicate yields -EEXIST.  SHARE is forced so the backing inode lands
 * on the pseudo mount.
 */
581 static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
582 char *name, unsigned int flags)
584 struct erofs_fscache *ctx;
585 struct erofs_domain *domain = EROFS_SB(sb)->domain;
587 flags |= EROFS_REG_COOKIE_SHARE;
588 mutex_lock(&erofs_domain_cookies_lock);
589 list_for_each_entry(ctx, &erofs_domain_cookies_list, node) {
590 if (ctx->domain != domain || strcmp(ctx->name, name))
592 if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
593 refcount_inc(&ctx->ref);
595 erofs_err(sb, "%s already exists in domain %s", name,
597 ctx = ERR_PTR(-EEXIST);
599 mutex_unlock(&erofs_domain_cookies_lock);
602 ctx = erofs_domain_init_cookie(sb, name, flags);
603 mutex_unlock(&erofs_domain_cookies_lock);
/*
 * Public entry: register a cookie for @sb, dispatching to the shared-
 * domain path when a domain_id is configured, otherwise acquiring a
 * per-sb cookie directly.
 */
607 struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
611 if (EROFS_SB(sb)->domain_id)
612 return erofs_domain_register_cookie(sb, name, flags);
613 return erofs_fscache_acquire_cookie(sb, name, flags);
/*
 * Release @ctx.  Non-domain cookies are relinquished directly; domain
 * cookies are refcounted under erofs_domain_cookies_lock, and on the
 * final put the ctx is unlinked/relinquished and the domain reference
 * taken in erofs_domain_init_cookie() is dropped outside the lock.
 */
616 void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
618 struct erofs_domain *domain = NULL;
623 return erofs_fscache_relinquish_cookie(ctx);
625 mutex_lock(&erofs_domain_cookies_lock);
626 if (refcount_dec_and_test(&ctx->ref)) {
627 domain = ctx->domain;
628 list_del(&ctx->node);
629 erofs_fscache_relinquish_cookie(ctx);
631 mutex_unlock(&erofs_domain_cookies_lock);
/* domain stays NULL unless the last ctx ref was just dropped */
633 erofs_fscache_domain_put(domain);
/*
 * Mount-time setup: register the domain (shared mode) or just the
 * volume, then register the primary data blob cookie named after
 * sbi->fsid and store it in sbi->s_fscache.
 */
636 int erofs_fscache_register_fs(struct super_block *sb)
639 struct erofs_sb_info *sbi = EROFS_SB(sb);
640 struct erofs_fscache *fscache;
641 unsigned int flags = 0;
644 ret = erofs_fscache_register_domain(sb);
646 ret = erofs_fscache_register_volume(sb);
651 * When shared domain is enabled, using NEED_NOEXIST to guarantee
652 * the primary data blob (aka fsid) is unique in the shared domain.
654 * For non-shared-domain case, fscache_acquire_volume() invoked by
655 * erofs_fscache_register_volume() has already guaranteed
656 * the uniqueness of primary data blob.
658 * Acquired domain/volume will be relinquished in kill_sb() on error.
661 flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
662 fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
664 return PTR_ERR(fscache);
666 sbi->s_fscache = fscache;
/*
 * Umount-time teardown, mirroring erofs_fscache_register_fs(): drop the
 * primary cookie, then release either the domain reference or (in
 * non-shared mode, per the elided branch) the volume itself.
 */
670 void erofs_fscache_unregister_fs(struct super_block *sb)
672 struct erofs_sb_info *sbi = EROFS_SB(sb);
674 erofs_fscache_unregister_cookie(sbi->s_fscache);
677 erofs_fscache_domain_put(sbi->domain);
679 fscache_relinquish_volume(sbi->volume, NULL, false);
681 sbi->s_fscache = NULL;