// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022, Alibaba Cloud
 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
 */
#include <linux/pseudo_fs.h>
#include <linux/fscache.h>
#include "internal.h"
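
/*
 * Fscache-based on-demand read backend for EROFS: instead of a local block
 * device, image data is backed by fscache cookies (one per data blob), and
 * reads are served asynchronously from the backing cache files.
 */
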
static DEFINE_MUTEX(erofs_domain_list_lock);
static DEFINE_MUTEX(erofs_domain_cookies_lock);
static LIST_HEAD(erofs_domain_list);
static LIST_HEAD(erofs_domain_cookies_list);
static struct vfsmount *erofs_pseudo_mnt;

static int erofs_anon_init_fs_context(struct fs_context *fc)
{
	return init_pseudo(fc, EROFS_SUPER_MAGIC) ? 0 : -ENOMEM;
}

static struct file_system_type erofs_anon_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "pseudo_erofs",
	.init_fs_context = erofs_anon_init_fs_context,
	.kill_sb	= kill_anon_super,
};

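/*
 * The pseudo filesystem above only hosts a global anonymous mount: the
 * anonymous inodes backing blobs shared across filesystem instances (see
 * erofs_fscache_acquire_cookie() below) are allocated from its superblock.
 */
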
struct erofs_fscache_request {
	struct erofs_fscache_request *primary;
	struct netfs_cache_resources cache_resources;
	struct address_space	*mapping;	/* The mapping being accessed */
	loff_t			start;		/* Start position */
	size_t			len;		/* Length of the request */
	size_t			submitted;	/* Length of submitted */
	short			error;		/* 0 or error that occurred */
	refcount_t		ref;
};

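/*
 * A request tracks one in-flight range of a read. The primary request spans
 * the whole range; when the range crosses several extents, sub-requests are
 * chained to it via ->primary, each holding a reference on the primary, so
 * that folios are completed only once the last sub-read has finished.
 */
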
static struct erofs_fscache_request *erofs_fscache_req_alloc(struct address_space *mapping,
					     loff_t start, size_t len)
{
	struct erofs_fscache_request *req;

	req = kzalloc(sizeof(struct erofs_fscache_request), GFP_KERNEL);
	if (!req)
		return ERR_PTR(-ENOMEM);

	req->mapping = mapping;
	req->start   = start;
	req->len     = len;
	refcount_set(&req->ref, 1);

	return req;
}

static struct erofs_fscache_request *erofs_fscache_req_chain(struct erofs_fscache_request *primary,
					     size_t len)
{
	struct erofs_fscache_request *req;

	/* use primary request for the first submission */
	if (!primary->submitted) {
		refcount_inc(&primary->ref);
		return primary;
	}

	req = erofs_fscache_req_alloc(primary->mapping,
			primary->start + primary->submitted, len);
	if (!IS_ERR(req)) {
		req->primary = primary;
		refcount_inc(&primary->ref);
	}
	return req;
}

static void erofs_fscache_req_complete(struct erofs_fscache_request *req)
{
	struct folio *folio;
	bool failed = req->error;
	pgoff_t start_page = req->start / PAGE_SIZE;
	pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;

	XA_STATE(xas, &req->mapping->i_pages, start_page);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		if (xas_retry(&xas, folio))
			continue;
		if (!failed)
			folio_mark_uptodate(folio);
		folio_unlock(folio);
	}
	rcu_read_unlock();
}

static void erofs_fscache_req_put(struct erofs_fscache_request *req)
{
	if (refcount_dec_and_test(&req->ref)) {
		if (req->cache_resources.ops)
			req->cache_resources.ops->end_operation(&req->cache_resources);
		if (!req->primary)
			erofs_fscache_req_complete(req);
		else
			erofs_fscache_req_put(req->primary);
		kfree(req);
	}
}

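/*
 * Completion callback of a single fscache read: propagate a failure to the
 * primary request (if chained) and drop the reference taken for this read.
 */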
static void erofs_fscache_subreq_complete(void *priv,
		ssize_t transferred_or_error, bool was_async)
{
	struct erofs_fscache_request *req = priv;

	if (IS_ERR_VALUE(transferred_or_error)) {
		if (req->primary)
			req->primary->error = transferred_or_error;
		else
			req->error = transferred_or_error;
	}
	erofs_fscache_req_put(req);
}

/*
 * Read data from fscache (cookie, pstart, len), and fill the read data into
 * page cache described by (req->mapping, lstart, len). @pstart describes the
 * start physical address in the cache file.
 */
static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
		struct erofs_fscache_request *req, loff_t pstart, size_t len)
{
	enum netfs_io_source source;
	struct super_block *sb = req->mapping->host->i_sb;
	struct netfs_cache_resources *cres = &req->cache_resources;
	struct iov_iter iter;
	loff_t lstart = req->start + req->submitted;
	size_t done = 0;
	int ret;

	DBG_BUGON(len > req->len - req->submitted);

	ret = fscache_begin_read_operation(cres, cookie);
	if (ret)
		return ret;

	while (done < len) {
		loff_t sstart = pstart + done;
		size_t slen = len - done;
		unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;

		source = cres->ops->prepare_ondemand_read(cres,
				sstart, &slen, LLONG_MAX, &flags, 0);
		if (WARN_ON(slen == 0))
			source = NETFS_INVALID_READ;
		if (source != NETFS_READ_FROM_CACHE) {
			erofs_err(sb, "failed to fscache prepare_read (source %d)", source);
			return -EIO;
		}

		refcount_inc(&req->ref);
		iov_iter_xarray(&iter, ITER_DEST, &req->mapping->i_pages,
				lstart + done, slen);

		ret = fscache_read(cres, sstart, &iter, NETFS_READ_HOLE_FAIL,
				   erofs_fscache_subreq_complete, req);
		if (ret == -EIOCBQUEUED)
			ret = 0;
		if (ret) {
			erofs_err(sb, "failed to fscache_read (ret %d)", ret);
			return ret;
		}

		done += slen;
	}
	DBG_BUGON(done != len);
	return 0;
}

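/*
 * read_folio() for the anonymous inodes backing blobs: such an inode maps
 * linearly onto a single cookie, so the folio is read directly at its file
 * position.
 */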
static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
	int ret;
	struct erofs_fscache *ctx = folio->mapping->host->i_private;
	struct erofs_fscache_request *req;

	req = erofs_fscache_req_alloc(folio->mapping,
				folio_pos(folio), folio_size(folio));
	if (IS_ERR(req)) {
		folio_unlock(folio);
		return PTR_ERR(req);
	}

	ret = erofs_fscache_read_folios_async(ctx->cookie, req,
				folio_pos(folio), folio_size(folio));
	if (ret)
		req->error = ret;

	erofs_fscache_req_put(req);
	return ret;
}

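/*
 * Read one slice of a regular file: inline (tail-packed) data is copied
 * from the metadata buffer, unmapped ranges are zeroed, and mapped extents
 * are read asynchronously from the corresponding blob cookie.
 */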
static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
{
	struct address_space *mapping = primary->mapping;
	struct inode *inode = mapping->host;
	struct super_block *sb = inode->i_sb;
	struct erofs_fscache_request *req;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;
	struct iov_iter iter;
	loff_t pos = primary->start + primary->submitted;
	size_t count;
	int ret;

	map.m_la = pos;
	ret = erofs_map_blocks(inode, &map);
	if (ret)
		return ret;

	if (map.m_flags & EROFS_MAP_META) {
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
		struct iov_iter iter;
		erofs_blk_t blknr;
		size_t offset, size;
		void *src;

		/* For tail packing layout, the offset may be non-zero. */
		offset = erofs_blkoff(sb, map.m_pa);
		blknr = erofs_blknr(sb, map.m_pa);
		size = map.m_llen;

		src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
		if (IS_ERR(src))
			return PTR_ERR(src);

		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
		if (copy_to_iter(src + offset, size, &iter) != size) {
			erofs_put_metabuf(&buf);
			return -EFAULT;
		}
		iov_iter_zero(PAGE_SIZE - size, &iter);
		erofs_put_metabuf(&buf);
		primary->submitted += PAGE_SIZE;
		return 0;
	}

	count = primary->len - primary->submitted;
	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
		iov_iter_zero(count, &iter);
		primary->submitted += count;
		return 0;
	}

	count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
	DBG_BUGON(!count || count % PAGE_SIZE);

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	req = erofs_fscache_req_chain(primary, count);
	if (IS_ERR(req))
		return PTR_ERR(req);

	ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
			req, mdev.m_pa + (pos - map.m_la), count);
	erofs_fscache_req_put(req);
	primary->submitted += count;
	return ret;
}

static int erofs_fscache_data_read(struct erofs_fscache_request *req)
{
	int ret;

	do {
		ret = erofs_fscache_data_read_slice(req);
		if (ret)
			req->error = ret;
	} while (!ret && req->submitted < req->len);

	return ret;
}

static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
	struct erofs_fscache_request *req;
	int ret;

	req = erofs_fscache_req_alloc(folio->mapping,
			folio_pos(folio), folio_size(folio));
	if (IS_ERR(req)) {
		folio_unlock(folio);
		return PTR_ERR(req);
	}

	ret = erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
	return ret;
}

static void erofs_fscache_readahead(struct readahead_control *rac)
{
	struct erofs_fscache_request *req;

	if (!readahead_count(rac))
		return;

	req = erofs_fscache_req_alloc(rac->mapping,
			readahead_pos(rac), readahead_length(rac));
	if (IS_ERR(req))
		return;

	/* The request completion will drop refs on the folios. */
	while (readahead_folio(rac))
		;

	erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
}

static const struct address_space_operations erofs_fscache_meta_aops = {
	.read_folio = erofs_fscache_meta_read_folio,
};

const struct address_space_operations erofs_fscache_access_aops = {
	.read_folio = erofs_fscache_read_folio,
	.readahead = erofs_fscache_readahead,
};

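/*
 * Domain management: instances mounted with the same domain_id share one
 * erofs_domain (and hence one fscache volume), so that identical blobs
 * within the domain are registered and cached only once.
 */
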
static void erofs_fscache_domain_put(struct erofs_domain *domain)
{
	mutex_lock(&erofs_domain_list_lock);
	if (refcount_dec_and_test(&domain->ref)) {
		list_del(&domain->list);
		if (list_empty(&erofs_domain_list)) {
			kern_unmount(erofs_pseudo_mnt);
			erofs_pseudo_mnt = NULL;
		}
		fscache_relinquish_volume(domain->volume, NULL, false);
		mutex_unlock(&erofs_domain_list_lock);
		kfree(domain->domain_id);
		kfree(domain);
		return;
	}
	mutex_unlock(&erofs_domain_list_lock);
}

static int erofs_fscache_register_volume(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	char *domain_id = sbi->domain_id;
	struct fscache_volume *volume;
	char *name;
	int ret = 0;

	name = kasprintf(GFP_KERNEL, "erofs,%s",
			 domain_id ? domain_id : sbi->fsid);
	if (!name)
		return -ENOMEM;

	volume = fscache_acquire_volume(name, NULL, NULL, 0);
	if (IS_ERR_OR_NULL(volume)) {
		erofs_err(sb, "failed to register volume for %s", name);
		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
		volume = NULL;
	}

	sbi->volume = volume;
	kfree(name);
	return ret;
}

static int erofs_fscache_init_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
	if (!domain)
		return -ENOMEM;

	domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
	if (!domain->domain_id) {
		kfree(domain);
		return -ENOMEM;
	}

	err = erofs_fscache_register_volume(sb);
	if (err)
		goto out;

	if (!erofs_pseudo_mnt) {
		struct vfsmount *mnt = kern_mount(&erofs_anon_fs_type);
		if (IS_ERR(mnt)) {
			err = PTR_ERR(mnt);
			goto out;
		}
		erofs_pseudo_mnt = mnt;
	}

	domain->volume = sbi->volume;
	refcount_set(&domain->ref, 1);
	list_add(&domain->list, &erofs_domain_list);
	sbi->domain = domain;
	return 0;
out:
	kfree(domain->domain_id);
	kfree(domain);
	return err;
}

static int erofs_fscache_register_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_lock(&erofs_domain_list_lock);
	list_for_each_entry(domain, &erofs_domain_list, list) {
		if (!strcmp(domain->domain_id, sbi->domain_id)) {
			sbi->domain = domain;
			sbi->volume = domain->volume;
			refcount_inc(&domain->ref);
			mutex_unlock(&erofs_domain_list_lock);
			return 0;
		}
	}
	err = erofs_fscache_init_domain(sb);
	mutex_unlock(&erofs_domain_list_lock);
	return err;
}

static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
						char *name, unsigned int flags)
{
	struct fscache_volume *volume = EROFS_SB(sb)->volume;
	struct erofs_fscache *ctx;
	struct fscache_cookie *cookie;
	struct super_block *isb;
	struct inode *inode;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&ctx->node);
	refcount_set(&ctx->ref, 1);

	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
					name, strlen(name), NULL, 0, 0);
	if (!cookie) {
		erofs_err(sb, "failed to get cookie for %s", name);
		ret = -EINVAL;
		goto err;
	}
	fscache_use_cookie(cookie, false);

	/*
	 * Allocate anonymous inode in global pseudo mount for shareable blobs,
	 * so that they are accessible among erofs fs instances.
	 */
	isb = flags & EROFS_REG_COOKIE_SHARE ? erofs_pseudo_mnt->mnt_sb : sb;
	inode = new_inode(isb);
	if (!inode) {
		erofs_err(sb, "failed to get anon inode for %s", name);
		ret = -ENOMEM;
		goto err_cookie;
	}

	inode->i_size = OFFSET_MAX;
	inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
	inode->i_blkbits = EROFS_SB(sb)->blkszbits;
	inode->i_private = ctx;

	ctx->cookie = cookie;
	ctx->inode = inode;
	return ctx;

err_cookie:
	fscache_unuse_cookie(cookie, NULL, NULL);
	fscache_relinquish_cookie(cookie, false);
err:
	kfree(ctx);
	return ERR_PTR(ret);
}

static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
	iput(ctx->inode);
	kfree(ctx->name);
	kfree(ctx);
}

static struct erofs_fscache *erofs_domain_init_cookie(struct super_block *sb,
						char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	ctx = erofs_fscache_acquire_cookie(sb, name, flags);
	if (IS_ERR(ctx))
		return ctx;

	ctx->name = kstrdup(name, GFP_KERNEL);
	if (!ctx->name) {
		erofs_fscache_relinquish_cookie(ctx);
		return ERR_PTR(-ENOMEM);
	}

	refcount_inc(&domain->ref);
	ctx->domain = domain;
	list_add(&ctx->node, &erofs_domain_cookies_list);
	return ctx;
}

static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
						char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	flags |= EROFS_REG_COOKIE_SHARE;
	mutex_lock(&erofs_domain_cookies_lock);
	list_for_each_entry(ctx, &erofs_domain_cookies_list, node) {
		if (ctx->domain != domain || strcmp(ctx->name, name))
			continue;
		if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
			refcount_inc(&ctx->ref);
		} else {
			erofs_err(sb, "%s already exists in domain %s", name,
				  domain->domain_id);
			ctx = ERR_PTR(-EEXIST);
		}
		mutex_unlock(&erofs_domain_cookies_lock);
		return ctx;
	}
	ctx = erofs_domain_init_cookie(sb, name, flags);
	mutex_unlock(&erofs_domain_cookies_lock);
	return ctx;
}

struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
						    char *name,
						    unsigned int flags)
{
	if (EROFS_SB(sb)->domain_id)
		return erofs_domain_register_cookie(sb, name, flags);
	return erofs_fscache_acquire_cookie(sb, name, flags);
}

void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
{
	struct erofs_domain *domain = NULL;

	if (!ctx)
		return;
	if (!ctx->domain)
		return erofs_fscache_relinquish_cookie(ctx);

	mutex_lock(&erofs_domain_cookies_lock);
	if (refcount_dec_and_test(&ctx->ref)) {
		domain = ctx->domain;
		list_del(&ctx->node);
		erofs_fscache_relinquish_cookie(ctx);
	}
	mutex_unlock(&erofs_domain_cookies_lock);
	if (domain)
		erofs_fscache_domain_put(domain);
}

int erofs_fscache_register_fs(struct super_block *sb)
{
	int ret;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fscache *fscache;
	unsigned int flags = 0;

	if (sbi->domain_id)
		ret = erofs_fscache_register_domain(sb);
	else
		ret = erofs_fscache_register_volume(sb);
	if (ret)
		return ret;

	/*
	 * When shared domain is enabled, using NEED_NOEXIST to guarantee
	 * the primary data blob (aka fsid) is unique in the shared domain.
	 *
	 * For non-shared-domain case, fscache_acquire_volume() invoked by
	 * erofs_fscache_register_volume() has already guaranteed
	 * the uniqueness of primary data blob.
	 *
	 * Acquired domain/volume will be relinquished in kill_sb() on error.
	 */
	if (sbi->domain_id)
		flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
	fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
	if (IS_ERR(fscache))
		return PTR_ERR(fscache);

	sbi->s_fscache = fscache;
	return 0;
}

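/*
 * Usage sketch (illustrative): this backend is selected at mount time via
 * the fsid= mount option (stored in sbi->fsid above), optionally joining a
 * shared domain via domain_id=, e.g.
 *	mount -t erofs none -o fsid=myblob /mnt
 *	mount -t erofs none -o fsid=myblob,domain_id=mydomain /mnt
 */
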
void erofs_fscache_unregister_fs(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	erofs_fscache_unregister_cookie(sbi->s_fscache);

	if (sbi->domain)
		erofs_fscache_domain_put(sbi->domain);
	else
		fscache_relinquish_volume(sbi->volume, NULL, false);

	sbi->s_fscache = NULL;
	sbi->volume = NULL;
	sbi->domain = NULL;
}