2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2018 Miklos Szeredi <miklos@szeredi.hu>
5 This program can be distributed under the terms of the GNU GPL.
11 #include <linux/iversion.h>
12 #include <linux/posix_acl.h>
13 #include <linux/pagemap.h>
14 #include <linux/highmem.h>
/*
 * fuse_use_readdirplus - choose between plain readdir and readdirplus for @dir.
 *
 * Inputs consulted, in order: the connection flags do_readdirplus and
 * readdirplus_auto, then the per-inode FUSE_I_ADVISE_RDPLUS state bit.
 *
 * NOTE(review): the statements executed by each check are not visible in
 * this excerpt; confirm the value returned on every path.
 */
16 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
18 struct fuse_conn *fc = get_fuse_conn(dir);
19 struct fuse_inode *fi = get_fuse_inode(dir);
/* Connection-wide do_readdirplus flag. */
21 if (!fc->do_readdirplus)
/* Connection-wide readdirplus_auto flag. */
23 if (!fc->readdirplus_auto)
/* The advise bit is consumed here: testing it also clears it. */
25 if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
/*
 * fuse_add_dirent_to_cache - append one directory entry to the cached
 * directory stream kept in the page cache of @file (file->f_mapping).
 *
 * @file:   open directory
 * @dirent: entry to store; FUSE_DIRENT_SIZE(dirent) bytes are copied
 * @pos:    position the entry was read at; it has to equal the current end
 *          position of the cache (fi->rdc.pos) for the entry to be appended
 *
 * fi->rdc.lock is released around the page cache lookup, so the cache state
 * is sampled before the lookup and re-validated after it.
 */
32 static void fuse_add_dirent_to_cache(struct file *file,
33 struct fuse_dirent *dirent, loff_t pos)
35 struct fuse_inode *fi = get_fuse_inode(file_inode(file));
36 size_t reclen = FUSE_DIRENT_SIZE(dirent);
44 spin_lock(&fi->rdc.lock);
46 * Is cache already completed? Or this entry does not go at the end of
49 if (fi->rdc.cached || pos != fi->rdc.pos) {
50 spin_unlock(&fi->rdc.lock);
/* Remember the version so a change made while unlocked can be detected. */
53 version = fi->rdc.version;
/*
 * Split the cache size into an offset within a page and a page index.
 * NOTE(review): the assignment of size is not visible in this excerpt;
 * presumably it is a snapshot of fi->rdc.size, which it is compared with
 * after relocking.
 */
55 offset = size & ~PAGE_MASK;
56 index = size >> PAGE_SHIFT;
57 /* Dirent doesn't fit in current page? Jump to next page. */
58 if (offset + reclen > PAGE_SIZE) {
/* The spinlock is not held across the page cache calls below. */
62 spin_unlock(&fi->rdc.lock);
/*
 * Two ways of obtaining the target page are visible: an existing page via
 * find_lock_page(), or find_or_create_page() with the gfp mask of the
 * mapping.  NOTE(review): the condition selecting between them is not
 * visible here.
 */
65 page = find_lock_page(file->f_mapping, index);
67 page = find_or_create_page(file->f_mapping, index,
68 mapping_gfp_mask(file->f_mapping));
73 spin_lock(&fi->rdc.lock);
74 /* Raced with another readdir */
75 if (fi->rdc.version != version || fi->rdc.size != size ||
76 WARN_ON(fi->rdc.pos != pos))
/* Map the page and copy the whole record to its slot. */
79 addr = kmap_atomic(page);
82 memcpy(addr + offset, dirent, reclen);
/* The cache now ends right after this record, at position dirent->off. */
84 fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
85 fi->rdc.pos = dirent->off;
87 spin_unlock(&fi->rdc.lock);
/*
 * fuse_readdir_cache_end - mark the directory cache of @file as complete.
 *
 * Only takes effect when @pos equals the current end position of the cache
 * (fi->rdc.pos).  The fi->rdc fields are read and written under
 * fi->rdc.lock; the page cache truncation happens after the lock is dropped.
 */
92 static void fuse_readdir_cache_end(struct file *file, loff_t pos)
94 struct fuse_inode *fi = get_fuse_inode(file_inode(file));
97 spin_lock(&fi->rdc.lock);
98 /* does cache end position match current position? */
99 if (fi->rdc.pos != pos) {
100 spin_unlock(&fi->rdc.lock);
/* Once set, fuse_add_dirent_to_cache() stops appending (it tests rdc.cached). */
104 fi->rdc.cached = true;
/* Round the used size up to a page boundary. */
105 end = ALIGN(fi->rdc.size, PAGE_SIZE);
106 spin_unlock(&fi->rdc.lock);
108 /* truncate unused tail of cache */
109 truncate_inode_pages(file->f_mapping, end);
/*
 * fuse_emit - hand one directory entry to the caller via dir_emit(), first
 * offering it to the directory cache when the file was opened with
 * FOPEN_CACHE_DIR.
 *
 * The entry is cached at position ctx->pos.  Returns the result of
 * dir_emit().
 */
112 static bool fuse_emit(struct file *file, struct dir_context *ctx,
113 struct fuse_dirent *dirent)
115 struct fuse_file *ff = file->private_data;
/* Caching happens before emitting, independent of the dir_emit() result. */
117 if (ff->open_flags & FOPEN_CACHE_DIR)
118 fuse_add_dirent_to_cache(file, dirent, ctx->pos);
120 return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
/*
 * parse_dirfile - walk a buffer of fuse_dirent records and emit each one.
 *
 * @buf:    start of the reply buffer
 * @nbytes: number of valid bytes in @buf
 * @file:   open directory, passed on to fuse_emit()
 * @ctx:    readdir context; ctx->pos is set to dirent->off of each record
 *
 * NOTE(review): the return statements and the code advancing buf and nbytes
 * are not visible in this excerpt.
 */
124 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
125 struct dir_context *ctx)
/* Continue only while a complete fixed-size dirent header is available. */
127 while (nbytes >= FUSE_NAME_OFFSET) {
128 struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
129 size_t reclen = FUSE_DIRENT_SIZE(dirent);
/* Validate the name length supplied in the record. */
130 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
/* Detect a slash inside the name. */
134 if (memchr(dirent->name, '/', dirent->namelen) != NULL)
137 if (!fuse_emit(file, ctx, dirent))
142 ctx->pos = dirent->off;
/*
 * fuse_direntplus_link - instantiate or refresh the dentry and inode for one
 * readdirplus entry found in the directory open as @file.
 *
 * @direntplus carries both the directory entry (name) and the lookup reply
 * in entry_out (nodeid, generation, attributes).  The third parameter is
 * not visible in this excerpt.
 *
 * Outline of the visible logic:
 *  - names starting with a dot are checked for the two-character dot-dot
 *    name
 *  - the reply is checked with invalid_nodeid() and fuse_invalid_attr()
 *  - the dentry is looked up with d_lookup() or allocated with
 *    d_alloc_parallel()
 *  - for a dentry that is not in lookup, a nodeid or file type mismatch
 *    against the reply invalidates the dentry; the visible alternative
 *    drops cached ACLs and updates the attributes
 *  - the other branch obtains the inode with fuse_iget() and attaches it
 *    with d_splice_alias()
 *
 * Returns PTR_ERR(dentry) on the visible error paths.
 * NOTE(review): the remaining return values are not visible here.
 */
148 static int fuse_direntplus_link(struct file *file,
149 struct fuse_direntplus *direntplus,
152 struct fuse_entry_out *o = &direntplus->entry_out;
153 struct fuse_dirent *dirent = &direntplus->dirent;
154 struct dentry *parent = file->f_path.dentry;
155 struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
156 struct dentry *dentry;
157 struct dentry *alias;
158 struct inode *dir = d_inode(parent);
159 struct fuse_conn *fc;
/* On-stack wait queue handed to d_alloc_parallel() below. */
161 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
165 * Unlike in the case of fuse_lookup, zero nodeid does not mean
166 * ENOENT. Instead, it only means the userspace filesystem did
167 * not want to return attributes/handle for this entry.
174 if (name.name[0] == '.') {
176 * We could potentially refresh the attributes of the directory
181 if (name.name[1] == '.' && name.len == 2)
/* Validate the nodeid and attributes supplied in the reply. */
185 if (invalid_nodeid(o->nodeid))
187 if (fuse_invalid_attr(&o->attr))
190 fc = get_fuse_conn(dir);
/* The qstr hash has to be filled in before the dcache lookup. */
192 name.hash = full_name_hash(parent, name.name, name.len);
193 dentry = d_lookup(parent, &name);
196 dentry = d_alloc_parallel(parent, &name, &wq);
198 return PTR_ERR(dentry);
200 if (!d_in_lookup(dentry)) {
201 struct fuse_inode *fi;
202 inode = d_inode(dentry);
/* Mismatch in nodeid or in the file type bits (S_IFMT) of the mode. */
204 get_node_id(inode) != o->nodeid ||
205 ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
206 d_invalidate(dentry);
210 if (fuse_is_bad(inode)) {
215 fi = get_fuse_inode(inode);
/* NOTE(review): the statement executed under fi->lock is not visible here. */
216 spin_lock(&fi->lock);
218 spin_unlock(&fi->lock);
/* Drop cached ACLs, then apply the attributes from the reply. */
220 forget_all_cached_acls(inode);
221 fuse_change_attributes(inode, &o->attr,
222 entry_attr_timeout(o),
225 * The other branch comes via fuse_iget()
226 * which bumps nlookup inside
229 inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
230 &o->attr, entry_attr_timeout(o),
233 inode = ERR_PTR(-ENOMEM);
235 alias = d_splice_alias(inode, dentry);
236 d_lookup_done(dentry);
241 if (IS_ERR(dentry)) {
242 if (!IS_ERR(inode)) {
243 struct fuse_inode *fi = get_fuse_inode(inode);
/* NOTE(review): the statement executed under fi->lock is not visible here. */
245 spin_lock(&fi->lock);
247 spin_unlock(&fi->lock);
249 return PTR_ERR(dentry);
/* With readdirplus_auto set, flag the inode with FUSE_I_INIT_RDPLUS. */
252 if (fc->readdirplus_auto)
253 set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
254 fuse_change_entry_timeout(dentry, o);
/*
 * fuse_force_forget - send a FUSE_FORGET request for @nodeid on the
 * connection that the inode of @file belongs to.
 *
 * The request carries a single input argument, a fuse_forget_in that is
 * zeroed first.  The result of fuse_simple_request() is not used.
 * NOTE(review): the declaration of args and any further field assignments
 * are not visible in this excerpt.
 */
260 static void fuse_force_forget(struct file *file, u64 nodeid)
262 struct inode *inode = file_inode(file);
263 struct fuse_conn *fc = get_fuse_conn(inode);
264 struct fuse_forget_in inarg;
267 memset(&inarg, 0, sizeof(inarg));
269 args.opcode = FUSE_FORGET;
270 args.nodeid = nodeid;
272 args.in_args[0].size = sizeof(inarg);
273 args.in_args[0].value = &inarg;
277 fuse_simple_request(fc, &args);
/*
 * parse_dirplusfile - walk a buffer of fuse_direntplus records, emitting the
 * directory entries and linking each one through fuse_direntplus_link().
 *
 * @buf:          start of the reply buffer
 * @nbytes:       number of valid bytes in @buf
 * @file:         open directory
 * @ctx:          readdir context; ctx->pos is set to dirent->off
 * @attr_version: passed through to fuse_direntplus_link()
 *
 * The result of fuse_emit() is recorded in the over flag (set when the
 * emit fails).  fuse_force_forget() is called with the nodeid of the entry
 * after the link attempt.
 * NOTE(review): the conditions guarding the emit, the pos update and the
 * forced forget, as well as the return statements, are not visible in this
 * excerpt.
 */
281 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
282 struct dir_context *ctx, u64 attr_version)
284 struct fuse_direntplus *direntplus;
285 struct fuse_dirent *dirent;
/* Continue only while a complete fixed-size direntplus header is available. */
290 while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
291 direntplus = (struct fuse_direntplus *) buf;
292 dirent = &direntplus->dirent;
293 reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
/* Same name checks as in parse_dirfile(): length bounds and no slash. */
295 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
299 if (memchr(dirent->name, '/', dirent->namelen) != NULL)
303 /* We fill entries into dstbuf only as much as
304 it can hold. But we still continue iterating
305 over remaining entries to link them. If not,
306 we need to send a FORGET for each of those
307 which we did not link.
309 over = !fuse_emit(file, ctx, dirent);
311 ctx->pos = dirent->off;
317 ret = fuse_direntplus_link(file, direntplus, attr_version);
319 fuse_force_forget(file, direntplus->entry_out.nodeid);
/*
 * fuse_readdir_uncached - read directory entries by sending a request on the
 * connection and parsing the reply.
 *
 * A single page is allocated as the reply buffer.  Depending on
 * fuse_use_readdirplus() the reply is parsed with parse_dirplusfile() or
 * parse_dirfile().  For files opened with FOPEN_CACHE_DIR,
 * fuse_readdir_cache_end() may be called at ctx->pos.
 * NOTE(review): error handling, the page release and the conditions around
 * the cache-end call are not visible in this excerpt.
 */
325 static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
330 struct inode *inode = file_inode(file);
331 struct fuse_conn *fc = get_fuse_conn(inode);
332 struct fuse_io_args ia = {};
333 struct fuse_args_pages *ap = &ia.ap;
334 struct fuse_page_desc desc = { .length = PAGE_SIZE };
335 u64 attr_version = 0;
/* One page serves as the buffer for the reply. */
338 page = alloc_page(GFP_KERNEL);
342 plus = fuse_use_readdirplus(inode, ctx);
343 ap->args.out_pages = 1;
/* Sample the attribute version before the request is sent. */
348 attr_version = fuse_get_attr_version(fc);
/*
 * Both calls request up to PAGE_SIZE bytes starting at ctx->pos.
 * NOTE(review): the trailing argument of each call is not visible here.
 */
349 fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
352 fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
/* The request is issued between fuse_lock_inode() and fuse_unlock_inode(). */
355 locked = fuse_lock_inode(inode);
356 res = fuse_simple_request(fc, &ap->args);
357 fuse_unlock_inode(inode, locked);
360 struct fuse_file *ff = file->private_data;
362 if (ff->open_flags & FOPEN_CACHE_DIR)
363 fuse_readdir_cache_end(file, ctx->pos);
/* res is passed on as the number of bytes to parse. */
365 res = parse_dirplusfile(page_address(page), res,
366 file, ctx, attr_version);
368 res = parse_dirfile(page_address(page), res, file,
374 fuse_invalidate_atime(inode);
/*
 * Result type of fuse_parse_cache().  The enumerators referenced elsewhere
 * in this file are FOUND_NONE, FOUND_SOME, FOUND_ALL and FOUND_ERR.
 * NOTE(review): the enumerator list itself is not visible in this excerpt.
 */
378 enum fuse_parse_result {
/*
 * fuse_parse_cache - parse cached directory entries from one mapped page.
 *
 * @ff:   open file state; readdir.pos and readdir.cache_off are advanced
 *        past every record that is consumed
 * @addr: start of the mapped cache page
 * @size: number of valid bytes in the page
 * @ctx:  readdir context; entries are emitted with dir_emit() only while
 *        ff->readdir.pos equals ctx->pos
 *
 * Parsing starts at the in-page part of ff->readdir.cache_off.  The result
 * starts out as FOUND_NONE.
 * NOTE(review): the loop construct and the assignments of the other result
 * values are not visible in this excerpt.
 */
385 static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
386 void *addr, unsigned int size,
387 struct dir_context *ctx)
389 unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
390 enum fuse_parse_result res = FOUND_NONE;
392 WARN_ON(offset >= size);
395 struct fuse_dirent *dirent = addr + offset;
396 unsigned int nbytes = size - offset;
/* Not enough bytes left for a dirent header, or a record with empty name. */
399 if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
402 reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
/* Sanity checks on the cached record; each violation triggers a warning. */
404 if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
406 if (WARN_ON(reclen > nbytes))
408 if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
/* Emit only once the cache stream has reached the requested position. */
411 if (ff->readdir.pos == ctx->pos) {
413 if (!dir_emit(ctx, dirent->name, dirent->namelen,
414 dirent->ino, dirent->type))
416 ctx->pos = dirent->off;
/* Step the cache stream past this record. */
418 ff->readdir.pos = dirent->off;
419 ff->readdir.cache_off += reclen;
/*
 * fuse_rdc_reset - reset the readdir cache state of @inode.
 *
 * The visible part clears rdc.cached.
 * NOTE(review): any further fields reset here are not visible in this
 * excerpt.  Both visible callers invoke this between spin_lock() and
 * spin_unlock() on fi->rdc.lock; confirm whether that is a requirement.
 */
427 static void fuse_rdc_reset(struct inode *inode)
429 struct fuse_inode *fi = get_fuse_inode(inode);
431 fi->rdc.cached = false;
/*
 * fuse_readdir_cached - try to serve a readdir call from the directory cache
 * kept in the page cache of the directory.
 *
 * Visible steps:
 *  - if ctx->pos differs from ff->readdir.pos, the per-file cache offset is
 *    rewound to zero
 *  - at position zero with auto_inval_data set, the inode attributes are
 *    refreshed with fuse_update_attributes()
 *  - under fi->rdc.lock: when the cache is not complete and is being
 *    started, mtime and iversion of the inode are recorded; otherwise they
 *    are compared with the inode and the cache is reset on a mismatch
 *  - a version mismatch between the file and the cache rewinds the per-file
 *    cache offset
 *  - the page holding the current cache offset is looked up and locked,
 *    the version is re-checked, and the page is parsed with
 *    fuse_parse_cache()
 *
 * The final visible return yields 0 when the parse result is FOUND_SOME and
 * UNCACHED otherwise.
 * NOTE(review): many statements, including the other return paths, the
 * retry logic and the page release, are not visible in this excerpt.
 */
439 static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
441 struct fuse_file *ff = file->private_data;
442 struct inode *inode = file_inode(file);
443 struct fuse_conn *fc = get_fuse_conn(inode);
444 struct fuse_inode *fi = get_fuse_inode(inode);
445 enum fuse_parse_result res;
451 /* Seeked? If so, reset the cache stream */
452 if (ff->readdir.pos != ctx->pos) {
454 ff->readdir.cache_off = 0;
458 * We're just about to start reading into the cache or reading the
459 * cache; both cases require an up-to-date mtime value.
461 if (!ctx->pos && fc->auto_inval_data) {
462 int err = fuse_update_attributes(inode, file);
469 spin_lock(&fi->rdc.lock);
471 if (!fi->rdc.cached) {
472 /* Starting cache? Set cache mtime. */
473 if (!ctx->pos && !fi->rdc.size) {
474 fi->rdc.mtime = inode->i_mtime;
475 fi->rdc.iversion = inode_query_iversion(inode);
477 spin_unlock(&fi->rdc.lock);
481 * When at the beginning of the directory (i.e. just after opendir(3) or
482 * rewinddir(3)), then need to check whether directory contents have
483 * changed, and reset the cache if so.
486 if (inode_peek_iversion(inode) != fi->rdc.iversion ||
487 !timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
488 fuse_rdc_reset(inode);
494 * If cache version changed since the last getdents() call, then reset
497 if (ff->readdir.version != fi->rdc.version) {
499 ff->readdir.cache_off = 0;
502 * If at the beginning of the cache, than reset version to
505 if (ff->readdir.pos == 0)
506 ff->readdir.version = fi->rdc.version;
508 WARN_ON(fi->rdc.size < ff->readdir.cache_off);
510 index = ff->readdir.cache_off >> PAGE_SHIFT;
512 if (index == (fi->rdc.size >> PAGE_SHIFT))
513 size = fi->rdc.size & ~PAGE_MASK;
516 spin_unlock(&fi->rdc.lock);
519 if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
522 page = find_get_page_flags(file->f_mapping, index,
523 FGP_ACCESSED | FGP_LOCK);
524 spin_lock(&fi->rdc.lock);
527 * Uh-oh: page gone missing, cache is useless
529 if (fi->rdc.version == ff->readdir.version)
530 fuse_rdc_reset(inode);
534 /* Make sure it's still the same version after getting the page. */
535 if (ff->readdir.version != fi->rdc.version) {
536 spin_unlock(&fi->rdc.lock);
541 spin_unlock(&fi->rdc.lock);
544 * Contents of the page are now protected against changing by holding
548 res = fuse_parse_cache(ff, addr, size, ctx);
553 if (res == FOUND_ERR)
556 if (res == FOUND_ALL)
559 if (size == PAGE_SIZE) {
560 /* We hit end of page: skip to next page. */
561 ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
566 * End of cache reached. If found position, then we are done, otherwise
567 * need to fall back to uncached, since the position we were looking for
568 * wasn't in the cache.
570 return res == FOUND_SOME ? 0 : UNCACHED;
/*
 * fuse_readdir - readdir entry point for FUSE directories.
 *
 * Checks the inode with fuse_is_bad(), then serialises on the per-file
 * mutex ff->readdir.lock.  Files opened with FOPEN_CACHE_DIR go through
 * fuse_readdir_cached(); fuse_readdir_uncached() is the other visible path.
 * NOTE(review): the condition selecting the uncached path (presumably the
 * UNCACHED result of the cached path), the error returned for a bad inode
 * and the final return are not visible in this excerpt.
 */
573 int fuse_readdir(struct file *file, struct dir_context *ctx)
575 struct fuse_file *ff = file->private_data;
576 struct inode *inode = file_inode(file);
579 if (fuse_is_bad(inode))
582 mutex_lock(&ff->readdir.lock);
585 if (ff->open_flags & FOPEN_CACHE_DIR)
586 err = fuse_readdir_cached(file, ctx);
588 err = fuse_readdir_uncached(file, ctx);
590 mutex_unlock(&ff->readdir.lock);