GNU Linux-libre 5.4.257-gnu1
[releases.git] / fs / fuse / readdir.c
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>
4
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8
9
10 #include "fuse_i.h"
11 #include <linux/iversion.h>
12 #include <linux/posix_acl.h>
13 #include <linux/pagemap.h>
14 #include <linux/highmem.h>
15
16 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
17 {
18         struct fuse_conn *fc = get_fuse_conn(dir);
19         struct fuse_inode *fi = get_fuse_inode(dir);
20
21         if (!fc->do_readdirplus)
22                 return false;
23         if (!fc->readdirplus_auto)
24                 return true;
25         if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
26                 return true;
27         if (ctx->pos == 0)
28                 return true;
29         return false;
30 }
31
32 static void fuse_add_dirent_to_cache(struct file *file,
33                                      struct fuse_dirent *dirent, loff_t pos)
34 {
35         struct fuse_inode *fi = get_fuse_inode(file_inode(file));
36         size_t reclen = FUSE_DIRENT_SIZE(dirent);
37         pgoff_t index;
38         struct page *page;
39         loff_t size;
40         u64 version;
41         unsigned int offset;
42         void *addr;
43
44         spin_lock(&fi->rdc.lock);
45         /*
46          * Is cache already completed?  Or this entry does not go at the end of
47          * cache?
48          */
49         if (fi->rdc.cached || pos != fi->rdc.pos) {
50                 spin_unlock(&fi->rdc.lock);
51                 return;
52         }
53         version = fi->rdc.version;
54         size = fi->rdc.size;
55         offset = size & ~PAGE_MASK;
56         index = size >> PAGE_SHIFT;
57         /* Dirent doesn't fit in current page?  Jump to next page. */
58         if (offset + reclen > PAGE_SIZE) {
59                 index++;
60                 offset = 0;
61         }
62         spin_unlock(&fi->rdc.lock);
63
64         if (offset) {
65                 page = find_lock_page(file->f_mapping, index);
66         } else {
67                 page = find_or_create_page(file->f_mapping, index,
68                                            mapping_gfp_mask(file->f_mapping));
69         }
70         if (!page)
71                 return;
72
73         spin_lock(&fi->rdc.lock);
74         /* Raced with another readdir */
75         if (fi->rdc.version != version || fi->rdc.size != size ||
76             WARN_ON(fi->rdc.pos != pos))
77                 goto unlock;
78
79         addr = kmap_atomic(page);
80         if (!offset)
81                 clear_page(addr);
82         memcpy(addr + offset, dirent, reclen);
83         kunmap_atomic(addr);
84         fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
85         fi->rdc.pos = dirent->off;
86 unlock:
87         spin_unlock(&fi->rdc.lock);
88         unlock_page(page);
89         put_page(page);
90 }
91
92 static void fuse_readdir_cache_end(struct file *file, loff_t pos)
93 {
94         struct fuse_inode *fi = get_fuse_inode(file_inode(file));
95         loff_t end;
96
97         spin_lock(&fi->rdc.lock);
98         /* does cache end position match current position? */
99         if (fi->rdc.pos != pos) {
100                 spin_unlock(&fi->rdc.lock);
101                 return;
102         }
103
104         fi->rdc.cached = true;
105         end = ALIGN(fi->rdc.size, PAGE_SIZE);
106         spin_unlock(&fi->rdc.lock);
107
108         /* truncate unused tail of cache */
109         truncate_inode_pages(file->f_mapping, end);
110 }
111
112 static bool fuse_emit(struct file *file, struct dir_context *ctx,
113                       struct fuse_dirent *dirent)
114 {
115         struct fuse_file *ff = file->private_data;
116
117         if (ff->open_flags & FOPEN_CACHE_DIR)
118                 fuse_add_dirent_to_cache(file, dirent, ctx->pos);
119
120         return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
121                         dirent->type);
122 }
123
124 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
125                          struct dir_context *ctx)
126 {
127         while (nbytes >= FUSE_NAME_OFFSET) {
128                 struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
129                 size_t reclen = FUSE_DIRENT_SIZE(dirent);
130                 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
131                         return -EIO;
132                 if (reclen > nbytes)
133                         break;
134                 if (memchr(dirent->name, '/', dirent->namelen) != NULL)
135                         return -EIO;
136
137                 if (!fuse_emit(file, ctx, dirent))
138                         break;
139
140                 buf += reclen;
141                 nbytes -= reclen;
142                 ctx->pos = dirent->off;
143         }
144
145         return 0;
146 }
147
148 static int fuse_direntplus_link(struct file *file,
149                                 struct fuse_direntplus *direntplus,
150                                 u64 attr_version)
151 {
152         struct fuse_entry_out *o = &direntplus->entry_out;
153         struct fuse_dirent *dirent = &direntplus->dirent;
154         struct dentry *parent = file->f_path.dentry;
155         struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
156         struct dentry *dentry;
157         struct dentry *alias;
158         struct inode *dir = d_inode(parent);
159         struct fuse_conn *fc;
160         struct inode *inode;
161         DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
162
163         if (!o->nodeid) {
164                 /*
165                  * Unlike in the case of fuse_lookup, zero nodeid does not mean
166                  * ENOENT. Instead, it only means the userspace filesystem did
167                  * not want to return attributes/handle for this entry.
168                  *
169                  * So do nothing.
170                  */
171                 return 0;
172         }
173
174         if (name.name[0] == '.') {
175                 /*
176                  * We could potentially refresh the attributes of the directory
177                  * and its parent?
178                  */
179                 if (name.len == 1)
180                         return 0;
181                 if (name.name[1] == '.' && name.len == 2)
182                         return 0;
183         }
184
185         if (invalid_nodeid(o->nodeid))
186                 return -EIO;
187         if (fuse_invalid_attr(&o->attr))
188                 return -EIO;
189
190         fc = get_fuse_conn(dir);
191
192         name.hash = full_name_hash(parent, name.name, name.len);
193         dentry = d_lookup(parent, &name);
194         if (!dentry) {
195 retry:
196                 dentry = d_alloc_parallel(parent, &name, &wq);
197                 if (IS_ERR(dentry))
198                         return PTR_ERR(dentry);
199         }
200         if (!d_in_lookup(dentry)) {
201                 struct fuse_inode *fi;
202                 inode = d_inode(dentry);
203                 if (!inode ||
204                     get_node_id(inode) != o->nodeid ||
205                     ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
206                         d_invalidate(dentry);
207                         dput(dentry);
208                         goto retry;
209                 }
210                 if (fuse_is_bad(inode)) {
211                         dput(dentry);
212                         return -EIO;
213                 }
214
215                 fi = get_fuse_inode(inode);
216                 spin_lock(&fi->lock);
217                 fi->nlookup++;
218                 spin_unlock(&fi->lock);
219
220                 forget_all_cached_acls(inode);
221                 fuse_change_attributes(inode, &o->attr,
222                                        entry_attr_timeout(o),
223                                        attr_version);
224                 /*
225                  * The other branch comes via fuse_iget()
226                  * which bumps nlookup inside
227                  */
228         } else {
229                 inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
230                                   &o->attr, entry_attr_timeout(o),
231                                   attr_version);
232                 if (!inode)
233                         inode = ERR_PTR(-ENOMEM);
234
235                 alias = d_splice_alias(inode, dentry);
236                 d_lookup_done(dentry);
237                 if (alias) {
238                         dput(dentry);
239                         dentry = alias;
240                 }
241                 if (IS_ERR(dentry)) {
242                         if (!IS_ERR(inode)) {
243                                 struct fuse_inode *fi = get_fuse_inode(inode);
244
245                                 spin_lock(&fi->lock);
246                                 fi->nlookup--;
247                                 spin_unlock(&fi->lock);
248                         }
249                         return PTR_ERR(dentry);
250                 }
251         }
252         if (fc->readdirplus_auto)
253                 set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
254         fuse_change_entry_timeout(dentry, o);
255
256         dput(dentry);
257         return 0;
258 }
259
260 static void fuse_force_forget(struct file *file, u64 nodeid)
261 {
262         struct inode *inode = file_inode(file);
263         struct fuse_conn *fc = get_fuse_conn(inode);
264         struct fuse_forget_in inarg;
265         FUSE_ARGS(args);
266
267         memset(&inarg, 0, sizeof(inarg));
268         inarg.nlookup = 1;
269         args.opcode = FUSE_FORGET;
270         args.nodeid = nodeid;
271         args.in_numargs = 1;
272         args.in_args[0].size = sizeof(inarg);
273         args.in_args[0].value = &inarg;
274         args.force = true;
275         args.noreply = true;
276
277         fuse_simple_request(fc, &args);
278         /* ignore errors */
279 }
280
281 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
282                              struct dir_context *ctx, u64 attr_version)
283 {
284         struct fuse_direntplus *direntplus;
285         struct fuse_dirent *dirent;
286         size_t reclen;
287         int over = 0;
288         int ret;
289
290         while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
291                 direntplus = (struct fuse_direntplus *) buf;
292                 dirent = &direntplus->dirent;
293                 reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
294
295                 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
296                         return -EIO;
297                 if (reclen > nbytes)
298                         break;
299                 if (memchr(dirent->name, '/', dirent->namelen) != NULL)
300                         return -EIO;
301
302                 if (!over) {
303                         /* We fill entries into dstbuf only as much as
304                            it can hold. But we still continue iterating
305                            over remaining entries to link them. If not,
306                            we need to send a FORGET for each of those
307                            which we did not link.
308                         */
309                         over = !fuse_emit(file, ctx, dirent);
310                         if (!over)
311                                 ctx->pos = dirent->off;
312                 }
313
314                 buf += reclen;
315                 nbytes -= reclen;
316
317                 ret = fuse_direntplus_link(file, direntplus, attr_version);
318                 if (ret)
319                         fuse_force_forget(file, direntplus->entry_out.nodeid);
320         }
321
322         return 0;
323 }
324
325 static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
326 {
327         int plus;
328         ssize_t res;
329         struct page *page;
330         struct inode *inode = file_inode(file);
331         struct fuse_conn *fc = get_fuse_conn(inode);
332         struct fuse_io_args ia = {};
333         struct fuse_args_pages *ap = &ia.ap;
334         struct fuse_page_desc desc = { .length = PAGE_SIZE };
335         u64 attr_version = 0;
336         bool locked;
337
338         page = alloc_page(GFP_KERNEL);
339         if (!page)
340                 return -ENOMEM;
341
342         plus = fuse_use_readdirplus(inode, ctx);
343         ap->args.out_pages = 1;
344         ap->num_pages = 1;
345         ap->pages = &page;
346         ap->descs = &desc;
347         if (plus) {
348                 attr_version = fuse_get_attr_version(fc);
349                 fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
350                                     FUSE_READDIRPLUS);
351         } else {
352                 fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
353                                     FUSE_READDIR);
354         }
355         locked = fuse_lock_inode(inode);
356         res = fuse_simple_request(fc, &ap->args);
357         fuse_unlock_inode(inode, locked);
358         if (res >= 0) {
359                 if (!res) {
360                         struct fuse_file *ff = file->private_data;
361
362                         if (ff->open_flags & FOPEN_CACHE_DIR)
363                                 fuse_readdir_cache_end(file, ctx->pos);
364                 } else if (plus) {
365                         res = parse_dirplusfile(page_address(page), res,
366                                                 file, ctx, attr_version);
367                 } else {
368                         res = parse_dirfile(page_address(page), res, file,
369                                             ctx);
370                 }
371         }
372
373         __free_page(page);
374         fuse_invalidate_atime(inode);
375         return res;
376 }
377
/* Outcome of scanning one page of the readdir cache (fuse_parse_cache()). */
enum fuse_parse_result {
	FOUND_ERR  = -1,	/* a cached record failed validation */
	FOUND_NONE = 0,		/* requested position not met in this page */
	FOUND_SOME = 1,		/* entries were emitted, page ran out */
	FOUND_ALL  = 2,		/* dir_emit() refused an entry: stop reading */
};
384
385 static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
386                                                void *addr, unsigned int size,
387                                                struct dir_context *ctx)
388 {
389         unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
390         enum fuse_parse_result res = FOUND_NONE;
391
392         WARN_ON(offset >= size);
393
394         for (;;) {
395                 struct fuse_dirent *dirent = addr + offset;
396                 unsigned int nbytes = size - offset;
397                 size_t reclen;
398
399                 if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
400                         break;
401
402                 reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
403
404                 if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
405                         return FOUND_ERR;
406                 if (WARN_ON(reclen > nbytes))
407                         return FOUND_ERR;
408                 if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
409                         return FOUND_ERR;
410
411                 if (ff->readdir.pos == ctx->pos) {
412                         res = FOUND_SOME;
413                         if (!dir_emit(ctx, dirent->name, dirent->namelen,
414                                       dirent->ino, dirent->type))
415                                 return FOUND_ALL;
416                         ctx->pos = dirent->off;
417                 }
418                 ff->readdir.pos = dirent->off;
419                 ff->readdir.cache_off += reclen;
420
421                 offset += reclen;
422         }
423
424         return res;
425 }
426
427 static void fuse_rdc_reset(struct inode *inode)
428 {
429         struct fuse_inode *fi = get_fuse_inode(inode);
430
431         fi->rdc.cached = false;
432         fi->rdc.version++;
433         fi->rdc.size = 0;
434         fi->rdc.pos = 0;
435 }
436
437 #define UNCACHED 1
438
439 static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
440 {
441         struct fuse_file *ff = file->private_data;
442         struct inode *inode = file_inode(file);
443         struct fuse_conn *fc = get_fuse_conn(inode);
444         struct fuse_inode *fi = get_fuse_inode(inode);
445         enum fuse_parse_result res;
446         pgoff_t index;
447         unsigned int size;
448         struct page *page;
449         void *addr;
450
451         /* Seeked?  If so, reset the cache stream */
452         if (ff->readdir.pos != ctx->pos) {
453                 ff->readdir.pos = 0;
454                 ff->readdir.cache_off = 0;
455         }
456
457         /*
458          * We're just about to start reading into the cache or reading the
459          * cache; both cases require an up-to-date mtime value.
460          */
461         if (!ctx->pos && fc->auto_inval_data) {
462                 int err = fuse_update_attributes(inode, file);
463
464                 if (err)
465                         return err;
466         }
467
468 retry:
469         spin_lock(&fi->rdc.lock);
470 retry_locked:
471         if (!fi->rdc.cached) {
472                 /* Starting cache? Set cache mtime. */
473                 if (!ctx->pos && !fi->rdc.size) {
474                         fi->rdc.mtime = inode->i_mtime;
475                         fi->rdc.iversion = inode_query_iversion(inode);
476                 }
477                 spin_unlock(&fi->rdc.lock);
478                 return UNCACHED;
479         }
480         /*
481          * When at the beginning of the directory (i.e. just after opendir(3) or
482          * rewinddir(3)), then need to check whether directory contents have
483          * changed, and reset the cache if so.
484          */
485         if (!ctx->pos) {
486                 if (inode_peek_iversion(inode) != fi->rdc.iversion ||
487                     !timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
488                         fuse_rdc_reset(inode);
489                         goto retry_locked;
490                 }
491         }
492
493         /*
494          * If cache version changed since the last getdents() call, then reset
495          * the cache stream.
496          */
497         if (ff->readdir.version != fi->rdc.version) {
498                 ff->readdir.pos = 0;
499                 ff->readdir.cache_off = 0;
500         }
501         /*
502          * If at the beginning of the cache, than reset version to
503          * current.
504          */
505         if (ff->readdir.pos == 0)
506                 ff->readdir.version = fi->rdc.version;
507
508         WARN_ON(fi->rdc.size < ff->readdir.cache_off);
509
510         index = ff->readdir.cache_off >> PAGE_SHIFT;
511
512         if (index == (fi->rdc.size >> PAGE_SHIFT))
513                 size = fi->rdc.size & ~PAGE_MASK;
514         else
515                 size = PAGE_SIZE;
516         spin_unlock(&fi->rdc.lock);
517
518         /* EOF? */
519         if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
520                 return 0;
521
522         page = find_get_page_flags(file->f_mapping, index,
523                                    FGP_ACCESSED | FGP_LOCK);
524         spin_lock(&fi->rdc.lock);
525         if (!page) {
526                 /*
527                  * Uh-oh: page gone missing, cache is useless
528                  */
529                 if (fi->rdc.version == ff->readdir.version)
530                         fuse_rdc_reset(inode);
531                 goto retry_locked;
532         }
533
534         /* Make sure it's still the same version after getting the page. */
535         if (ff->readdir.version != fi->rdc.version) {
536                 spin_unlock(&fi->rdc.lock);
537                 unlock_page(page);
538                 put_page(page);
539                 goto retry;
540         }
541         spin_unlock(&fi->rdc.lock);
542
543         /*
544          * Contents of the page are now protected against changing by holding
545          * the page lock.
546          */
547         addr = kmap(page);
548         res = fuse_parse_cache(ff, addr, size, ctx);
549         kunmap(page);
550         unlock_page(page);
551         put_page(page);
552
553         if (res == FOUND_ERR)
554                 return -EIO;
555
556         if (res == FOUND_ALL)
557                 return 0;
558
559         if (size == PAGE_SIZE) {
560                 /* We hit end of page: skip to next page. */
561                 ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
562                 goto retry;
563         }
564
565         /*
566          * End of cache reached.  If found position, then we are done, otherwise
567          * need to fall back to uncached, since the position we were looking for
568          * wasn't in the cache.
569          */
570         return res == FOUND_SOME ? 0 : UNCACHED;
571 }
572
573 int fuse_readdir(struct file *file, struct dir_context *ctx)
574 {
575         struct fuse_file *ff = file->private_data;
576         struct inode *inode = file_inode(file);
577         int err;
578
579         if (fuse_is_bad(inode))
580                 return -EIO;
581
582         mutex_lock(&ff->readdir.lock);
583
584         err = UNCACHED;
585         if (ff->open_flags & FOPEN_CACHE_DIR)
586                 err = fuse_readdir_cached(file, ctx);
587         if (err == UNCACHED)
588                 err = fuse_readdir_uncached(file, ctx);
589
590         mutex_unlock(&ff->readdir.lock);
591
592         return err;
593 }