GNU Linux-libre 6.1.86-gnu
[releases.git] / fs / fuse / readdir.c
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>
4
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8
9
10 #include "fuse_i.h"
11 #include <linux/iversion.h>
12 #include <linux/posix_acl.h>
13 #include <linux/pagemap.h>
14 #include <linux/highmem.h>
15
16 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
17 {
18         struct fuse_conn *fc = get_fuse_conn(dir);
19         struct fuse_inode *fi = get_fuse_inode(dir);
20
21         if (!fc->do_readdirplus)
22                 return false;
23         if (!fc->readdirplus_auto)
24                 return true;
25         if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
26                 return true;
27         if (ctx->pos == 0)
28                 return true;
29         return false;
30 }
31
32 static void fuse_add_dirent_to_cache(struct file *file,
33                                      struct fuse_dirent *dirent, loff_t pos)
34 {
35         struct fuse_inode *fi = get_fuse_inode(file_inode(file));
36         size_t reclen = FUSE_DIRENT_SIZE(dirent);
37         pgoff_t index;
38         struct page *page;
39         loff_t size;
40         u64 version;
41         unsigned int offset;
42         void *addr;
43
44         spin_lock(&fi->rdc.lock);
45         /*
46          * Is cache already completed?  Or this entry does not go at the end of
47          * cache?
48          */
49         if (fi->rdc.cached || pos != fi->rdc.pos) {
50                 spin_unlock(&fi->rdc.lock);
51                 return;
52         }
53         version = fi->rdc.version;
54         size = fi->rdc.size;
55         offset = size & ~PAGE_MASK;
56         index = size >> PAGE_SHIFT;
57         /* Dirent doesn't fit in current page?  Jump to next page. */
58         if (offset + reclen > PAGE_SIZE) {
59                 index++;
60                 offset = 0;
61         }
62         spin_unlock(&fi->rdc.lock);
63
64         if (offset) {
65                 page = find_lock_page(file->f_mapping, index);
66         } else {
67                 page = find_or_create_page(file->f_mapping, index,
68                                            mapping_gfp_mask(file->f_mapping));
69         }
70         if (!page)
71                 return;
72
73         spin_lock(&fi->rdc.lock);
74         /* Raced with another readdir */
75         if (fi->rdc.version != version || fi->rdc.size != size ||
76             WARN_ON(fi->rdc.pos != pos))
77                 goto unlock;
78
79         addr = kmap_local_page(page);
80         if (!offset) {
81                 clear_page(addr);
82                 SetPageUptodate(page);
83         }
84         memcpy(addr + offset, dirent, reclen);
85         kunmap_local(addr);
86         fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
87         fi->rdc.pos = dirent->off;
88 unlock:
89         spin_unlock(&fi->rdc.lock);
90         unlock_page(page);
91         put_page(page);
92 }
93
94 static void fuse_readdir_cache_end(struct file *file, loff_t pos)
95 {
96         struct fuse_inode *fi = get_fuse_inode(file_inode(file));
97         loff_t end;
98
99         spin_lock(&fi->rdc.lock);
100         /* does cache end position match current position? */
101         if (fi->rdc.pos != pos) {
102                 spin_unlock(&fi->rdc.lock);
103                 return;
104         }
105
106         fi->rdc.cached = true;
107         end = ALIGN(fi->rdc.size, PAGE_SIZE);
108         spin_unlock(&fi->rdc.lock);
109
110         /* truncate unused tail of cache */
111         truncate_inode_pages(file->f_mapping, end);
112 }
113
114 static bool fuse_emit(struct file *file, struct dir_context *ctx,
115                       struct fuse_dirent *dirent)
116 {
117         struct fuse_file *ff = file->private_data;
118
119         if (ff->open_flags & FOPEN_CACHE_DIR)
120                 fuse_add_dirent_to_cache(file, dirent, ctx->pos);
121
122         return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
123                         dirent->type);
124 }
125
126 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
127                          struct dir_context *ctx)
128 {
129         while (nbytes >= FUSE_NAME_OFFSET) {
130                 struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
131                 size_t reclen = FUSE_DIRENT_SIZE(dirent);
132                 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
133                         return -EIO;
134                 if (reclen > nbytes)
135                         break;
136                 if (memchr(dirent->name, '/', dirent->namelen) != NULL)
137                         return -EIO;
138
139                 if (!fuse_emit(file, ctx, dirent))
140                         break;
141
142                 buf += reclen;
143                 nbytes -= reclen;
144                 ctx->pos = dirent->off;
145         }
146
147         return 0;
148 }
149
150 static int fuse_direntplus_link(struct file *file,
151                                 struct fuse_direntplus *direntplus,
152                                 u64 attr_version)
153 {
154         struct fuse_entry_out *o = &direntplus->entry_out;
155         struct fuse_dirent *dirent = &direntplus->dirent;
156         struct dentry *parent = file->f_path.dentry;
157         struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
158         struct dentry *dentry;
159         struct dentry *alias;
160         struct inode *dir = d_inode(parent);
161         struct fuse_conn *fc;
162         struct inode *inode;
163         DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
164
165         if (!o->nodeid) {
166                 /*
167                  * Unlike in the case of fuse_lookup, zero nodeid does not mean
168                  * ENOENT. Instead, it only means the userspace filesystem did
169                  * not want to return attributes/handle for this entry.
170                  *
171                  * So do nothing.
172                  */
173                 return 0;
174         }
175
176         if (name.name[0] == '.') {
177                 /*
178                  * We could potentially refresh the attributes of the directory
179                  * and its parent?
180                  */
181                 if (name.len == 1)
182                         return 0;
183                 if (name.name[1] == '.' && name.len == 2)
184                         return 0;
185         }
186
187         if (invalid_nodeid(o->nodeid))
188                 return -EIO;
189         if (fuse_invalid_attr(&o->attr))
190                 return -EIO;
191
192         fc = get_fuse_conn(dir);
193
194         name.hash = full_name_hash(parent, name.name, name.len);
195         dentry = d_lookup(parent, &name);
196         if (!dentry) {
197 retry:
198                 dentry = d_alloc_parallel(parent, &name, &wq);
199                 if (IS_ERR(dentry))
200                         return PTR_ERR(dentry);
201         }
202         if (!d_in_lookup(dentry)) {
203                 struct fuse_inode *fi;
204                 inode = d_inode(dentry);
205                 if (inode && get_node_id(inode) != o->nodeid)
206                         inode = NULL;
207                 if (!inode ||
208                     fuse_stale_inode(inode, o->generation, &o->attr)) {
209                         if (inode)
210                                 fuse_make_bad(inode);
211                         d_invalidate(dentry);
212                         dput(dentry);
213                         goto retry;
214                 }
215                 if (fuse_is_bad(inode)) {
216                         dput(dentry);
217                         return -EIO;
218                 }
219
220                 fi = get_fuse_inode(inode);
221                 spin_lock(&fi->lock);
222                 fi->nlookup++;
223                 spin_unlock(&fi->lock);
224
225                 forget_all_cached_acls(inode);
226                 fuse_change_attributes(inode, &o->attr,
227                                        entry_attr_timeout(o),
228                                        attr_version);
229                 /*
230                  * The other branch comes via fuse_iget()
231                  * which bumps nlookup inside
232                  */
233         } else {
234                 inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
235                                   &o->attr, entry_attr_timeout(o),
236                                   attr_version);
237                 if (!inode)
238                         inode = ERR_PTR(-ENOMEM);
239
240                 alias = d_splice_alias(inode, dentry);
241                 d_lookup_done(dentry);
242                 if (alias) {
243                         dput(dentry);
244                         dentry = alias;
245                 }
246                 if (IS_ERR(dentry)) {
247                         if (!IS_ERR(inode)) {
248                                 struct fuse_inode *fi = get_fuse_inode(inode);
249
250                                 spin_lock(&fi->lock);
251                                 fi->nlookup--;
252                                 spin_unlock(&fi->lock);
253                         }
254                         return PTR_ERR(dentry);
255                 }
256         }
257         if (fc->readdirplus_auto)
258                 set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
259         fuse_change_entry_timeout(dentry, o);
260
261         dput(dentry);
262         return 0;
263 }
264
265 static void fuse_force_forget(struct file *file, u64 nodeid)
266 {
267         struct inode *inode = file_inode(file);
268         struct fuse_mount *fm = get_fuse_mount(inode);
269         struct fuse_forget_in inarg;
270         FUSE_ARGS(args);
271
272         memset(&inarg, 0, sizeof(inarg));
273         inarg.nlookup = 1;
274         args.opcode = FUSE_FORGET;
275         args.nodeid = nodeid;
276         args.in_numargs = 1;
277         args.in_args[0].size = sizeof(inarg);
278         args.in_args[0].value = &inarg;
279         args.force = true;
280         args.noreply = true;
281
282         fuse_simple_request(fm, &args);
283         /* ignore errors */
284 }
285
286 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
287                              struct dir_context *ctx, u64 attr_version)
288 {
289         struct fuse_direntplus *direntplus;
290         struct fuse_dirent *dirent;
291         size_t reclen;
292         int over = 0;
293         int ret;
294
295         while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
296                 direntplus = (struct fuse_direntplus *) buf;
297                 dirent = &direntplus->dirent;
298                 reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
299
300                 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
301                         return -EIO;
302                 if (reclen > nbytes)
303                         break;
304                 if (memchr(dirent->name, '/', dirent->namelen) != NULL)
305                         return -EIO;
306
307                 if (!over) {
308                         /* We fill entries into dstbuf only as much as
309                            it can hold. But we still continue iterating
310                            over remaining entries to link them. If not,
311                            we need to send a FORGET for each of those
312                            which we did not link.
313                         */
314                         over = !fuse_emit(file, ctx, dirent);
315                         if (!over)
316                                 ctx->pos = dirent->off;
317                 }
318
319                 buf += reclen;
320                 nbytes -= reclen;
321
322                 ret = fuse_direntplus_link(file, direntplus, attr_version);
323                 if (ret)
324                         fuse_force_forget(file, direntplus->entry_out.nodeid);
325         }
326
327         return 0;
328 }
329
330 static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
331 {
332         int plus;
333         ssize_t res;
334         struct page *page;
335         struct inode *inode = file_inode(file);
336         struct fuse_mount *fm = get_fuse_mount(inode);
337         struct fuse_io_args ia = {};
338         struct fuse_args_pages *ap = &ia.ap;
339         struct fuse_page_desc desc = { .length = PAGE_SIZE };
340         u64 attr_version = 0;
341         bool locked;
342
343         page = alloc_page(GFP_KERNEL);
344         if (!page)
345                 return -ENOMEM;
346
347         plus = fuse_use_readdirplus(inode, ctx);
348         ap->args.out_pages = true;
349         ap->num_pages = 1;
350         ap->pages = &page;
351         ap->descs = &desc;
352         if (plus) {
353                 attr_version = fuse_get_attr_version(fm->fc);
354                 fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
355                                     FUSE_READDIRPLUS);
356         } else {
357                 fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
358                                     FUSE_READDIR);
359         }
360         locked = fuse_lock_inode(inode);
361         res = fuse_simple_request(fm, &ap->args);
362         fuse_unlock_inode(inode, locked);
363         if (res >= 0) {
364                 if (!res) {
365                         struct fuse_file *ff = file->private_data;
366
367                         if (ff->open_flags & FOPEN_CACHE_DIR)
368                                 fuse_readdir_cache_end(file, ctx->pos);
369                 } else if (plus) {
370                         res = parse_dirplusfile(page_address(page), res,
371                                                 file, ctx, attr_version);
372                 } else {
373                         res = parse_dirfile(page_address(page), res, file,
374                                             ctx);
375                 }
376         }
377
378         __free_page(page);
379         fuse_invalidate_atime(inode);
380         return res;
381 }
382
/* Outcome of parsing one page of the readdir cache (fuse_parse_cache()). */
enum fuse_parse_result {
	/* A cached entry failed validation. */
	FOUND_ERR = -1,
	/* No entry at the requested position was emitted. */
	FOUND_NONE = 0,
	/* Entries were emitted and the end of the parsed data was reached. */
	FOUND_SOME = 1,
	/* dir_emit() refused an entry; nothing more can be emitted. */
	FOUND_ALL = 2,
};
389
390 static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
391                                                void *addr, unsigned int size,
392                                                struct dir_context *ctx)
393 {
394         unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
395         enum fuse_parse_result res = FOUND_NONE;
396
397         WARN_ON(offset >= size);
398
399         for (;;) {
400                 struct fuse_dirent *dirent = addr + offset;
401                 unsigned int nbytes = size - offset;
402                 size_t reclen;
403
404                 if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
405                         break;
406
407                 reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
408
409                 if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
410                         return FOUND_ERR;
411                 if (WARN_ON(reclen > nbytes))
412                         return FOUND_ERR;
413                 if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
414                         return FOUND_ERR;
415
416                 if (ff->readdir.pos == ctx->pos) {
417                         res = FOUND_SOME;
418                         if (!dir_emit(ctx, dirent->name, dirent->namelen,
419                                       dirent->ino, dirent->type))
420                                 return FOUND_ALL;
421                         ctx->pos = dirent->off;
422                 }
423                 ff->readdir.pos = dirent->off;
424                 ff->readdir.cache_off += reclen;
425
426                 offset += reclen;
427         }
428
429         return res;
430 }
431
432 static void fuse_rdc_reset(struct inode *inode)
433 {
434         struct fuse_inode *fi = get_fuse_inode(inode);
435
436         fi->rdc.cached = false;
437         fi->rdc.version++;
438         fi->rdc.size = 0;
439         fi->rdc.pos = 0;
440 }
441
442 #define UNCACHED 1
443
444 static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
445 {
446         struct fuse_file *ff = file->private_data;
447         struct inode *inode = file_inode(file);
448         struct fuse_conn *fc = get_fuse_conn(inode);
449         struct fuse_inode *fi = get_fuse_inode(inode);
450         enum fuse_parse_result res;
451         pgoff_t index;
452         unsigned int size;
453         struct page *page;
454         void *addr;
455
456         /* Seeked?  If so, reset the cache stream */
457         if (ff->readdir.pos != ctx->pos) {
458                 ff->readdir.pos = 0;
459                 ff->readdir.cache_off = 0;
460         }
461
462         /*
463          * We're just about to start reading into the cache or reading the
464          * cache; both cases require an up-to-date mtime value.
465          */
466         if (!ctx->pos && fc->auto_inval_data) {
467                 int err = fuse_update_attributes(inode, file, STATX_MTIME);
468
469                 if (err)
470                         return err;
471         }
472
473 retry:
474         spin_lock(&fi->rdc.lock);
475 retry_locked:
476         if (!fi->rdc.cached) {
477                 /* Starting cache? Set cache mtime. */
478                 if (!ctx->pos && !fi->rdc.size) {
479                         fi->rdc.mtime = inode->i_mtime;
480                         fi->rdc.iversion = inode_query_iversion(inode);
481                 }
482                 spin_unlock(&fi->rdc.lock);
483                 return UNCACHED;
484         }
485         /*
486          * When at the beginning of the directory (i.e. just after opendir(3) or
487          * rewinddir(3)), then need to check whether directory contents have
488          * changed, and reset the cache if so.
489          */
490         if (!ctx->pos) {
491                 if (inode_peek_iversion(inode) != fi->rdc.iversion ||
492                     !timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
493                         fuse_rdc_reset(inode);
494                         goto retry_locked;
495                 }
496         }
497
498         /*
499          * If cache version changed since the last getdents() call, then reset
500          * the cache stream.
501          */
502         if (ff->readdir.version != fi->rdc.version) {
503                 ff->readdir.pos = 0;
504                 ff->readdir.cache_off = 0;
505         }
506         /*
507          * If at the beginning of the cache, than reset version to
508          * current.
509          */
510         if (ff->readdir.pos == 0)
511                 ff->readdir.version = fi->rdc.version;
512
513         WARN_ON(fi->rdc.size < ff->readdir.cache_off);
514
515         index = ff->readdir.cache_off >> PAGE_SHIFT;
516
517         if (index == (fi->rdc.size >> PAGE_SHIFT))
518                 size = fi->rdc.size & ~PAGE_MASK;
519         else
520                 size = PAGE_SIZE;
521         spin_unlock(&fi->rdc.lock);
522
523         /* EOF? */
524         if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
525                 return 0;
526
527         page = find_get_page_flags(file->f_mapping, index,
528                                    FGP_ACCESSED | FGP_LOCK);
529         /* Page gone missing, then re-added to cache, but not initialized? */
530         if (page && !PageUptodate(page)) {
531                 unlock_page(page);
532                 put_page(page);
533                 page = NULL;
534         }
535         spin_lock(&fi->rdc.lock);
536         if (!page) {
537                 /*
538                  * Uh-oh: page gone missing, cache is useless
539                  */
540                 if (fi->rdc.version == ff->readdir.version)
541                         fuse_rdc_reset(inode);
542                 goto retry_locked;
543         }
544
545         /* Make sure it's still the same version after getting the page. */
546         if (ff->readdir.version != fi->rdc.version) {
547                 spin_unlock(&fi->rdc.lock);
548                 unlock_page(page);
549                 put_page(page);
550                 goto retry;
551         }
552         spin_unlock(&fi->rdc.lock);
553
554         /*
555          * Contents of the page are now protected against changing by holding
556          * the page lock.
557          */
558         addr = kmap(page);
559         res = fuse_parse_cache(ff, addr, size, ctx);
560         kunmap(page);
561         unlock_page(page);
562         put_page(page);
563
564         if (res == FOUND_ERR)
565                 return -EIO;
566
567         if (res == FOUND_ALL)
568                 return 0;
569
570         if (size == PAGE_SIZE) {
571                 /* We hit end of page: skip to next page. */
572                 ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
573                 goto retry;
574         }
575
576         /*
577          * End of cache reached.  If found position, then we are done, otherwise
578          * need to fall back to uncached, since the position we were looking for
579          * wasn't in the cache.
580          */
581         return res == FOUND_SOME ? 0 : UNCACHED;
582 }
583
584 int fuse_readdir(struct file *file, struct dir_context *ctx)
585 {
586         struct fuse_file *ff = file->private_data;
587         struct inode *inode = file_inode(file);
588         int err;
589
590         if (fuse_is_bad(inode))
591                 return -EIO;
592
593         mutex_lock(&ff->readdir.lock);
594
595         err = UNCACHED;
596         if (ff->open_flags & FOPEN_CACHE_DIR)
597                 err = fuse_readdir_cached(file, ctx);
598         if (err == UNCACHED)
599                 err = fuse_readdir_uncached(file, ctx);
600
601         mutex_unlock(&ff->readdir.lock);
602
603         return err;
604 }