GNU Linux-libre 4.9.333-gnu1
[releases.git] / fs / overlayfs / readdir.c
1 /*
2  *
3  * Copyright (C) 2011 Novell Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published by
7  * the Free Software Foundation.
8  */
9
10 #include <linux/fs.h>
11 #include <linux/slab.h>
12 #include <linux/namei.h>
13 #include <linux/file.h>
14 #include <linux/xattr.h>
15 #include <linux/rbtree.h>
16 #include <linux/security.h>
17 #include <linux/cred.h>
18 #include "overlayfs.h"
19
20 struct ovl_cache_entry {
21         unsigned int len;
22         unsigned int type;
23         u64 ino;
24         struct list_head l_node;
25         struct rb_node node;
26         struct ovl_cache_entry *next_maybe_whiteout;
27         bool is_whiteout;
28         char name[];
29 };
30
31 struct ovl_dir_cache {
32         long refcount;
33         u64 version;
34         struct list_head entries;
35 };
36
37 struct ovl_readdir_data {
38         struct dir_context ctx;
39         struct dentry *dentry;
40         bool is_lowest;
41         struct rb_root root;
42         struct list_head *list;
43         struct list_head middle;
44         struct ovl_cache_entry *first_maybe_whiteout;
45         int count;
46         int err;
47         bool d_type_supported;
48 };
49
50 struct ovl_dir_file {
51         bool is_real;
52         bool is_upper;
53         struct ovl_dir_cache *cache;
54         struct list_head *cursor;
55         struct file *realfile;
56         struct file *upperfile;
57 };
58
59 static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
60 {
61         return container_of(n, struct ovl_cache_entry, node);
62 }
63
64 static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
65                                                     const char *name, int len)
66 {
67         struct rb_node *node = root->rb_node;
68         int cmp;
69
70         while (node) {
71                 struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
72
73                 cmp = strncmp(name, p->name, len);
74                 if (cmp > 0)
75                         node = p->node.rb_right;
76                 else if (cmp < 0 || len < p->len)
77                         node = p->node.rb_left;
78                 else
79                         return p;
80         }
81
82         return NULL;
83 }
84
85 static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
86                                                    const char *name, int len,
87                                                    u64 ino, unsigned int d_type)
88 {
89         struct ovl_cache_entry *p;
90         size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);
91
92         p = kmalloc(size, GFP_KERNEL);
93         if (!p)
94                 return NULL;
95
96         memcpy(p->name, name, len);
97         p->name[len] = '\0';
98         p->len = len;
99         p->type = d_type;
100         p->ino = ino;
101         p->is_whiteout = false;
102
103         if (d_type == DT_CHR) {
104                 p->next_maybe_whiteout = rdd->first_maybe_whiteout;
105                 rdd->first_maybe_whiteout = p;
106         }
107         return p;
108 }
109
110 static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
111                                   const char *name, int len, u64 ino,
112                                   unsigned int d_type)
113 {
114         struct rb_node **newp = &rdd->root.rb_node;
115         struct rb_node *parent = NULL;
116         struct ovl_cache_entry *p;
117
118         while (*newp) {
119                 int cmp;
120                 struct ovl_cache_entry *tmp;
121
122                 parent = *newp;
123                 tmp = ovl_cache_entry_from_node(*newp);
124                 cmp = strncmp(name, tmp->name, len);
125                 if (cmp > 0)
126                         newp = &tmp->node.rb_right;
127                 else if (cmp < 0 || len < tmp->len)
128                         newp = &tmp->node.rb_left;
129                 else
130                         return 0;
131         }
132
133         p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
134         if (p == NULL)
135                 return -ENOMEM;
136
137         list_add_tail(&p->l_node, rdd->list);
138         rb_link_node(&p->node, parent, newp);
139         rb_insert_color(&p->node, &rdd->root);
140
141         return 0;
142 }
143
144 static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
145                            const char *name, int namelen,
146                            loff_t offset, u64 ino, unsigned int d_type)
147 {
148         struct ovl_cache_entry *p;
149
150         p = ovl_cache_entry_find(&rdd->root, name, namelen);
151         if (p) {
152                 list_move_tail(&p->l_node, &rdd->middle);
153         } else {
154                 p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
155                 if (p == NULL)
156                         rdd->err = -ENOMEM;
157                 else
158                         list_add_tail(&p->l_node, &rdd->middle);
159         }
160
161         return rdd->err;
162 }
163
164 void ovl_cache_free(struct list_head *list)
165 {
166         struct ovl_cache_entry *p;
167         struct ovl_cache_entry *n;
168
169         list_for_each_entry_safe(p, n, list, l_node)
170                 kfree(p);
171
172         INIT_LIST_HEAD(list);
173 }
174
175 static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
176 {
177         struct ovl_dir_cache *cache = od->cache;
178
179         WARN_ON(cache->refcount <= 0);
180         cache->refcount--;
181         if (!cache->refcount) {
182                 if (ovl_dir_cache(dentry) == cache)
183                         ovl_set_dir_cache(dentry, NULL);
184
185                 ovl_cache_free(&cache->entries);
186                 kfree(cache);
187         }
188 }
189
190 static int ovl_fill_merge(struct dir_context *ctx, const char *name,
191                           int namelen, loff_t offset, u64 ino,
192                           unsigned int d_type)
193 {
194         struct ovl_readdir_data *rdd =
195                 container_of(ctx, struct ovl_readdir_data, ctx);
196
197         rdd->count++;
198         if (!rdd->is_lowest)
199                 return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
200         else
201                 return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
202 }
203
204 static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
205 {
206         int err;
207         struct ovl_cache_entry *p;
208         struct dentry *dentry;
209         const struct cred *old_cred;
210
211         old_cred = ovl_override_creds(rdd->dentry->d_sb);
212
213         err = down_write_killable(&dir->d_inode->i_rwsem);
214         if (!err) {
215                 while (rdd->first_maybe_whiteout) {
216                         p = rdd->first_maybe_whiteout;
217                         rdd->first_maybe_whiteout = p->next_maybe_whiteout;
218                         dentry = lookup_one_len(p->name, dir, p->len);
219                         if (!IS_ERR(dentry)) {
220                                 p->is_whiteout = ovl_is_whiteout(dentry);
221                                 dput(dentry);
222                         }
223                 }
224                 inode_unlock(dir->d_inode);
225         }
226         revert_creds(old_cred);
227
228         return err;
229 }
230
231 static inline int ovl_dir_read(struct path *realpath,
232                                struct ovl_readdir_data *rdd)
233 {
234         struct file *realfile;
235         int err;
236
237         realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
238         if (IS_ERR(realfile))
239                 return PTR_ERR(realfile);
240
241         rdd->first_maybe_whiteout = NULL;
242         rdd->ctx.pos = 0;
243         do {
244                 rdd->count = 0;
245                 rdd->err = 0;
246                 err = iterate_dir(realfile, &rdd->ctx);
247                 if (err >= 0)
248                         err = rdd->err;
249         } while (!err && rdd->count);
250
251         if (!err && rdd->first_maybe_whiteout && rdd->dentry)
252                 err = ovl_check_whiteouts(realpath->dentry, rdd);
253
254         fput(realfile);
255
256         return err;
257 }
258
259 static void ovl_dir_reset(struct file *file)
260 {
261         struct ovl_dir_file *od = file->private_data;
262         struct ovl_dir_cache *cache = od->cache;
263         struct dentry *dentry = file->f_path.dentry;
264         enum ovl_path_type type = ovl_path_type(dentry);
265
266         if (cache && ovl_dentry_version_get(dentry) != cache->version) {
267                 ovl_cache_put(od, dentry);
268                 od->cache = NULL;
269                 od->cursor = NULL;
270         }
271         WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type));
272         if (od->is_real && OVL_TYPE_MERGE(type))
273                 od->is_real = false;
274 }
275
276 static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
277 {
278         int err;
279         struct path realpath;
280         struct ovl_readdir_data rdd = {
281                 .ctx.actor = ovl_fill_merge,
282                 .dentry = dentry,
283                 .list = list,
284                 .root = RB_ROOT,
285                 .is_lowest = false,
286         };
287         int idx, next;
288
289         for (idx = 0; idx != -1; idx = next) {
290                 next = ovl_path_next(idx, dentry, &realpath);
291
292                 if (next != -1) {
293                         err = ovl_dir_read(&realpath, &rdd);
294                         if (err)
295                                 break;
296                 } else {
297                         /*
298                          * Insert lowest layer entries before upper ones, this
299                          * allows offsets to be reasonably constant
300                          */
301                         list_add(&rdd.middle, rdd.list);
302                         rdd.is_lowest = true;
303                         err = ovl_dir_read(&realpath, &rdd);
304                         list_del(&rdd.middle);
305                 }
306         }
307         return err;
308 }
309
310 static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
311 {
312         struct list_head *p;
313         loff_t off = 0;
314
315         list_for_each(p, &od->cache->entries) {
316                 if (off >= pos)
317                         break;
318                 off++;
319         }
320         /* Cursor is safe since the cache is stable */
321         od->cursor = p;
322 }
323
324 static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
325 {
326         int res;
327         struct ovl_dir_cache *cache;
328
329         cache = ovl_dir_cache(dentry);
330         if (cache && ovl_dentry_version_get(dentry) == cache->version) {
331                 cache->refcount++;
332                 return cache;
333         }
334         ovl_set_dir_cache(dentry, NULL);
335
336         cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
337         if (!cache)
338                 return ERR_PTR(-ENOMEM);
339
340         cache->refcount = 1;
341         INIT_LIST_HEAD(&cache->entries);
342
343         res = ovl_dir_read_merged(dentry, &cache->entries);
344         if (res) {
345                 ovl_cache_free(&cache->entries);
346                 kfree(cache);
347                 return ERR_PTR(res);
348         }
349
350         cache->version = ovl_dentry_version_get(dentry);
351         ovl_set_dir_cache(dentry, cache);
352
353         return cache;
354 }
355
356 static int ovl_iterate(struct file *file, struct dir_context *ctx)
357 {
358         struct ovl_dir_file *od = file->private_data;
359         struct dentry *dentry = file->f_path.dentry;
360         struct ovl_cache_entry *p;
361
362         if (!ctx->pos)
363                 ovl_dir_reset(file);
364
365         if (od->is_real)
366                 return iterate_dir(od->realfile, ctx);
367
368         if (!od->cache) {
369                 struct ovl_dir_cache *cache;
370
371                 cache = ovl_cache_get(dentry);
372                 if (IS_ERR(cache))
373                         return PTR_ERR(cache);
374
375                 od->cache = cache;
376                 ovl_seek_cursor(od, ctx->pos);
377         }
378
379         while (od->cursor != &od->cache->entries) {
380                 p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
381                 if (!p->is_whiteout)
382                         if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
383                                 break;
384                 od->cursor = p->l_node.next;
385                 ctx->pos++;
386         }
387         return 0;
388 }
389
390 static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
391 {
392         loff_t res;
393         struct ovl_dir_file *od = file->private_data;
394
395         inode_lock(file_inode(file));
396         if (!file->f_pos)
397                 ovl_dir_reset(file);
398
399         if (od->is_real) {
400                 res = vfs_llseek(od->realfile, offset, origin);
401                 file->f_pos = od->realfile->f_pos;
402         } else {
403                 res = -EINVAL;
404
405                 switch (origin) {
406                 case SEEK_CUR:
407                         offset += file->f_pos;
408                         break;
409                 case SEEK_SET:
410                         break;
411                 default:
412                         goto out_unlock;
413                 }
414                 if (offset < 0)
415                         goto out_unlock;
416
417                 if (offset != file->f_pos) {
418                         file->f_pos = offset;
419                         if (od->cache)
420                                 ovl_seek_cursor(od, offset);
421                 }
422                 res = offset;
423         }
424 out_unlock:
425         inode_unlock(file_inode(file));
426
427         return res;
428 }
429
430 static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
431                          int datasync)
432 {
433         struct ovl_dir_file *od = file->private_data;
434         struct dentry *dentry = file->f_path.dentry;
435         struct file *realfile = od->realfile;
436
437         /* Nothing to sync for lower */
438         if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
439                 return 0;
440
441         /*
442          * Need to check if we started out being a lower dir, but got copied up
443          */
444         if (!od->is_upper) {
445                 struct inode *inode = file_inode(file);
446
447                 realfile = lockless_dereference(od->upperfile);
448                 if (!realfile) {
449                         struct path upperpath;
450
451                         ovl_path_upper(dentry, &upperpath);
452                         realfile = ovl_path_open(&upperpath, O_RDONLY);
453                         smp_mb__before_spinlock();
454                         inode_lock(inode);
455                         if (!od->upperfile) {
456                                 if (IS_ERR(realfile)) {
457                                         inode_unlock(inode);
458                                         return PTR_ERR(realfile);
459                                 }
460                                 od->upperfile = realfile;
461                         } else {
462                                 /* somebody has beaten us to it */
463                                 if (!IS_ERR(realfile))
464                                         fput(realfile);
465                                 realfile = od->upperfile;
466                         }
467                         inode_unlock(inode);
468                 }
469         }
470
471         return vfs_fsync_range(realfile, start, end, datasync);
472 }
473
474 static int ovl_dir_release(struct inode *inode, struct file *file)
475 {
476         struct ovl_dir_file *od = file->private_data;
477
478         if (od->cache) {
479                 inode_lock(inode);
480                 ovl_cache_put(od, file->f_path.dentry);
481                 inode_unlock(inode);
482         }
483         fput(od->realfile);
484         if (od->upperfile)
485                 fput(od->upperfile);
486         kfree(od);
487
488         return 0;
489 }
490
491 static int ovl_dir_open(struct inode *inode, struct file *file)
492 {
493         struct path realpath;
494         struct file *realfile;
495         struct ovl_dir_file *od;
496         enum ovl_path_type type;
497
498         od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
499         if (!od)
500                 return -ENOMEM;
501
502         type = ovl_path_real(file->f_path.dentry, &realpath);
503         realfile = ovl_path_open(&realpath, file->f_flags);
504         if (IS_ERR(realfile)) {
505                 kfree(od);
506                 return PTR_ERR(realfile);
507         }
508         od->realfile = realfile;
509         od->is_real = !OVL_TYPE_MERGE(type);
510         od->is_upper = OVL_TYPE_UPPER(type);
511         file->private_data = od;
512
513         return 0;
514 }
515
516 const struct file_operations ovl_dir_operations = {
517         .read           = generic_read_dir,
518         .open           = ovl_dir_open,
519         .iterate        = ovl_iterate,
520         .llseek         = ovl_dir_llseek,
521         .fsync          = ovl_dir_fsync,
522         .release        = ovl_dir_release,
523 };
524
525 int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
526 {
527         int err;
528         struct ovl_cache_entry *p;
529
530         err = ovl_dir_read_merged(dentry, list);
531         if (err)
532                 return err;
533
534         err = 0;
535
536         list_for_each_entry(p, list, l_node) {
537                 if (p->is_whiteout)
538                         continue;
539
540                 if (p->name[0] == '.') {
541                         if (p->len == 1)
542                                 continue;
543                         if (p->len == 2 && p->name[1] == '.')
544                                 continue;
545                 }
546                 err = -ENOTEMPTY;
547                 break;
548         }
549
550         return err;
551 }
552
553 void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
554 {
555         struct ovl_cache_entry *p;
556
557         inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
558         list_for_each_entry(p, list, l_node) {
559                 struct dentry *dentry;
560
561                 if (!p->is_whiteout)
562                         continue;
563
564                 dentry = lookup_one_len(p->name, upper, p->len);
565                 if (IS_ERR(dentry)) {
566                         pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
567                                upper->d_name.name, p->len, p->name,
568                                (int) PTR_ERR(dentry));
569                         continue;
570                 }
571                 if (dentry->d_inode)
572                         ovl_cleanup(upper->d_inode, dentry);
573                 dput(dentry);
574         }
575         inode_unlock(upper->d_inode);
576 }
577
578 static int ovl_check_d_type(struct dir_context *ctx, const char *name,
579                           int namelen, loff_t offset, u64 ino,
580                           unsigned int d_type)
581 {
582         struct ovl_readdir_data *rdd =
583                 container_of(ctx, struct ovl_readdir_data, ctx);
584
585         /* Even if d_type is not supported, DT_DIR is returned for . and .. */
586         if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
587                 return 0;
588
589         if (d_type != DT_UNKNOWN)
590                 rdd->d_type_supported = true;
591
592         return 0;
593 }
594
595 /*
596  * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
597  * if error is encountered.
598  */
599 int ovl_check_d_type_supported(struct path *realpath)
600 {
601         int err;
602         struct ovl_readdir_data rdd = {
603                 .ctx.actor = ovl_check_d_type,
604                 .d_type_supported = false,
605         };
606
607         err = ovl_dir_read(realpath, &rdd);
608         if (err)
609                 return err;
610
611         return rdd.d_type_supported;
612 }
613
614 static void ovl_workdir_cleanup_recurse(struct path *path, int level)
615 {
616         int err;
617         struct inode *dir = path->dentry->d_inode;
618         LIST_HEAD(list);
619         struct ovl_cache_entry *p;
620         struct ovl_readdir_data rdd = {
621                 .ctx.actor = ovl_fill_merge,
622                 .dentry = NULL,
623                 .list = &list,
624                 .root = RB_ROOT,
625                 .is_lowest = false,
626         };
627
628         err = ovl_dir_read(path, &rdd);
629         if (err)
630                 goto out;
631
632         inode_lock_nested(dir, I_MUTEX_PARENT);
633         list_for_each_entry(p, &list, l_node) {
634                 struct dentry *dentry;
635
636                 if (p->name[0] == '.') {
637                         if (p->len == 1)
638                                 continue;
639                         if (p->len == 2 && p->name[1] == '.')
640                                 continue;
641                 }
642                 dentry = lookup_one_len(p->name, path->dentry, p->len);
643                 if (IS_ERR(dentry))
644                         continue;
645                 if (dentry->d_inode)
646                         ovl_workdir_cleanup(dir, path->mnt, dentry, level);
647                 dput(dentry);
648         }
649         inode_unlock(dir);
650 out:
651         ovl_cache_free(&list);
652 }
653
654 void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
655                          struct dentry *dentry, int level)
656 {
657         int err;
658
659         if (!d_is_dir(dentry) || level > 1) {
660                 ovl_cleanup(dir, dentry);
661                 return;
662         }
663
664         err = ovl_do_rmdir(dir, dentry);
665         if (err) {
666                 struct path path = { .mnt = mnt, .dentry = dentry };
667
668                 inode_unlock(dir);
669                 ovl_workdir_cleanup_recurse(&path, level + 1);
670                 inode_lock_nested(dir, I_MUTEX_PARENT);
671                 ovl_cleanup(dir, dentry);
672         }
673 }