GNU Linux-libre 6.0.2-gnu
[releases.git] / fs / ext4 / namei.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  linux/fs/ext4/namei.c
4  *
5  * Copyright (C) 1992, 1993, 1994, 1995
6  * Remy Card (card@masi.ibp.fr)
7  * Laboratoire MASI - Institut Blaise Pascal
8  * Universite Pierre et Marie Curie (Paris VI)
9  *
10  *  from
11  *
12  *  linux/fs/minix/namei.c
13  *
14  *  Copyright (C) 1991, 1992  Linus Torvalds
15  *
16  *  Big-endian to little-endian byte-swapping/bitmaps by
17  *        David S. Miller (davem@caip.rutgers.edu), 1995
18  *  Directory entry file type support and forward compatibility hooks
19  *      for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
20  *  Hash Tree Directory indexing (c)
21  *      Daniel Phillips, 2001
22  *  Hash Tree Directory indexing porting
23  *      Christopher Li, 2002
24  *  Hash Tree Directory indexing cleanup
25  *      Theodore Ts'o, 2002
26  */
27
28 #include <linux/fs.h>
29 #include <linux/pagemap.h>
30 #include <linux/time.h>
31 #include <linux/fcntl.h>
32 #include <linux/stat.h>
33 #include <linux/string.h>
34 #include <linux/quotaops.h>
35 #include <linux/buffer_head.h>
36 #include <linux/bio.h>
37 #include <linux/iversion.h>
38 #include <linux/unicode.h>
39 #include "ext4.h"
40 #include "ext4_jbd2.h"
41
42 #include "xattr.h"
43 #include "acl.h"
44
45 #include <trace/events/ext4.h>
/*
 * Define how far ahead to read directories while searching them.
 * NAMEI_RA_SIZE is simply NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS.
 */
#define NAMEI_RA_CHUNKS  2
#define NAMEI_RA_BLOCKS  4
#define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
52
53 static struct buffer_head *ext4_append(handle_t *handle,
54                                         struct inode *inode,
55                                         ext4_lblk_t *block)
56 {
57         struct ext4_map_blocks map;
58         struct buffer_head *bh;
59         int err;
60
61         if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
62                      ((inode->i_size >> 10) >=
63                       EXT4_SB(inode->i_sb)->s_max_dir_size_kb)))
64                 return ERR_PTR(-ENOSPC);
65
66         *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
67         map.m_lblk = *block;
68         map.m_len = 1;
69
70         /*
71          * We're appending new directory block. Make sure the block is not
72          * allocated yet, otherwise we will end up corrupting the
73          * directory.
74          */
75         err = ext4_map_blocks(NULL, inode, &map, 0);
76         if (err < 0)
77                 return ERR_PTR(err);
78         if (err) {
79                 EXT4_ERROR_INODE(inode, "Logical block already allocated");
80                 return ERR_PTR(-EFSCORRUPTED);
81         }
82
83         bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE);
84         if (IS_ERR(bh))
85                 return bh;
86         inode->i_size += inode->i_sb->s_blocksize;
87         EXT4_I(inode)->i_disksize = inode->i_size;
88         BUFFER_TRACE(bh, "get_write_access");
89         err = ext4_journal_get_write_access(handle, inode->i_sb, bh,
90                                             EXT4_JTR_NONE);
91         if (err) {
92                 brelse(bh);
93                 ext4_std_error(inode->i_sb, err);
94                 return ERR_PTR(err);
95         }
96         return bh;
97 }
98
/*
 * Forward declaration: __ext4_read_dirblock() below calls this before its
 * definition appears later in the file.
 */
static int ext4_dx_csum_verify(struct inode *inode,
                               struct ext4_dir_entry *dirent);
101
/*
 * Hints to ext4_read_dirblock regarding whether we expect a directory
 * block being read to be an index block, or a block containing
 * directory entries (and if the latter, whether it was found via a
 * logical block in an htree index block).  This is used to control
 * what sort of sanity checking ext4_read_dirblock() will do on the
 * directory block read from the storage device.  EITHER means
 * the caller doesn't know what kind of directory block will be read,
 * so no specific verification will be done.
 */
typedef enum {
        EITHER,         /* caller does not know what kind of block it is */
        INDEX,          /* caller expects an htree index block */
        DIRENT,         /* caller expects a block of directory entries */
        DIRENT_HTREE    /* dirent block reached through an htree index */
} dirblock_type_t;
115
116 #define ext4_read_dirblock(inode, block, type) \
117         __ext4_read_dirblock((inode), (block), (type), __func__, __LINE__)
118
119 static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
120                                                 ext4_lblk_t block,
121                                                 dirblock_type_t type,
122                                                 const char *func,
123                                                 unsigned int line)
124 {
125         struct buffer_head *bh;
126         struct ext4_dir_entry *dirent;
127         int is_dx_block = 0;
128
129         if (block >= inode->i_size) {
130                 ext4_error_inode(inode, func, line, block,
131                        "Attempting to read directory block (%u) that is past i_size (%llu)",
132                        block, inode->i_size);
133                 return ERR_PTR(-EFSCORRUPTED);
134         }
135
136         if (ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_EIO))
137                 bh = ERR_PTR(-EIO);
138         else
139                 bh = ext4_bread(NULL, inode, block, 0);
140         if (IS_ERR(bh)) {
141                 __ext4_warning(inode->i_sb, func, line,
142                                "inode #%lu: lblock %lu: comm %s: "
143                                "error %ld reading directory block",
144                                inode->i_ino, (unsigned long)block,
145                                current->comm, PTR_ERR(bh));
146
147                 return bh;
148         }
149         if (!bh && (type == INDEX || type == DIRENT_HTREE)) {
150                 ext4_error_inode(inode, func, line, block,
151                                  "Directory hole found for htree %s block",
152                                  (type == INDEX) ? "index" : "leaf");
153                 return ERR_PTR(-EFSCORRUPTED);
154         }
155         if (!bh)
156                 return NULL;
157         dirent = (struct ext4_dir_entry *) bh->b_data;
158         /* Determine whether or not we have an index block */
159         if (is_dx(inode)) {
160                 if (block == 0)
161                         is_dx_block = 1;
162                 else if (ext4_rec_len_from_disk(dirent->rec_len,
163                                                 inode->i_sb->s_blocksize) ==
164                          inode->i_sb->s_blocksize)
165                         is_dx_block = 1;
166         }
167         if (!is_dx_block && type == INDEX) {
168                 ext4_error_inode(inode, func, line, block,
169                        "directory leaf block found instead of index block");
170                 brelse(bh);
171                 return ERR_PTR(-EFSCORRUPTED);
172         }
173         if (!ext4_has_metadata_csum(inode->i_sb) ||
174             buffer_verified(bh))
175                 return bh;
176
177         /*
178          * An empty leaf block can get mistaken for a index block; for
179          * this reason, we can only check the index checksum when the
180          * caller is sure it should be an index block.
181          */
182         if (is_dx_block && type == INDEX) {
183                 if (ext4_dx_csum_verify(inode, dirent) &&
184                     !ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_CRC))
185                         set_buffer_verified(bh);
186                 else {
187                         ext4_error_inode_err(inode, func, line, block,
188                                              EFSBADCRC,
189                                              "Directory index failed checksum");
190                         brelse(bh);
191                         return ERR_PTR(-EFSBADCRC);
192                 }
193         }
194         if (!is_dx_block) {
195                 if (ext4_dirblock_csum_verify(inode, bh) &&
196                     !ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_CRC))
197                         set_buffer_verified(bh);
198                 else {
199                         ext4_error_inode_err(inode, func, line, block,
200                                              EFSBADCRC,
201                                              "Directory block failed checksum");
202                         brelse(bh);
203                         return ERR_PTR(-EFSBADCRC);
204                 }
205         }
206         return bh;
207 }
208
/*
 * dxtrace(command) expands to @command when DX_DEBUG is defined and to
 * nothing otherwise, so tracing statements vanish from normal builds.
 */
#ifdef DX_DEBUG
#define dxtrace(command) command
#else
#define dxtrace(command)
#endif
214
215 struct fake_dirent
216 {
217         __le32 inode;
218         __le16 rec_len;
219         u8 name_len;
220         u8 file_type;
221 };
222
223 struct dx_countlimit
224 {
225         __le16 limit;
226         __le16 count;
227 };
228
229 struct dx_entry
230 {
231         __le32 hash;
232         __le32 block;
233 };
234
235 /*
236  * dx_root_info is laid out so that if it should somehow get overlaid by a
237  * dirent the two low bits of the hash version will be zero.  Therefore, the
238  * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
239  */
240
241 struct dx_root
242 {
243         struct fake_dirent dot;
244         char dot_name[4];
245         struct fake_dirent dotdot;
246         char dotdot_name[4];
247         struct dx_root_info
248         {
249                 __le32 reserved_zero;
250                 u8 hash_version;
251                 u8 info_length; /* 8 */
252                 u8 indirect_levels;
253                 u8 unused_flags;
254         }
255         info;
256         struct dx_entry entries[];
257 };
258
259 struct dx_node
260 {
261         struct fake_dirent fake;
262         struct dx_entry entries[];
263 };
264
265
/*
 * In-memory record for one index block: its buffer head, a pointer to its
 * entry array, and a pointer to one entry within that array.
 */
struct dx_frame {
        struct buffer_head *bh;
        struct dx_entry *entries;
        struct dx_entry *at;
};
272
273 struct dx_map_entry
274 {
275         u32 hash;
276         u16 offs;
277         u16 size;
278 };
279
280 /*
281  * This goes at the end of each htree block.
282  */
283 struct dx_tail {
284         u32 dt_reserved;
285         __le32 dt_checksum;     /* crc32c(uuid+inum+dirblock) */
286 };
287
/*
 * Forward declarations for the htree (dx_*) helpers, so that they can be
 * used before their definitions later in the file.
 */
static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
static inline unsigned dx_get_hash(struct dx_entry *entry);
static void dx_set_hash(struct dx_entry *entry, unsigned value);
static unsigned dx_get_count(struct dx_entry *entries);
static unsigned dx_get_limit(struct dx_entry *entries);
static void dx_set_count(struct dx_entry *entries, unsigned value);
static void dx_set_limit(struct dx_entry *entries, unsigned value);
static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
static unsigned dx_node_limit(struct inode *dir);
static struct dx_frame *dx_probe(struct ext4_filename *fname,
                                 struct inode *dir,
                                 struct dx_hash_info *hinfo,
                                 struct dx_frame *frame);
static void dx_release(struct dx_frame *frames);
static int dx_make_map(struct inode *dir, struct buffer_head *bh,
                       struct dx_hash_info *hinfo,
                       struct dx_map_entry *map_tail);
static void dx_sort_map(struct dx_map_entry *map, unsigned count);
static struct ext4_dir_entry_2 *dx_move_dirents(struct inode *dir, char *from,
                                        char *to, struct dx_map_entry *offsets,
                                        int count, unsigned int blocksize);
static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base,
                                                unsigned int blocksize);
static void dx_insert_block(struct dx_frame *frame,
                                        u32 hash, ext4_lblk_t block);
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
                                 struct dx_frame *frame,
                                 struct dx_frame *frames,
                                 __u32 *start_hash);
static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
                struct ext4_filename *fname,
                struct ext4_dir_entry_2 **res_dir);
static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
                             struct inode *dir, struct inode *inode);
323
324 /* checksumming functions */
325 void ext4_initialize_dirent_tail(struct buffer_head *bh,
326                                  unsigned int blocksize)
327 {
328         struct ext4_dir_entry_tail *t = EXT4_DIRENT_TAIL(bh->b_data, blocksize);
329
330         memset(t, 0, sizeof(struct ext4_dir_entry_tail));
331         t->det_rec_len = ext4_rec_len_to_disk(
332                         sizeof(struct ext4_dir_entry_tail), blocksize);
333         t->det_reserved_ft = EXT4_FT_DIR_CSUM;
334 }
335
336 /* Walk through a dirent block to find a checksum "dirent" at the tail */
337 static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
338                                                    struct buffer_head *bh)
339 {
340         struct ext4_dir_entry_tail *t;
341
342 #ifdef PARANOID
343         struct ext4_dir_entry *d, *top;
344
345         d = (struct ext4_dir_entry *)bh->b_data;
346         top = (struct ext4_dir_entry *)(bh->b_data +
347                 (EXT4_BLOCK_SIZE(inode->i_sb) -
348                  sizeof(struct ext4_dir_entry_tail)));
349         while (d < top && d->rec_len)
350                 d = (struct ext4_dir_entry *)(((void *)d) +
351                     le16_to_cpu(d->rec_len));
352
353         if (d != top)
354                 return NULL;
355
356         t = (struct ext4_dir_entry_tail *)d;
357 #else
358         t = EXT4_DIRENT_TAIL(bh->b_data, EXT4_BLOCK_SIZE(inode->i_sb));
359 #endif
360
361         if (t->det_reserved_zero1 ||
362             le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) ||
363             t->det_reserved_zero2 ||
364             t->det_reserved_ft != EXT4_FT_DIR_CSUM)
365                 return NULL;
366
367         return t;
368 }
369
370 static __le32 ext4_dirblock_csum(struct inode *inode, void *dirent, int size)
371 {
372         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
373         struct ext4_inode_info *ei = EXT4_I(inode);
374         __u32 csum;
375
376         csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
377         return cpu_to_le32(csum);
378 }
379
/*
 * warn_no_space_for_csum() records the calling function and line so the
 * warning points at the place that detected the problem.
 */
#define warn_no_space_for_csum(inode)                                   \
        __warn_no_space_for_csum((inode), __func__, __LINE__)

/*
 * Emit an inode warning, attributed to @func/@line, saying that the
 * directory block has no room for a leaf checksum.
 */
static void __warn_no_space_for_csum(struct inode *inode, const char *func,
                                     unsigned int line)
{
        __ext4_warning_inode(inode, func, line,
                "No space for directory leaf checksum. Please run e2fsck -D.");
}
389
390 int ext4_dirblock_csum_verify(struct inode *inode, struct buffer_head *bh)
391 {
392         struct ext4_dir_entry_tail *t;
393
394         if (!ext4_has_metadata_csum(inode->i_sb))
395                 return 1;
396
397         t = get_dirent_tail(inode, bh);
398         if (!t) {
399                 warn_no_space_for_csum(inode);
400                 return 0;
401         }
402
403         if (t->det_checksum != ext4_dirblock_csum(inode, bh->b_data,
404                                                   (char *)t - bh->b_data))
405                 return 0;
406
407         return 1;
408 }
409
410 static void ext4_dirblock_csum_set(struct inode *inode,
411                                  struct buffer_head *bh)
412 {
413         struct ext4_dir_entry_tail *t;
414
415         if (!ext4_has_metadata_csum(inode->i_sb))
416                 return;
417
418         t = get_dirent_tail(inode, bh);
419         if (!t) {
420                 warn_no_space_for_csum(inode);
421                 return;
422         }
423
424         t->det_checksum = ext4_dirblock_csum(inode, bh->b_data,
425                                              (char *)t - bh->b_data);
426 }
427
/*
 * Refresh the leaf checksum of directory block @bh (a no-op inside
 * ext4_dirblock_csum_set() when metadata checksums are disabled), then pass
 * the buffer to ext4_handle_dirty_metadata() and return its result.
 */
int ext4_handle_dirty_dirblock(handle_t *handle,
                               struct inode *inode,
                               struct buffer_head *bh)
{
        ext4_dirblock_csum_set(inode, bh);
        return ext4_handle_dirty_metadata(handle, inode, bh);
}
435
436 static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
437                                                struct ext4_dir_entry *dirent,
438                                                int *offset)
439 {
440         struct ext4_dir_entry *dp;
441         struct dx_root_info *root;
442         int count_offset;
443
444         if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
445                 count_offset = 8;
446         else if (le16_to_cpu(dirent->rec_len) == 12) {
447                 dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
448                 if (le16_to_cpu(dp->rec_len) !=
449                     EXT4_BLOCK_SIZE(inode->i_sb) - 12)
450                         return NULL;
451                 root = (struct dx_root_info *)(((void *)dp + 12));
452                 if (root->reserved_zero ||
453                     root->info_length != sizeof(struct dx_root_info))
454                         return NULL;
455                 count_offset = 32;
456         } else
457                 return NULL;
458
459         if (offset)
460                 *offset = count_offset;
461         return (struct dx_countlimit *)(((void *)dirent) + count_offset);
462 }
463
464 static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
465                            int count_offset, int count, struct dx_tail *t)
466 {
467         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
468         struct ext4_inode_info *ei = EXT4_I(inode);
469         __u32 csum;
470         int size;
471         __u32 dummy_csum = 0;
472         int offset = offsetof(struct dx_tail, dt_checksum);
473
474         size = count_offset + (count * sizeof(struct dx_entry));
475         csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
476         csum = ext4_chksum(sbi, csum, (__u8 *)t, offset);
477         csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, sizeof(dummy_csum));
478
479         return cpu_to_le32(csum);
480 }
481
482 static int ext4_dx_csum_verify(struct inode *inode,
483                                struct ext4_dir_entry *dirent)
484 {
485         struct dx_countlimit *c;
486         struct dx_tail *t;
487         int count_offset, limit, count;
488
489         if (!ext4_has_metadata_csum(inode->i_sb))
490                 return 1;
491
492         c = get_dx_countlimit(inode, dirent, &count_offset);
493         if (!c) {
494                 EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
495                 return 0;
496         }
497         limit = le16_to_cpu(c->limit);
498         count = le16_to_cpu(c->count);
499         if (count_offset + (limit * sizeof(struct dx_entry)) >
500             EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
501                 warn_no_space_for_csum(inode);
502                 return 0;
503         }
504         t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
505
506         if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset,
507                                             count, t))
508                 return 0;
509         return 1;
510 }
511
512 static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
513 {
514         struct dx_countlimit *c;
515         struct dx_tail *t;
516         int count_offset, limit, count;
517
518         if (!ext4_has_metadata_csum(inode->i_sb))
519                 return;
520
521         c = get_dx_countlimit(inode, dirent, &count_offset);
522         if (!c) {
523                 EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
524                 return;
525         }
526         limit = le16_to_cpu(c->limit);
527         count = le16_to_cpu(c->count);
528         if (count_offset + (limit * sizeof(struct dx_entry)) >
529             EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
530                 warn_no_space_for_csum(inode);
531                 return;
532         }
533         t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
534
535         t->dt_checksum = ext4_dx_csum(inode, dirent, count_offset, count, t);
536 }
537
/*
 * Refresh the index checksum of the htree block in @bh (a no-op inside
 * ext4_dx_csum_set() when metadata checksums are disabled), then pass the
 * buffer to ext4_handle_dirty_metadata() and return its result.
 */
static inline int ext4_handle_dirty_dx_node(handle_t *handle,
                                            struct inode *inode,
                                            struct buffer_head *bh)
{
        ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
        return ext4_handle_dirty_metadata(handle, inode, bh);
}
545
546 /*
547  * p is at least 6 bytes before the end of page
548  */
549 static inline struct ext4_dir_entry_2 *
550 ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
551 {
552         return (struct ext4_dir_entry_2 *)((char *)p +
553                 ext4_rec_len_from_disk(p->rec_len, blocksize));
554 }
555
556 /*
557  * Future: use high four bits of block for coalesce-on-delete flags
558  * Mask them off for now.
559  */
560
561 static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
562 {
563         return le32_to_cpu(entry->block) & 0x0fffffff;
564 }
565
566 static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
567 {
568         entry->block = cpu_to_le32(value);
569 }
570
571 static inline unsigned dx_get_hash(struct dx_entry *entry)
572 {
573         return le32_to_cpu(entry->hash);
574 }
575
576 static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
577 {
578         entry->hash = cpu_to_le32(value);
579 }
580
581 static inline unsigned dx_get_count(struct dx_entry *entries)
582 {
583         return le16_to_cpu(((struct dx_countlimit *) entries)->count);
584 }
585
586 static inline unsigned dx_get_limit(struct dx_entry *entries)
587 {
588         return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
589 }
590
591 static inline void dx_set_count(struct dx_entry *entries, unsigned value)
592 {
593         ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
594 }
595
596 static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
597 {
598         ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
599 }
600
601 static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
602 {
603         unsigned int entry_space = dir->i_sb->s_blocksize -
604                         ext4_dir_rec_len(1, NULL) -
605                         ext4_dir_rec_len(2, NULL) - infosize;
606
607         if (ext4_has_metadata_csum(dir->i_sb))
608                 entry_space -= sizeof(struct dx_tail);
609         return entry_space / sizeof(struct dx_entry);
610 }
611
612 static inline unsigned dx_node_limit(struct inode *dir)
613 {
614         unsigned int entry_space = dir->i_sb->s_blocksize -
615                         ext4_dir_rec_len(0, dir);
616
617         if (ext4_has_metadata_csum(dir->i_sb))
618                 entry_space -= sizeof(struct dx_tail);
619         return entry_space / sizeof(struct dx_entry);
620 }
621
622 /*
623  * Debug
624  */
625 #ifdef DX_DEBUG
626 static void dx_show_index(char * label, struct dx_entry *entries)
627 {
628         int i, n = dx_get_count (entries);
629         printk(KERN_DEBUG "%s index", label);
630         for (i = 0; i < n; i++) {
631                 printk(KERN_CONT " %x->%lu",
632                        i ? dx_get_hash(entries + i) : 0,
633                        (unsigned long)dx_get_block(entries + i));
634         }
635         printk(KERN_CONT "\n");
636 }
637
/* Totals accumulated by the dx_show_* debug helpers. */
struct stats {
        unsigned names;         /* number of in-use directory entries */
        unsigned space;         /* sum of their record lengths */
        unsigned bcount;        /* number of leaf blocks visited */
};
644
645 static struct stats dx_show_leaf(struct inode *dir,
646                                 struct dx_hash_info *hinfo,
647                                 struct ext4_dir_entry_2 *de,
648                                 int size, int show_names)
649 {
650         unsigned names = 0, space = 0;
651         char *base = (char *) de;
652         struct dx_hash_info h = *hinfo;
653
654         printk("names: ");
655         while ((char *) de < base + size)
656         {
657                 if (de->inode)
658                 {
659                         if (show_names)
660                         {
661 #ifdef CONFIG_FS_ENCRYPTION
662                                 int len;
663                                 char *name;
664                                 struct fscrypt_str fname_crypto_str =
665                                         FSTR_INIT(NULL, 0);
666                                 int res = 0;
667
668                                 name  = de->name;
669                                 len = de->name_len;
670                                 if (!IS_ENCRYPTED(dir)) {
671                                         /* Directory is not encrypted */
672                                         ext4fs_dirhash(dir, de->name,
673                                                 de->name_len, &h);
674                                         printk("%*.s:(U)%x.%u ", len,
675                                                name, h.hash,
676                                                (unsigned) ((char *) de
677                                                            - base));
678                                 } else {
679                                         struct fscrypt_str de_name =
680                                                 FSTR_INIT(name, len);
681
682                                         /* Directory is encrypted */
683                                         res = fscrypt_fname_alloc_buffer(
684                                                 len, &fname_crypto_str);
685                                         if (res)
686                                                 printk(KERN_WARNING "Error "
687                                                         "allocating crypto "
688                                                         "buffer--skipping "
689                                                         "crypto\n");
690                                         res = fscrypt_fname_disk_to_usr(dir,
691                                                 0, 0, &de_name,
692                                                 &fname_crypto_str);
693                                         if (res) {
694                                                 printk(KERN_WARNING "Error "
695                                                         "converting filename "
696                                                         "from disk to usr"
697                                                         "\n");
698                                                 name = "??";
699                                                 len = 2;
700                                         } else {
701                                                 name = fname_crypto_str.name;
702                                                 len = fname_crypto_str.len;
703                                         }
704                                         if (IS_CASEFOLDED(dir))
705                                                 h.hash = EXT4_DIRENT_HASH(de);
706                                         else
707                                                 ext4fs_dirhash(dir, de->name,
708                                                        de->name_len, &h);
709                                         printk("%*.s:(E)%x.%u ", len, name,
710                                                h.hash, (unsigned) ((char *) de
711                                                                    - base));
712                                         fscrypt_fname_free_buffer(
713                                                         &fname_crypto_str);
714                                 }
715 #else
716                                 int len = de->name_len;
717                                 char *name = de->name;
718                                 ext4fs_dirhash(dir, de->name, de->name_len, &h);
719                                 printk("%*.s:%x.%u ", len, name, h.hash,
720                                        (unsigned) ((char *) de - base));
721 #endif
722                         }
723                         space += ext4_dir_rec_len(de->name_len, dir);
724                         names++;
725                 }
726                 de = ext4_next_entry(de, size);
727         }
728         printk(KERN_CONT "(%i)\n", names);
729         return (struct stats) { names, space, 1 };
730 }
731
732 struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
733                              struct dx_entry *entries, int levels)
734 {
735         unsigned blocksize = dir->i_sb->s_blocksize;
736         unsigned count = dx_get_count(entries), names = 0, space = 0, i;
737         unsigned bcount = 0;
738         struct buffer_head *bh;
739         printk("%i indexed blocks...\n", count);
740         for (i = 0; i < count; i++, entries++)
741         {
742                 ext4_lblk_t block = dx_get_block(entries);
743                 ext4_lblk_t hash  = i ? dx_get_hash(entries): 0;
744                 u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
745                 struct stats stats;
746                 printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
747                 bh = ext4_bread(NULL,dir, block, 0);
748                 if (!bh || IS_ERR(bh))
749                         continue;
750                 stats = levels?
751                    dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
752                    dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *)
753                         bh->b_data, blocksize, 0);
754                 names += stats.names;
755                 space += stats.space;
756                 bcount += stats.bcount;
757                 brelse(bh);
758         }
759         if (bcount)
760                 printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
761                        levels ? "" : "   ", names, space/bcount,
762                        (space/bcount)*100/blocksize);
763         return (struct stats) { names, space, bcount};
764 }
765
766 /*
767  * Linear search cross check
768  */
769 static inline void htree_rep_invariant_check(struct dx_entry *at,
770                                              struct dx_entry *target,
771                                              u32 hash, unsigned int n)
772 {
773         while (n--) {
774                 dxtrace(printk(KERN_CONT ","));
775                 if (dx_get_hash(++at) > hash) {
776                         at--;
777                         break;
778                 }
779         }
780         ASSERT(at == target - 1);
781 }
782 #else /* DX_DEBUG */
/* Without DX_DEBUG the linear-search cross check compiles to nothing. */
static inline void htree_rep_invariant_check(struct dx_entry *at,
                                             struct dx_entry *target,
                                             u32 hash, unsigned int n)
{
}
788 #endif /* DX_DEBUG */
789
790 /*
791  * Probe for a directory leaf block to search.
792  *
793  * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
794  * error in the directory index, and the caller should fall back to
795  * searching the directory normally.  The callers of dx_probe **MUST**
796  * check for this error code, and make sure it never gets reflected
797  * back to userspace.
798  */
799 static struct dx_frame *
800 dx_probe(struct ext4_filename *fname, struct inode *dir,
801          struct dx_hash_info *hinfo, struct dx_frame *frame_in)
802 {
803         unsigned count, indirect, level, i;
804         struct dx_entry *at, *entries, *p, *q, *m;
805         struct dx_root *root;
806         struct dx_frame *frame = frame_in;
807         struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
808         u32 hash;
809         ext4_lblk_t block;
810         ext4_lblk_t blocks[EXT4_HTREE_LEVEL];
811
812         memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0]));
813         frame->bh = ext4_read_dirblock(dir, 0, INDEX);
814         if (IS_ERR(frame->bh))
815                 return (struct dx_frame *) frame->bh;
816
817         root = (struct dx_root *) frame->bh->b_data;
818         if (root->info.hash_version != DX_HASH_TEA &&
819             root->info.hash_version != DX_HASH_HALF_MD4 &&
820             root->info.hash_version != DX_HASH_LEGACY &&
821             root->info.hash_version != DX_HASH_SIPHASH) {
822                 ext4_warning_inode(dir, "Unrecognised inode hash code %u",
823                                    root->info.hash_version);
824                 goto fail;
825         }
826         if (ext4_hash_in_dirent(dir)) {
827                 if (root->info.hash_version != DX_HASH_SIPHASH) {
828                         ext4_warning_inode(dir,
829                                 "Hash in dirent, but hash is not SIPHASH");
830                         goto fail;
831                 }
832         } else {
833                 if (root->info.hash_version == DX_HASH_SIPHASH) {
834                         ext4_warning_inode(dir,
835                                 "Hash code is SIPHASH, but hash not in dirent");
836                         goto fail;
837                 }
838         }
839         if (fname)
840                 hinfo = &fname->hinfo;
841         hinfo->hash_version = root->info.hash_version;
842         if (hinfo->hash_version <= DX_HASH_TEA)
843                 hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
844         hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
845         /* hash is already computed for encrypted casefolded directory */
846         if (fname && fname_name(fname) &&
847                                 !(IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir)))
848                 ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), hinfo);
849         hash = hinfo->hash;
850
851         if (root->info.unused_flags & 1) {
852                 ext4_warning_inode(dir, "Unimplemented hash flags: %#06x",
853                                    root->info.unused_flags);
854                 goto fail;
855         }
856
857         indirect = root->info.indirect_levels;
858         if (indirect >= ext4_dir_htree_level(dir->i_sb)) {
859                 ext4_warning(dir->i_sb,
860                              "Directory (ino: %lu) htree depth %#06x exceed"
861                              "supported value", dir->i_ino,
862                              ext4_dir_htree_level(dir->i_sb));
863                 if (ext4_dir_htree_level(dir->i_sb) < EXT4_HTREE_LEVEL) {
864                         ext4_warning(dir->i_sb, "Enable large directory "
865                                                 "feature to access it");
866                 }
867                 goto fail;
868         }
869
870         entries = (struct dx_entry *)(((char *)&root->info) +
871                                       root->info.info_length);
872
873         if (dx_get_limit(entries) != dx_root_limit(dir,
874                                                    root->info.info_length)) {
875                 ext4_warning_inode(dir, "dx entry: limit %u != root limit %u",
876                                    dx_get_limit(entries),
877                                    dx_root_limit(dir, root->info.info_length));
878                 goto fail;
879         }
880
881         dxtrace(printk("Look up %x", hash));
882         level = 0;
883         blocks[0] = 0;
884         while (1) {
885                 count = dx_get_count(entries);
886                 if (!count || count > dx_get_limit(entries)) {
887                         ext4_warning_inode(dir,
888                                            "dx entry: count %u beyond limit %u",
889                                            count, dx_get_limit(entries));
890                         goto fail;
891                 }
892
893                 p = entries + 1;
894                 q = entries + count - 1;
895                 while (p <= q) {
896                         m = p + (q - p) / 2;
897                         dxtrace(printk(KERN_CONT "."));
898                         if (dx_get_hash(m) > hash)
899                                 q = m - 1;
900                         else
901                                 p = m + 1;
902                 }
903
904                 htree_rep_invariant_check(entries, p, hash, count - 1);
905
906                 at = p - 1;
907                 dxtrace(printk(KERN_CONT " %x->%u\n",
908                                at == entries ? 0 : dx_get_hash(at),
909                                dx_get_block(at)));
910                 frame->entries = entries;
911                 frame->at = at;
912
913                 block = dx_get_block(at);
914                 for (i = 0; i <= level; i++) {
915                         if (blocks[i] == block) {
916                                 ext4_warning_inode(dir,
917                                         "dx entry: tree cycle block %u points back to block %u",
918                                         blocks[level], block);
919                                 goto fail;
920                         }
921                 }
922                 if (++level > indirect)
923                         return frame;
924                 blocks[level] = block;
925                 frame++;
926                 frame->bh = ext4_read_dirblock(dir, block, INDEX);
927                 if (IS_ERR(frame->bh)) {
928                         ret_err = (struct dx_frame *) frame->bh;
929                         frame->bh = NULL;
930                         goto fail;
931                 }
932
933                 entries = ((struct dx_node *) frame->bh->b_data)->entries;
934
935                 if (dx_get_limit(entries) != dx_node_limit(dir)) {
936                         ext4_warning_inode(dir,
937                                 "dx entry: limit %u != node limit %u",
938                                 dx_get_limit(entries), dx_node_limit(dir));
939                         goto fail;
940                 }
941         }
942 fail:
943         while (frame >= frame_in) {
944                 brelse(frame->bh);
945                 frame--;
946         }
947
948         if (ret_err == ERR_PTR(ERR_BAD_DX_DIR))
949                 ext4_warning_inode(dir,
950                         "Corrupt directory, running e2fsck is recommended");
951         return ret_err;
952 }
953
954 static void dx_release(struct dx_frame *frames)
955 {
956         struct dx_root_info *info;
957         int i;
958         unsigned int indirect_levels;
959
960         if (frames[0].bh == NULL)
961                 return;
962
963         info = &((struct dx_root *)frames[0].bh->b_data)->info;
964         /* save local copy, "info" may be freed after brelse() */
965         indirect_levels = info->indirect_levels;
966         for (i = 0; i <= indirect_levels; i++) {
967                 if (frames[i].bh == NULL)
968                         break;
969                 brelse(frames[i].bh);
970                 frames[i].bh = NULL;
971         }
972 }
973
974 /*
975  * This function increments the frame pointer to search the next leaf
976  * block, and reads in the necessary intervening nodes if the search
977  * should be necessary.  Whether or not the search is necessary is
978  * controlled by the hash parameter.  If the hash value is even, then
979  * the search is only continued if the next block starts with that
980  * hash value.  This is used if we are searching for a specific file.
981  *
982  * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
983  *
984  * This function returns 1 if the caller should continue to search,
985  * or 0 if it should not.  If there is an error reading one of the
986  * index blocks, it will a negative error code.
987  *
988  * If start_hash is non-null, it will be filled in with the starting
989  * hash of the next page.
990  */
991 static int ext4_htree_next_block(struct inode *dir, __u32 hash,
992                                  struct dx_frame *frame,
993                                  struct dx_frame *frames,
994                                  __u32 *start_hash)
995 {
996         struct dx_frame *p;
997         struct buffer_head *bh;
998         int num_frames = 0;
999         __u32 bhash;
1000
1001         p = frame;
1002         /*
1003          * Find the next leaf page by incrementing the frame pointer.
1004          * If we run out of entries in the interior node, loop around and
1005          * increment pointer in the parent node.  When we break out of
1006          * this loop, num_frames indicates the number of interior
1007          * nodes need to be read.
1008          */
1009         while (1) {
1010                 if (++(p->at) < p->entries + dx_get_count(p->entries))
1011                         break;
1012                 if (p == frames)
1013                         return 0;
1014                 num_frames++;
1015                 p--;
1016         }
1017
1018         /*
1019          * If the hash is 1, then continue only if the next page has a
1020          * continuation hash of any value.  This is used for readdir
1021          * handling.  Otherwise, check to see if the hash matches the
1022          * desired continuation hash.  If it doesn't, return since
1023          * there's no point to read in the successive index pages.
1024          */
1025         bhash = dx_get_hash(p->at);
1026         if (start_hash)
1027                 *start_hash = bhash;
1028         if ((hash & 1) == 0) {
1029                 if ((bhash & ~1) != hash)
1030                         return 0;
1031         }
1032         /*
1033          * If the hash is HASH_NB_ALWAYS, we always go to the next
1034          * block so no check is necessary
1035          */
1036         while (num_frames--) {
1037                 bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX);
1038                 if (IS_ERR(bh))
1039                         return PTR_ERR(bh);
1040                 p++;
1041                 brelse(p->bh);
1042                 p->bh = bh;
1043                 p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
1044         }
1045         return 1;
1046 }
1047
1048
1049 /*
1050  * This function fills a red-black tree with information from a
1051  * directory block.  It returns the number directory entries loaded
1052  * into the tree.  If there is an error it is returned in err.
1053  */
1054 static int htree_dirblock_to_tree(struct file *dir_file,
1055                                   struct inode *dir, ext4_lblk_t block,
1056                                   struct dx_hash_info *hinfo,
1057                                   __u32 start_hash, __u32 start_minor_hash)
1058 {
1059         struct buffer_head *bh;
1060         struct ext4_dir_entry_2 *de, *top;
1061         int err = 0, count = 0;
1062         struct fscrypt_str fname_crypto_str = FSTR_INIT(NULL, 0), tmp_str;
1063         int csum = ext4_has_metadata_csum(dir->i_sb);
1064
1065         dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
1066                                                         (unsigned long)block));
1067         bh = ext4_read_dirblock(dir, block, DIRENT_HTREE);
1068         if (IS_ERR(bh))
1069                 return PTR_ERR(bh);
1070
1071         de = (struct ext4_dir_entry_2 *) bh->b_data;
1072         /* csum entries are not larger in the casefolded encrypted case */
1073         top = (struct ext4_dir_entry_2 *) ((char *) de +
1074                                            dir->i_sb->s_blocksize -
1075                                            ext4_dir_rec_len(0,
1076                                                            csum ? NULL : dir));
1077         /* Check if the directory is encrypted */
1078         if (IS_ENCRYPTED(dir)) {
1079                 err = fscrypt_prepare_readdir(dir);
1080                 if (err < 0) {
1081                         brelse(bh);
1082                         return err;
1083                 }
1084                 err = fscrypt_fname_alloc_buffer(EXT4_NAME_LEN,
1085                                                  &fname_crypto_str);
1086                 if (err < 0) {
1087                         brelse(bh);
1088                         return err;
1089                 }
1090         }
1091
1092         for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
1093                 if (ext4_check_dir_entry(dir, NULL, de, bh,
1094                                 bh->b_data, bh->b_size,
1095                                 (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
1096                                          + ((char *)de - bh->b_data))) {
1097                         /* silently ignore the rest of the block */
1098                         break;
1099                 }
1100                 if (ext4_hash_in_dirent(dir)) {
1101                         if (de->name_len && de->inode) {
1102                                 hinfo->hash = EXT4_DIRENT_HASH(de);
1103                                 hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de);
1104                         } else {
1105                                 hinfo->hash = 0;
1106                                 hinfo->minor_hash = 0;
1107                         }
1108                 } else {
1109                         ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
1110                 }
1111                 if ((hinfo->hash < start_hash) ||
1112                     ((hinfo->hash == start_hash) &&
1113                      (hinfo->minor_hash < start_minor_hash)))
1114                         continue;
1115                 if (de->inode == 0)
1116                         continue;
1117                 if (!IS_ENCRYPTED(dir)) {
1118                         tmp_str.name = de->name;
1119                         tmp_str.len = de->name_len;
1120                         err = ext4_htree_store_dirent(dir_file,
1121                                    hinfo->hash, hinfo->minor_hash, de,
1122                                    &tmp_str);
1123                 } else {
1124                         int save_len = fname_crypto_str.len;
1125                         struct fscrypt_str de_name = FSTR_INIT(de->name,
1126                                                                 de->name_len);
1127
1128                         /* Directory is encrypted */
1129                         err = fscrypt_fname_disk_to_usr(dir, hinfo->hash,
1130                                         hinfo->minor_hash, &de_name,
1131                                         &fname_crypto_str);
1132                         if (err) {
1133                                 count = err;
1134                                 goto errout;
1135                         }
1136                         err = ext4_htree_store_dirent(dir_file,
1137                                    hinfo->hash, hinfo->minor_hash, de,
1138                                         &fname_crypto_str);
1139                         fname_crypto_str.len = save_len;
1140                 }
1141                 if (err != 0) {
1142                         count = err;
1143                         goto errout;
1144                 }
1145                 count++;
1146         }
1147 errout:
1148         brelse(bh);
1149         fscrypt_fname_free_buffer(&fname_crypto_str);
1150         return count;
1151 }
1152
1153
1154 /*
1155  * This function fills a red-black tree with information from a
1156  * directory.  We start scanning the directory in hash order, starting
1157  * at start_hash and start_minor_hash.
1158  *
1159  * This function returns the number of entries inserted into the tree,
1160  * or a negative error code.
1161  */
1162 int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
1163                          __u32 start_minor_hash, __u32 *next_hash)
1164 {
1165         struct dx_hash_info hinfo;
1166         struct ext4_dir_entry_2 *de;
1167         struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
1168         struct inode *dir;
1169         ext4_lblk_t block;
1170         int count = 0;
1171         int ret, err;
1172         __u32 hashval;
1173         struct fscrypt_str tmp_str;
1174
1175         dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
1176                        start_hash, start_minor_hash));
1177         dir = file_inode(dir_file);
1178         if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
1179                 if (ext4_hash_in_dirent(dir))
1180                         hinfo.hash_version = DX_HASH_SIPHASH;
1181                 else
1182                         hinfo.hash_version =
1183                                         EXT4_SB(dir->i_sb)->s_def_hash_version;
1184                 if (hinfo.hash_version <= DX_HASH_TEA)
1185                         hinfo.hash_version +=
1186                                 EXT4_SB(dir->i_sb)->s_hash_unsigned;
1187                 hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
1188                 if (ext4_has_inline_data(dir)) {
1189                         int has_inline_data = 1;
1190                         count = ext4_inlinedir_to_tree(dir_file, dir, 0,
1191                                                        &hinfo, start_hash,
1192                                                        start_minor_hash,
1193                                                        &has_inline_data);
1194                         if (has_inline_data) {
1195                                 *next_hash = ~0;
1196                                 return count;
1197                         }
1198                 }
1199                 count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
1200                                                start_hash, start_minor_hash);
1201                 *next_hash = ~0;
1202                 return count;
1203         }
1204         hinfo.hash = start_hash;
1205         hinfo.minor_hash = 0;
1206         frame = dx_probe(NULL, dir, &hinfo, frames);
1207         if (IS_ERR(frame))
1208                 return PTR_ERR(frame);
1209
1210         /* Add '.' and '..' from the htree header */
1211         if (!start_hash && !start_minor_hash) {
1212                 de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
1213                 tmp_str.name = de->name;
1214                 tmp_str.len = de->name_len;
1215                 err = ext4_htree_store_dirent(dir_file, 0, 0,
1216                                               de, &tmp_str);
1217                 if (err != 0)
1218                         goto errout;
1219                 count++;
1220         }
1221         if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
1222                 de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
1223                 de = ext4_next_entry(de, dir->i_sb->s_blocksize);
1224                 tmp_str.name = de->name;
1225                 tmp_str.len = de->name_len;
1226                 err = ext4_htree_store_dirent(dir_file, 2, 0,
1227                                               de, &tmp_str);
1228                 if (err != 0)
1229                         goto errout;
1230                 count++;
1231         }
1232
1233         while (1) {
1234                 if (fatal_signal_pending(current)) {
1235                         err = -ERESTARTSYS;
1236                         goto errout;
1237                 }
1238                 cond_resched();
1239                 block = dx_get_block(frame->at);
1240                 ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
1241                                              start_hash, start_minor_hash);
1242                 if (ret < 0) {
1243                         err = ret;
1244                         goto errout;
1245                 }
1246                 count += ret;
1247                 hashval = ~0;
1248                 ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
1249                                             frame, frames, &hashval);
1250                 *next_hash = hashval;
1251                 if (ret < 0) {
1252                         err = ret;
1253                         goto errout;
1254                 }
1255                 /*
1256                  * Stop if:  (a) there are no more entries, or
1257                  * (b) we have inserted at least one entry and the
1258                  * next hash value is not a continuation
1259                  */
1260                 if ((ret == 0) ||
1261                     (count && ((hashval & 1) == 0)))
1262                         break;
1263         }
1264         dx_release(frames);
1265         dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
1266                        "next hash: %x\n", count, *next_hash));
1267         return count;
1268 errout:
1269         dx_release(frames);
1270         return (err);
1271 }
1272
1273 static inline int search_dirblock(struct buffer_head *bh,
1274                                   struct inode *dir,
1275                                   struct ext4_filename *fname,
1276                                   unsigned int offset,
1277                                   struct ext4_dir_entry_2 **res_dir)
1278 {
1279         return ext4_search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir,
1280                                fname, offset, res_dir);
1281 }
1282
1283 /*
1284  * Directory block splitting, compacting
1285  */
1286
1287 /*
1288  * Create map of hash values, offsets, and sizes, stored at end of block.
1289  * Returns number of entries mapped.
1290  */
1291 static int dx_make_map(struct inode *dir, struct buffer_head *bh,
1292                        struct dx_hash_info *hinfo,
1293                        struct dx_map_entry *map_tail)
1294 {
1295         int count = 0;
1296         struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data;
1297         unsigned int buflen = bh->b_size;
1298         char *base = bh->b_data;
1299         struct dx_hash_info h = *hinfo;
1300
1301         if (ext4_has_metadata_csum(dir->i_sb))
1302                 buflen -= sizeof(struct ext4_dir_entry_tail);
1303
1304         while ((char *) de < base + buflen) {
1305                 if (ext4_check_dir_entry(dir, NULL, de, bh, base, buflen,
1306                                          ((char *)de) - base))
1307                         return -EFSCORRUPTED;
1308                 if (de->name_len && de->inode) {
1309                         if (ext4_hash_in_dirent(dir))
1310                                 h.hash = EXT4_DIRENT_HASH(de);
1311                         else
1312                                 ext4fs_dirhash(dir, de->name, de->name_len, &h);
1313                         map_tail--;
1314                         map_tail->hash = h.hash;
1315                         map_tail->offs = ((char *) de - base)>>2;
1316                         map_tail->size = le16_to_cpu(de->rec_len);
1317                         count++;
1318                         cond_resched();
1319                 }
1320                 de = ext4_next_entry(de, dir->i_sb->s_blocksize);
1321         }
1322         return count;
1323 }
1324
1325 /* Sort map by hash value */
1326 static void dx_sort_map (struct dx_map_entry *map, unsigned count)
1327 {
1328         struct dx_map_entry *p, *q, *top = map + count - 1;
1329         int more;
1330         /* Combsort until bubble sort doesn't suck */
1331         while (count > 2) {
1332                 count = count*10/13;
1333                 if (count - 9 < 2) /* 9, 10 -> 11 */
1334                         count = 11;
1335                 for (p = top, q = p - count; q >= map; p--, q--)
1336                         if (p->hash < q->hash)
1337                                 swap(*p, *q);
1338         }
1339         /* Garden variety bubble sort */
1340         do {
1341                 more = 0;
1342                 q = top;
1343                 while (q-- > map) {
1344                         if (q[1].hash >= q[0].hash)
1345                                 continue;
1346                         swap(*(q+1), *q);
1347                         more = 1;
1348                 }
1349         } while(more);
1350 }
1351
1352 static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
1353 {
1354         struct dx_entry *entries = frame->entries;
1355         struct dx_entry *old = frame->at, *new = old + 1;
1356         int count = dx_get_count(entries);
1357
1358         ASSERT(count < dx_get_limit(entries));
1359         ASSERT(old < entries + count);
1360         memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
1361         dx_set_hash(new, hash);
1362         dx_set_block(new, block);
1363         dx_set_count(entries, count + 1);
1364 }
1365
1366 #if IS_ENABLED(CONFIG_UNICODE)
1367 /*
1368  * Test whether a case-insensitive directory entry matches the filename
1369  * being searched for.  If quick is set, assume the name being looked up
1370  * is already in the casefolded form.
1371  *
1372  * Returns: 0 if the directory entry matches, more than 0 if it
1373  * doesn't match or less than zero on error.
1374  */
1375 static int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
1376                            u8 *de_name, size_t de_name_len, bool quick)
1377 {
1378         const struct super_block *sb = parent->i_sb;
1379         const struct unicode_map *um = sb->s_encoding;
1380         struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
1381         struct qstr entry = QSTR_INIT(de_name, de_name_len);
1382         int ret;
1383
1384         if (IS_ENCRYPTED(parent)) {
1385                 const struct fscrypt_str encrypted_name =
1386                                 FSTR_INIT(de_name, de_name_len);
1387
1388                 decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
1389                 if (!decrypted_name.name)
1390                         return -ENOMEM;
1391                 ret = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name,
1392                                                 &decrypted_name);
1393                 if (ret < 0)
1394                         goto out;
1395                 entry.name = decrypted_name.name;
1396                 entry.len = decrypted_name.len;
1397         }
1398
1399         if (quick)
1400                 ret = utf8_strncasecmp_folded(um, name, &entry);
1401         else
1402                 ret = utf8_strncasecmp(um, name, &entry);
1403         if (ret < 0) {
1404                 /* Handle invalid character sequence as either an error
1405                  * or as an opaque byte sequence.
1406                  */
1407                 if (sb_has_strict_encoding(sb))
1408                         ret = -EINVAL;
1409                 else if (name->len != entry.len)
1410                         ret = 1;
1411                 else
1412                         ret = !!memcmp(name->name, entry.name, entry.len);
1413         }
1414 out:
1415         kfree(decrypted_name.name);
1416         return ret;
1417 }
1418
1419 int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
1420                                   struct ext4_filename *name)
1421 {
1422         struct fscrypt_str *cf_name = &name->cf_name;
1423         struct dx_hash_info *hinfo = &name->hinfo;
1424         int len;
1425
1426         if (!IS_CASEFOLDED(dir) || !dir->i_sb->s_encoding ||
1427             (IS_ENCRYPTED(dir) && !fscrypt_has_encryption_key(dir))) {
1428                 cf_name->name = NULL;
1429                 return 0;
1430         }
1431
1432         cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS);
1433         if (!cf_name->name)
1434                 return -ENOMEM;
1435
1436         len = utf8_casefold(dir->i_sb->s_encoding,
1437                             iname, cf_name->name,
1438                             EXT4_NAME_LEN);
1439         if (len <= 0) {
1440                 kfree(cf_name->name);
1441                 cf_name->name = NULL;
1442         }
1443         cf_name->len = (unsigned) len;
1444         if (!IS_ENCRYPTED(dir))
1445                 return 0;
1446
1447         hinfo->hash_version = DX_HASH_SIPHASH;
1448         hinfo->seed = NULL;
1449         if (cf_name->name)
1450                 ext4fs_dirhash(dir, cf_name->name, cf_name->len, hinfo);
1451         else
1452                 ext4fs_dirhash(dir, iname->name, iname->len, hinfo);
1453         return 0;
1454 }
1455 #endif
1456
1457 /*
1458  * Test whether a directory entry matches the filename being searched for.
1459  *
1460  * Return: %true if the directory entry matches, otherwise %false.
1461  */
1462 static bool ext4_match(struct inode *parent,
1463                               const struct ext4_filename *fname,
1464                               struct ext4_dir_entry_2 *de)
1465 {
1466         struct fscrypt_name f;
1467
1468         if (!de->inode)
1469                 return false;
1470
1471         f.usr_fname = fname->usr_fname;
1472         f.disk_name = fname->disk_name;
1473 #ifdef CONFIG_FS_ENCRYPTION
1474         f.crypto_buf = fname->crypto_buf;
1475 #endif
1476
1477 #if IS_ENABLED(CONFIG_UNICODE)
1478         if (parent->i_sb->s_encoding && IS_CASEFOLDED(parent) &&
1479             (!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) {
1480                 if (fname->cf_name.name) {
1481                         struct qstr cf = {.name = fname->cf_name.name,
1482                                           .len = fname->cf_name.len};
1483                         if (IS_ENCRYPTED(parent)) {
1484                                 if (fname->hinfo.hash != EXT4_DIRENT_HASH(de) ||
1485                                         fname->hinfo.minor_hash !=
1486                                                 EXT4_DIRENT_MINOR_HASH(de)) {
1487
1488                                         return false;
1489                                 }
1490                         }
1491                         return !ext4_ci_compare(parent, &cf, de->name,
1492                                                         de->name_len, true);
1493                 }
1494                 return !ext4_ci_compare(parent, fname->usr_fname, de->name,
1495                                                 de->name_len, false);
1496         }
1497 #endif
1498
1499         return fscrypt_match_name(&f, de->name, de->name_len);
1500 }
1501
1502 /*
1503  * Returns 0 if not found, -1 on failure, and 1 on success
1504  */
1505 int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
1506                     struct inode *dir, struct ext4_filename *fname,
1507                     unsigned int offset, struct ext4_dir_entry_2 **res_dir)
1508 {
1509         struct ext4_dir_entry_2 * de;
1510         char * dlimit;
1511         int de_len;
1512
1513         de = (struct ext4_dir_entry_2 *)search_buf;
1514         dlimit = search_buf + buf_size;
1515         while ((char *) de < dlimit - EXT4_BASE_DIR_LEN) {
1516                 /* this code is executed quadratically often */
1517                 /* do minimal checking `by hand' */
1518                 if (de->name + de->name_len <= dlimit &&
1519                     ext4_match(dir, fname, de)) {
1520                         /* found a match - just to be sure, do
1521                          * a full check */
1522                         if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf,
1523                                                  buf_size, offset))
1524                                 return -1;
1525                         *res_dir = de;
1526                         return 1;
1527                 }
1528                 /* prevent looping on a bad block */
1529                 de_len = ext4_rec_len_from_disk(de->rec_len,
1530                                                 dir->i_sb->s_blocksize);
1531                 if (de_len <= 0)
1532                         return -1;
1533                 offset += de_len;
1534                 de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
1535         }
1536         return 0;
1537 }
1538
1539 static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
1540                                struct ext4_dir_entry *de)
1541 {
1542         struct super_block *sb = dir->i_sb;
1543
1544         if (!is_dx(dir))
1545                 return 0;
1546         if (block == 0)
1547                 return 1;
1548         if (de->inode == 0 &&
1549             ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) ==
1550                         sb->s_blocksize)
1551                 return 1;
1552         return 0;
1553 }
1554
1555 /*
1556  *      __ext4_find_entry()
1557  *
1558  * finds an entry in the specified directory with the wanted name. It
1559  * returns the cache buffer in which the entry was found, and the entry
1560  * itself (as a parameter - res_dir). It does NOT read the inode of the
1561  * entry - you'll have to do that yourself if you want to.
1562  *
1563  * The returned buffer_head has ->b_count elevated.  The caller is expected
1564  * to brelse() it when appropriate.
1565  */
1566 static struct buffer_head *__ext4_find_entry(struct inode *dir,
1567                                              struct ext4_filename *fname,
1568                                              struct ext4_dir_entry_2 **res_dir,
1569                                              int *inlined)
1570 {
1571         struct super_block *sb;
1572         struct buffer_head *bh_use[NAMEI_RA_SIZE];
1573         struct buffer_head *bh, *ret = NULL;
1574         ext4_lblk_t start, block;
1575         const u8 *name = fname->usr_fname->name;
1576         size_t ra_max = 0;      /* Number of bh's in the readahead
1577                                    buffer, bh_use[] */
1578         size_t ra_ptr = 0;      /* Current index into readahead
1579                                    buffer */
1580         ext4_lblk_t  nblocks;
1581         int i, namelen, retval;
1582
1583         *res_dir = NULL;
1584         sb = dir->i_sb;
1585         namelen = fname->usr_fname->len;
1586         if (namelen > EXT4_NAME_LEN)
1587                 return NULL;
1588
1589         if (ext4_has_inline_data(dir)) {
1590                 int has_inline_data = 1;
1591                 ret = ext4_find_inline_entry(dir, fname, res_dir,
1592                                              &has_inline_data);
1593                 if (has_inline_data) {
1594                         if (inlined)
1595                                 *inlined = 1;
1596                         goto cleanup_and_exit;
1597                 }
1598         }
1599
1600         if ((namelen <= 2) && (name[0] == '.') &&
1601             (name[1] == '.' || name[1] == '\0')) {
1602                 /*
1603                  * "." or ".." will only be in the first block
1604                  * NFS may look up ".."; "." should be handled by the VFS
1605                  */
1606                 block = start = 0;
1607                 nblocks = 1;
1608                 goto restart;
1609         }
1610         if (is_dx(dir)) {
1611                 ret = ext4_dx_find_entry(dir, fname, res_dir);
1612                 /*
1613                  * On success, or if the error was file not found,
1614                  * return.  Otherwise, fall back to doing a search the
1615                  * old fashioned way.
1616                  */
1617                 if (!IS_ERR(ret) || PTR_ERR(ret) != ERR_BAD_DX_DIR)
1618                         goto cleanup_and_exit;
1619                 dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
1620                                "falling back\n"));
1621                 ret = NULL;
1622         }
1623         nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
1624         if (!nblocks) {
1625                 ret = NULL;
1626                 goto cleanup_and_exit;
1627         }
1628         start = EXT4_I(dir)->i_dir_start_lookup;
1629         if (start >= nblocks)
1630                 start = 0;
1631         block = start;
1632 restart:
1633         do {
1634                 /*
1635                  * We deal with the read-ahead logic here.
1636                  */
1637                 cond_resched();
1638                 if (ra_ptr >= ra_max) {
1639                         /* Refill the readahead buffer */
1640                         ra_ptr = 0;
1641                         if (block < start)
1642                                 ra_max = start - block;
1643                         else
1644                                 ra_max = nblocks - block;
1645                         ra_max = min(ra_max, ARRAY_SIZE(bh_use));
1646                         retval = ext4_bread_batch(dir, block, ra_max,
1647                                                   false /* wait */, bh_use);
1648                         if (retval) {
1649                                 ret = ERR_PTR(retval);
1650                                 ra_max = 0;
1651                                 goto cleanup_and_exit;
1652                         }
1653                 }
1654                 if ((bh = bh_use[ra_ptr++]) == NULL)
1655                         goto next;
1656                 wait_on_buffer(bh);
1657                 if (!buffer_uptodate(bh)) {
1658                         EXT4_ERROR_INODE_ERR(dir, EIO,
1659                                              "reading directory lblock %lu",
1660                                              (unsigned long) block);
1661                         brelse(bh);
1662                         ret = ERR_PTR(-EIO);
1663                         goto cleanup_and_exit;
1664                 }
1665                 if (!buffer_verified(bh) &&
1666                     !is_dx_internal_node(dir, block,
1667                                          (struct ext4_dir_entry *)bh->b_data) &&
1668                     !ext4_dirblock_csum_verify(dir, bh)) {
1669                         EXT4_ERROR_INODE_ERR(dir, EFSBADCRC,
1670                                              "checksumming directory "
1671                                              "block %lu", (unsigned long)block);
1672                         brelse(bh);
1673                         ret = ERR_PTR(-EFSBADCRC);
1674                         goto cleanup_and_exit;
1675                 }
1676                 set_buffer_verified(bh);
1677                 i = search_dirblock(bh, dir, fname,
1678                             block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
1679                 if (i == 1) {
1680                         EXT4_I(dir)->i_dir_start_lookup = block;
1681                         ret = bh;
1682                         goto cleanup_and_exit;
1683                 } else {
1684                         brelse(bh);
1685                         if (i < 0)
1686                                 goto cleanup_and_exit;
1687                 }
1688         next:
1689                 if (++block >= nblocks)
1690                         block = 0;
1691         } while (block != start);
1692
1693         /*
1694          * If the directory has grown while we were searching, then
1695          * search the last part of the directory before giving up.
1696          */
1697         block = nblocks;
1698         nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
1699         if (block < nblocks) {
1700                 start = 0;
1701                 goto restart;
1702         }
1703
1704 cleanup_and_exit:
1705         /* Clean up the read-ahead blocks */
1706         for (; ra_ptr < ra_max; ra_ptr++)
1707                 brelse(bh_use[ra_ptr]);
1708         return ret;
1709 }
1710
1711 static struct buffer_head *ext4_find_entry(struct inode *dir,
1712                                            const struct qstr *d_name,
1713                                            struct ext4_dir_entry_2 **res_dir,
1714                                            int *inlined)
1715 {
1716         int err;
1717         struct ext4_filename fname;
1718         struct buffer_head *bh;
1719
1720         err = ext4_fname_setup_filename(dir, d_name, 1, &fname);
1721         if (err == -ENOENT)
1722                 return NULL;
1723         if (err)
1724                 return ERR_PTR(err);
1725
1726         bh = __ext4_find_entry(dir, &fname, res_dir, inlined);
1727
1728         ext4_fname_free_filename(&fname);
1729         return bh;
1730 }
1731
1732 static struct buffer_head *ext4_lookup_entry(struct inode *dir,
1733                                              struct dentry *dentry,
1734                                              struct ext4_dir_entry_2 **res_dir)
1735 {
1736         int err;
1737         struct ext4_filename fname;
1738         struct buffer_head *bh;
1739
1740         err = ext4_fname_prepare_lookup(dir, dentry, &fname);
1741         generic_set_encrypted_ci_d_ops(dentry);
1742         if (err == -ENOENT)
1743                 return NULL;
1744         if (err)
1745                 return ERR_PTR(err);
1746
1747         bh = __ext4_find_entry(dir, &fname, res_dir, NULL);
1748
1749         ext4_fname_free_filename(&fname);
1750         return bh;
1751 }
1752
1753 static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
1754                         struct ext4_filename *fname,
1755                         struct ext4_dir_entry_2 **res_dir)
1756 {
1757         struct super_block * sb = dir->i_sb;
1758         struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
1759         struct buffer_head *bh;
1760         ext4_lblk_t block;
1761         int retval;
1762
1763 #ifdef CONFIG_FS_ENCRYPTION
1764         *res_dir = NULL;
1765 #endif
1766         frame = dx_probe(fname, dir, NULL, frames);
1767         if (IS_ERR(frame))
1768                 return (struct buffer_head *) frame;
1769         do {
1770                 block = dx_get_block(frame->at);
1771                 bh = ext4_read_dirblock(dir, block, DIRENT_HTREE);
1772                 if (IS_ERR(bh))
1773                         goto errout;
1774
1775                 retval = search_dirblock(bh, dir, fname,
1776                                          block << EXT4_BLOCK_SIZE_BITS(sb),
1777                                          res_dir);
1778                 if (retval == 1)
1779                         goto success;
1780                 brelse(bh);
1781                 if (retval == -1) {
1782                         bh = ERR_PTR(ERR_BAD_DX_DIR);
1783                         goto errout;
1784                 }
1785
1786                 /* Check to see if we should continue to search */
1787                 retval = ext4_htree_next_block(dir, fname->hinfo.hash, frame,
1788                                                frames, NULL);
1789                 if (retval < 0) {
1790                         ext4_warning_inode(dir,
1791                                 "error %d reading directory index block",
1792                                 retval);
1793                         bh = ERR_PTR(retval);
1794                         goto errout;
1795                 }
1796         } while (retval == 1);
1797
1798         bh = NULL;
1799 errout:
1800         dxtrace(printk(KERN_DEBUG "%s not found\n", fname->usr_fname->name));
1801 success:
1802         dx_release(frames);
1803         return bh;
1804 }
1805
1806 static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
1807 {
1808         struct inode *inode;
1809         struct ext4_dir_entry_2 *de;
1810         struct buffer_head *bh;
1811
1812         if (dentry->d_name.len > EXT4_NAME_LEN)
1813                 return ERR_PTR(-ENAMETOOLONG);
1814
1815         bh = ext4_lookup_entry(dir, dentry, &de);
1816         if (IS_ERR(bh))
1817                 return ERR_CAST(bh);
1818         inode = NULL;
1819         if (bh) {
1820                 __u32 ino = le32_to_cpu(de->inode);
1821                 brelse(bh);
1822                 if (!ext4_valid_inum(dir->i_sb, ino)) {
1823                         EXT4_ERROR_INODE(dir, "bad inode number: %u", ino);
1824                         return ERR_PTR(-EFSCORRUPTED);
1825                 }
1826                 if (unlikely(ino == dir->i_ino)) {
1827                         EXT4_ERROR_INODE(dir, "'%pd' linked to parent dir",
1828                                          dentry);
1829                         return ERR_PTR(-EFSCORRUPTED);
1830                 }
1831                 inode = ext4_iget(dir->i_sb, ino, EXT4_IGET_NORMAL);
1832                 if (inode == ERR_PTR(-ESTALE)) {
1833                         EXT4_ERROR_INODE(dir,
1834                                          "deleted inode referenced: %u",
1835                                          ino);
1836                         return ERR_PTR(-EFSCORRUPTED);
1837                 }
1838                 if (!IS_ERR(inode) && IS_ENCRYPTED(dir) &&
1839                     (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
1840                     !fscrypt_has_permitted_context(dir, inode)) {
1841                         ext4_warning(inode->i_sb,
1842                                      "Inconsistent encryption contexts: %lu/%lu",
1843                                      dir->i_ino, inode->i_ino);
1844                         iput(inode);
1845                         return ERR_PTR(-EPERM);
1846                 }
1847         }
1848
1849 #if IS_ENABLED(CONFIG_UNICODE)
1850         if (!inode && IS_CASEFOLDED(dir)) {
1851                 /* Eventually we want to call d_add_ci(dentry, NULL)
1852                  * for negative dentries in the encoding case as
1853                  * well.  For now, prevent the negative dentry
1854                  * from being cached.
1855                  */
1856                 return NULL;
1857         }
1858 #endif
1859         return d_splice_alias(inode, dentry);
1860 }
1861
1862
1863 struct dentry *ext4_get_parent(struct dentry *child)
1864 {
1865         __u32 ino;
1866         struct ext4_dir_entry_2 * de;
1867         struct buffer_head *bh;
1868
1869         bh = ext4_find_entry(d_inode(child), &dotdot_name, &de, NULL);
1870         if (IS_ERR(bh))
1871                 return ERR_CAST(bh);
1872         if (!bh)
1873                 return ERR_PTR(-ENOENT);
1874         ino = le32_to_cpu(de->inode);
1875         brelse(bh);
1876
1877         if (!ext4_valid_inum(child->d_sb, ino)) {
1878                 EXT4_ERROR_INODE(d_inode(child),
1879                                  "bad parent inode number: %u", ino);
1880                 return ERR_PTR(-EFSCORRUPTED);
1881         }
1882
1883         return d_obtain_alias(ext4_iget(child->d_sb, ino, EXT4_IGET_NORMAL));
1884 }
1885
1886 /*
1887  * Move count entries from end of map between two memory locations.
1888  * Returns pointer to last entry moved.
1889  */
1890 static struct ext4_dir_entry_2 *
1891 dx_move_dirents(struct inode *dir, char *from, char *to,
1892                 struct dx_map_entry *map, int count,
1893                 unsigned blocksize)
1894 {
1895         unsigned rec_len = 0;
1896
1897         while (count--) {
1898                 struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
1899                                                 (from + (map->offs<<2));
1900                 rec_len = ext4_dir_rec_len(de->name_len, dir);
1901
1902                 memcpy (to, de, rec_len);
1903                 ((struct ext4_dir_entry_2 *) to)->rec_len =
1904                                 ext4_rec_len_to_disk(rec_len, blocksize);
1905
1906                 /* wipe dir_entry excluding the rec_len field */
1907                 de->inode = 0;
1908                 memset(&de->name_len, 0, ext4_rec_len_from_disk(de->rec_len,
1909                                                                 blocksize) -
1910                                          offsetof(struct ext4_dir_entry_2,
1911                                                                 name_len));
1912
1913                 map++;
1914                 to += rec_len;
1915         }
1916         return (struct ext4_dir_entry_2 *) (to - rec_len);
1917 }
1918
1919 /*
1920  * Compact each dir entry in the range to the minimal rec_len.
1921  * Returns pointer to last entry in range.
1922  */
1923 static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base,
1924                                                         unsigned int blocksize)
1925 {
1926         struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
1927         unsigned rec_len = 0;
1928
1929         prev = to = de;
1930         while ((char*)de < base + blocksize) {
1931                 next = ext4_next_entry(de, blocksize);
1932                 if (de->inode && de->name_len) {
1933                         rec_len = ext4_dir_rec_len(de->name_len, dir);
1934                         if (de > to)
1935                                 memmove(to, de, rec_len);
1936                         to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
1937                         prev = to;
1938                         to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len);
1939                 }
1940                 de = next;
1941         }
1942         return prev;
1943 }
1944
1945 /*
1946  * Split a full leaf block to make room for a new dir entry.
1947  * Allocate a new block, and move entries so that they are approx. equally full.
1948  * Returns pointer to de in block into which the new entry will be inserted.
1949  */
1950 static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1951                         struct buffer_head **bh,struct dx_frame *frame,
1952                         struct dx_hash_info *hinfo)
1953 {
1954         unsigned blocksize = dir->i_sb->s_blocksize;
1955         unsigned continued;
1956         int count;
1957         struct buffer_head *bh2;
1958         ext4_lblk_t newblock;
1959         u32 hash2;
1960         struct dx_map_entry *map;
1961         char *data1 = (*bh)->b_data, *data2;
1962         unsigned split, move, size;
1963         struct ext4_dir_entry_2 *de = NULL, *de2;
1964         int     csum_size = 0;
1965         int     err = 0, i;
1966
1967         if (ext4_has_metadata_csum(dir->i_sb))
1968                 csum_size = sizeof(struct ext4_dir_entry_tail);
1969
1970         bh2 = ext4_append(handle, dir, &newblock);
1971         if (IS_ERR(bh2)) {
1972                 brelse(*bh);
1973                 *bh = NULL;
1974                 return (struct ext4_dir_entry_2 *) bh2;
1975         }
1976
1977         BUFFER_TRACE(*bh, "get_write_access");
1978         err = ext4_journal_get_write_access(handle, dir->i_sb, *bh,
1979                                             EXT4_JTR_NONE);
1980         if (err)
1981                 goto journal_error;
1982
1983         BUFFER_TRACE(frame->bh, "get_write_access");
1984         err = ext4_journal_get_write_access(handle, dir->i_sb, frame->bh,
1985                                             EXT4_JTR_NONE);
1986         if (err)
1987                 goto journal_error;
1988
1989         data2 = bh2->b_data;
1990
1991         /* create map in the end of data2 block */
1992         map = (struct dx_map_entry *) (data2 + blocksize);
1993         count = dx_make_map(dir, *bh, hinfo, map);
1994         if (count < 0) {
1995                 err = count;
1996                 goto journal_error;
1997         }
1998         map -= count;
1999         dx_sort_map(map, count);
2000         /* Ensure that neither split block is over half full */
2001         size = 0;
2002         move = 0;
2003         for (i = count-1; i >= 0; i--) {
2004                 /* is more than half of this entry in 2nd half of the block? */
2005                 if (size + map[i].size/2 > blocksize/2)
2006                         break;
2007                 size += map[i].size;
2008                 move++;
2009         }
2010         /*
2011          * map index at which we will split
2012          *
2013          * If the sum of active entries didn't exceed half the block size, just
2014          * split it in half by count; each resulting block will have at least
2015          * half the space free.
2016          */
2017         if (i > 0)
2018                 split = count - move;
2019         else
2020                 split = count/2;
2021
2022         hash2 = map[split].hash;
2023         continued = hash2 == map[split - 1].hash;
2024         dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
2025                         (unsigned long)dx_get_block(frame->at),
2026                                         hash2, split, count-split));
2027
2028         /* Fancy dance to stay within two buffers */
2029         de2 = dx_move_dirents(dir, data1, data2, map + split, count - split,
2030                               blocksize);
2031         de = dx_pack_dirents(dir, data1, blocksize);
2032         de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
2033                                            (char *) de,
2034                                            blocksize);
2035         de2->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
2036                                             (char *) de2,
2037                                             blocksize);
2038         if (csum_size) {
2039                 ext4_initialize_dirent_tail(*bh, blocksize);
2040                 ext4_initialize_dirent_tail(bh2, blocksize);
2041         }
2042
2043         dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data1,
2044                         blocksize, 1));
2045         dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data2,
2046                         blocksize, 1));
2047
2048         /* Which block gets the new entry? */
2049         if (hinfo->hash >= hash2) {
2050                 swap(*bh, bh2);
2051                 de = de2;
2052         }
2053         dx_insert_block(frame, hash2 + continued, newblock);
2054         err = ext4_handle_dirty_dirblock(handle, dir, bh2);
2055         if (err)
2056                 goto journal_error;
2057         err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
2058         if (err)
2059                 goto journal_error;
2060         brelse(bh2);
2061         dxtrace(dx_show_index("frame", frame->entries));
2062         return de;
2063
2064 journal_error:
2065         brelse(*bh);
2066         brelse(bh2);
2067         *bh = NULL;
2068         ext4_std_error(dir->i_sb, err);
2069         return ERR_PTR(err);
2070 }
2071
2072 int ext4_find_dest_de(struct inode *dir, struct inode *inode,
2073                       struct buffer_head *bh,
2074                       void *buf, int buf_size,
2075                       struct ext4_filename *fname,
2076                       struct ext4_dir_entry_2 **dest_de)
2077 {
2078         struct ext4_dir_entry_2 *de;
2079         unsigned short reclen = ext4_dir_rec_len(fname_len(fname), dir);
2080         int nlen, rlen;
2081         unsigned int offset = 0;
2082         char *top;
2083
2084         de = buf;
2085         top = buf + buf_size - reclen;
2086         while ((char *) de <= top) {
2087                 if (ext4_check_dir_entry(dir, NULL, de, bh,
2088                                          buf, buf_size, offset))
2089                         return -EFSCORRUPTED;
2090                 if (ext4_match(dir, fname, de))
2091                         return -EEXIST;
2092                 nlen = ext4_dir_rec_len(de->name_len, dir);
2093                 rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
2094                 if ((de->inode ? rlen - nlen : rlen) >= reclen)
2095                         break;
2096                 de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
2097                 offset += rlen;
2098         }
2099         if ((char *) de > top)
2100                 return -ENOSPC;
2101
2102         *dest_de = de;
2103         return 0;
2104 }
2105
2106 void ext4_insert_dentry(struct inode *dir,
2107                         struct inode *inode,
2108                         struct ext4_dir_entry_2 *de,
2109                         int buf_size,
2110                         struct ext4_filename *fname)
2111 {
2112
2113         int nlen, rlen;
2114
2115         nlen = ext4_dir_rec_len(de->name_len, dir);
2116         rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
2117         if (de->inode) {
2118                 struct ext4_dir_entry_2 *de1 =
2119                         (struct ext4_dir_entry_2 *)((char *)de + nlen);
2120                 de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, buf_size);
2121                 de->rec_len = ext4_rec_len_to_disk(nlen, buf_size);
2122                 de = de1;
2123         }
2124         de->file_type = EXT4_FT_UNKNOWN;
2125         de->inode = cpu_to_le32(inode->i_ino);
2126         ext4_set_de_type(inode->i_sb, de, inode->i_mode);
2127         de->name_len = fname_len(fname);
2128         memcpy(de->name, fname_name(fname), fname_len(fname));
2129         if (ext4_hash_in_dirent(dir)) {
2130                 struct dx_hash_info *hinfo = &fname->hinfo;
2131
2132                 EXT4_DIRENT_HASHES(de)->hash = cpu_to_le32(hinfo->hash);
2133                 EXT4_DIRENT_HASHES(de)->minor_hash =
2134                                                 cpu_to_le32(hinfo->minor_hash);
2135         }
2136 }
2137
2138 /*
2139  * Add a new entry into a directory (leaf) block.  If de is non-NULL,
2140  * it points to a directory entry which is guaranteed to be large
2141  * enough for new directory entry.  If de is NULL, then
2142  * add_dirent_to_buf will attempt search the directory block for
2143  * space.  It will return -ENOSPC if no space is available, and -EIO
2144  * and -EEXIST if directory entry already exists.
2145  */
2146 static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
2147                              struct inode *dir,
2148                              struct inode *inode, struct ext4_dir_entry_2 *de,
2149                              struct buffer_head *bh)
2150 {
2151         unsigned int    blocksize = dir->i_sb->s_blocksize;
2152         int             csum_size = 0;
2153         int             err, err2;
2154
2155         if (ext4_has_metadata_csum(inode->i_sb))
2156                 csum_size = sizeof(struct ext4_dir_entry_tail);
2157
2158         if (!de) {
2159                 err = ext4_find_dest_de(dir, inode, bh, bh->b_data,
2160                                         blocksize - csum_size, fname, &de);
2161                 if (err)
2162                         return err;
2163         }
2164         BUFFER_TRACE(bh, "get_write_access");
2165         err = ext4_journal_get_write_access(handle, dir->i_sb, bh,
2166                                             EXT4_JTR_NONE);
2167         if (err) {
2168                 ext4_std_error(dir->i_sb, err);
2169                 return err;
2170         }
2171
2172         /* By now the buffer is marked for journaling */
2173         ext4_insert_dentry(dir, inode, de, blocksize, fname);
2174
2175         /*
2176          * XXX shouldn't update any times until successful
2177          * completion of syscall, but too many callers depend
2178          * on this.
2179          *
2180          * XXX similarly, too many callers depend on
2181          * ext4_new_inode() setting the times, but error
2182          * recovery deletes the inode, so the worst that can
2183          * happen is that the times are slightly out of date
2184          * and/or different from the directory change time.
2185          */
2186         dir->i_mtime = dir->i_ctime = current_time(dir);
2187         ext4_update_dx_flag(dir);
2188         inode_inc_iversion(dir);
2189         err2 = ext4_mark_inode_dirty(handle, dir);
2190         BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
2191         err = ext4_handle_dirty_dirblock(handle, dir, bh);
2192         if (err)
2193                 ext4_std_error(dir->i_sb, err);
2194         return err ? err : err2;
2195 }
2196
2197 /*
2198  * This converts a one block unindexed directory to a 3 block indexed
2199  * directory, and adds the dentry to the indexed directory.
2200  */
2201 static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
2202                             struct inode *dir,
2203                             struct inode *inode, struct buffer_head *bh)
2204 {
2205         struct buffer_head *bh2;
2206         struct dx_root  *root;
2207         struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
2208         struct dx_entry *entries;
2209         struct ext4_dir_entry_2 *de, *de2;
2210         char            *data2, *top;
2211         unsigned        len;
2212         int             retval;
2213         unsigned        blocksize;
2214         ext4_lblk_t  block;
2215         struct fake_dirent *fde;
2216         int csum_size = 0;
2217
2218         if (ext4_has_metadata_csum(inode->i_sb))
2219                 csum_size = sizeof(struct ext4_dir_entry_tail);
2220
2221         blocksize =  dir->i_sb->s_blocksize;
2222         dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
2223         BUFFER_TRACE(bh, "get_write_access");
2224         retval = ext4_journal_get_write_access(handle, dir->i_sb, bh,
2225                                                EXT4_JTR_NONE);
2226         if (retval) {
2227                 ext4_std_error(dir->i_sb, retval);
2228                 brelse(bh);
2229                 return retval;
2230         }
2231         root = (struct dx_root *) bh->b_data;
2232
2233         /* The 0th block becomes the root, move the dirents out */
2234         fde = &root->dotdot;
2235         de = (struct ext4_dir_entry_2 *)((char *)fde +
2236                 ext4_rec_len_from_disk(fde->rec_len, blocksize));
2237         if ((char *) de >= (((char *) root) + blocksize)) {
2238                 EXT4_ERROR_INODE(dir, "invalid rec_len for '..'");
2239                 brelse(bh);
2240                 return -EFSCORRUPTED;
2241         }
2242         len = ((char *) root) + (blocksize - csum_size) - (char *) de;
2243
2244         /* Allocate new block for the 0th block's dirents */
2245         bh2 = ext4_append(handle, dir, &block);
2246         if (IS_ERR(bh2)) {
2247                 brelse(bh);
2248                 return PTR_ERR(bh2);
2249         }
2250         ext4_set_inode_flag(dir, EXT4_INODE_INDEX);
2251         data2 = bh2->b_data;
2252
2253         memcpy(data2, de, len);
2254         memset(de, 0, len); /* wipe old data */
2255         de = (struct ext4_dir_entry_2 *) data2;
2256         top = data2 + len;
2257         while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
2258                 de = de2;
2259         de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
2260                                            (char *) de, blocksize);
2261
2262         if (csum_size)
2263                 ext4_initialize_dirent_tail(bh2, blocksize);
2264
2265         /* Initialize the root; the dot dirents already exist */
2266         de = (struct ext4_dir_entry_2 *) (&root->dotdot);
2267         de->rec_len = ext4_rec_len_to_disk(
2268                         blocksize - ext4_dir_rec_len(2, NULL), blocksize);
2269         memset (&root->info, 0, sizeof(root->info));
2270         root->info.info_length = sizeof(root->info);
2271         if (ext4_hash_in_dirent(dir))
2272                 root->info.hash_version = DX_HASH_SIPHASH;
2273         else
2274                 root->info.hash_version =
2275                                 EXT4_SB(dir->i_sb)->s_def_hash_version;
2276
2277         entries = root->entries;
2278         dx_set_block(entries, 1);
2279         dx_set_count(entries, 1);
2280         dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
2281
2282         /* Initialize as for dx_probe */
2283         fname->hinfo.hash_version = root->info.hash_version;
2284         if (fname->hinfo.hash_version <= DX_HASH_TEA)
2285                 fname->hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
2286         fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
2287
2288         /* casefolded encrypted hashes are computed on fname setup */
2289         if (!ext4_hash_in_dirent(dir))
2290                 ext4fs_dirhash(dir, fname_name(fname),
2291                                 fname_len(fname), &fname->hinfo);
2292
2293         memset(frames, 0, sizeof(frames));
2294         frame = frames;
2295         frame->entries = entries;
2296         frame->at = entries;
2297         frame->bh = bh;
2298
2299         retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
2300         if (retval)
2301                 goto out_frames;
2302         retval = ext4_handle_dirty_dirblock(handle, dir, bh2);
2303         if (retval)
2304                 goto out_frames;
2305
2306         de = do_split(handle,dir, &bh2, frame, &fname->hinfo);
2307         if (IS_ERR(de)) {
2308                 retval = PTR_ERR(de);
2309                 goto out_frames;
2310         }
2311
2312         retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh2);
2313 out_frames:
2314         /*
2315          * Even if the block split failed, we have to properly write
2316          * out all the changes we did so far. Otherwise we can end up
2317          * with corrupted filesystem.
2318          */
2319         if (retval)
2320                 ext4_mark_inode_dirty(handle, dir);
2321         dx_release(frames);
2322         brelse(bh2);
2323         return retval;
2324 }
2325
2326 /*
2327  *      ext4_add_entry()
2328  *
2329  * adds a file entry to the specified directory, using the same
2330  * semantics as ext4_find_entry(). It returns NULL if it failed.
2331  *
2332  * NOTE!! The inode part of 'de' is left at 0 - which means you
2333  * may not sleep between calling this and putting something into
2334  * the entry, as someone else might have used it while you slept.
2335  */
2336 static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
2337                           struct inode *inode)
2338 {
2339         struct inode *dir = d_inode(dentry->d_parent);
2340         struct buffer_head *bh = NULL;
2341         struct ext4_dir_entry_2 *de;
2342         struct super_block *sb;
2343         struct ext4_filename fname;
2344         int     retval;
2345         int     dx_fallback=0;
2346         unsigned blocksize;
2347         ext4_lblk_t block, blocks;
2348         int     csum_size = 0;
2349
2350         if (ext4_has_metadata_csum(inode->i_sb))
2351                 csum_size = sizeof(struct ext4_dir_entry_tail);
2352
2353         sb = dir->i_sb;
2354         blocksize = sb->s_blocksize;
2355         if (!dentry->d_name.len)
2356                 return -EINVAL;
2357
2358         if (fscrypt_is_nokey_name(dentry))
2359                 return -ENOKEY;
2360
2361 #if IS_ENABLED(CONFIG_UNICODE)
2362         if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) &&
2363             sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name))
2364                 return -EINVAL;
2365 #endif
2366
2367         retval = ext4_fname_setup_filename(dir, &dentry->d_name, 0, &fname);
2368         if (retval)
2369                 return retval;
2370
2371         if (ext4_has_inline_data(dir)) {
2372                 retval = ext4_try_add_inline_entry(handle, &fname, dir, inode);
2373                 if (retval < 0)
2374                         goto out;
2375                 if (retval == 1) {
2376                         retval = 0;
2377                         goto out;
2378                 }
2379         }
2380
2381         if (is_dx(dir)) {
2382                 retval = ext4_dx_add_entry(handle, &fname, dir, inode);
2383                 if (!retval || (retval != ERR_BAD_DX_DIR))
2384                         goto out;
2385                 /* Can we just ignore htree data? */
2386                 if (ext4_has_metadata_csum(sb)) {
2387                         EXT4_ERROR_INODE(dir,
2388                                 "Directory has corrupted htree index.");
2389                         retval = -EFSCORRUPTED;
2390                         goto out;
2391                 }
2392                 ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
2393                 dx_fallback++;
2394                 retval = ext4_mark_inode_dirty(handle, dir);
2395                 if (unlikely(retval))
2396                         goto out;
2397         }
2398         blocks = dir->i_size >> sb->s_blocksize_bits;
2399         for (block = 0; block < blocks; block++) {
2400                 bh = ext4_read_dirblock(dir, block, DIRENT);
2401                 if (bh == NULL) {
2402                         bh = ext4_bread(handle, dir, block,
2403                                         EXT4_GET_BLOCKS_CREATE);
2404                         goto add_to_new_block;
2405                 }
2406                 if (IS_ERR(bh)) {
2407                         retval = PTR_ERR(bh);
2408                         bh = NULL;
2409                         goto out;
2410                 }
2411                 retval = add_dirent_to_buf(handle, &fname, dir, inode,
2412                                            NULL, bh);
2413                 if (retval != -ENOSPC)
2414                         goto out;
2415
2416                 if (blocks == 1 && !dx_fallback &&
2417                     ext4_has_feature_dir_index(sb)) {
2418                         retval = make_indexed_dir(handle, &fname, dir,
2419                                                   inode, bh);
2420                         bh = NULL; /* make_indexed_dir releases bh */
2421                         goto out;
2422                 }
2423                 brelse(bh);
2424         }
2425         bh = ext4_append(handle, dir, &block);
2426 add_to_new_block:
2427         if (IS_ERR(bh)) {
2428                 retval = PTR_ERR(bh);
2429                 bh = NULL;
2430                 goto out;
2431         }
2432         de = (struct ext4_dir_entry_2 *) bh->b_data;
2433         de->inode = 0;
2434         de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize);
2435
2436         if (csum_size)
2437                 ext4_initialize_dirent_tail(bh, blocksize);
2438
2439         retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh);
2440 out:
2441         ext4_fname_free_filename(&fname);
2442         brelse(bh);
2443         if (retval == 0)
2444                 ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY);
2445         return retval;
2446 }
2447
2448 /*
2449  * Returns 0 for success, or a negative error value
2450  */
2451 static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
2452                              struct inode *dir, struct inode *inode)
2453 {
2454         struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
2455         struct dx_entry *entries, *at;
2456         struct buffer_head *bh;
2457         struct super_block *sb = dir->i_sb;
2458         struct ext4_dir_entry_2 *de;
2459         int restart;
2460         int err;
2461
2462 again:
2463         restart = 0;
2464         frame = dx_probe(fname, dir, NULL, frames);
2465         if (IS_ERR(frame))
2466                 return PTR_ERR(frame);
2467         entries = frame->entries;
2468         at = frame->at;
2469         bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT_HTREE);
2470         if (IS_ERR(bh)) {
2471                 err = PTR_ERR(bh);
2472                 bh = NULL;
2473                 goto cleanup;
2474         }
2475
2476         BUFFER_TRACE(bh, "get_write_access");
2477         err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
2478         if (err)
2479                 goto journal_error;
2480
2481         err = add_dirent_to_buf(handle, fname, dir, inode, NULL, bh);
2482         if (err != -ENOSPC)
2483                 goto cleanup;
2484
2485         err = 0;
2486         /* Block full, should compress but for now just split */
2487         dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
2488                        dx_get_count(entries), dx_get_limit(entries)));
2489         /* Need to split index? */
2490         if (dx_get_count(entries) == dx_get_limit(entries)) {
2491                 ext4_lblk_t newblock;
2492                 int levels = frame - frames + 1;
2493                 unsigned int icount;
2494                 int add_level = 1;
2495                 struct dx_entry *entries2;
2496                 struct dx_node *node2;
2497                 struct buffer_head *bh2;
2498
2499                 while (frame > frames) {
2500                         if (dx_get_count((frame - 1)->entries) <
2501                             dx_get_limit((frame - 1)->entries)) {
2502                                 add_level = 0;
2503                                 break;
2504                         }
2505                         frame--; /* split higher index block */
2506                         at = frame->at;
2507                         entries = frame->entries;
2508                         restart = 1;
2509                 }
2510                 if (add_level && levels == ext4_dir_htree_level(sb)) {
2511                         ext4_warning(sb, "Directory (ino: %lu) index full, "
2512                                          "reach max htree level :%d",
2513                                          dir->i_ino, levels);
2514                         if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) {
2515                                 ext4_warning(sb, "Large directory feature is "
2516                                                  "not enabled on this "
2517                                                  "filesystem");
2518                         }
2519                         err = -ENOSPC;
2520                         goto cleanup;
2521                 }
2522                 icount = dx_get_count(entries);
2523                 bh2 = ext4_append(handle, dir, &newblock);
2524                 if (IS_ERR(bh2)) {
2525                         err = PTR_ERR(bh2);
2526                         goto cleanup;
2527                 }
2528                 node2 = (struct dx_node *)(bh2->b_data);
2529                 entries2 = node2->entries;
2530                 memset(&node2->fake, 0, sizeof(struct fake_dirent));
2531                 node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize,
2532                                                            sb->s_blocksize);
2533                 BUFFER_TRACE(frame->bh, "get_write_access");
2534                 err = ext4_journal_get_write_access(handle, sb, frame->bh,
2535                                                     EXT4_JTR_NONE);
2536                 if (err)
2537                         goto journal_error;
2538                 if (!add_level) {
2539                         unsigned icount1 = icount/2, icount2 = icount - icount1;
2540                         unsigned hash2 = dx_get_hash(entries + icount1);
2541                         dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
2542                                        icount1, icount2));
2543
2544                         BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
2545                         err = ext4_journal_get_write_access(handle, sb,
2546                                                             (frame - 1)->bh,
2547                                                             EXT4_JTR_NONE);
2548                         if (err)
2549                                 goto journal_error;
2550
2551                         memcpy((char *) entries2, (char *) (entries + icount1),
2552                                icount2 * sizeof(struct dx_entry));
2553                         dx_set_count(entries, icount1);
2554                         dx_set_count(entries2, icount2);
2555                         dx_set_limit(entries2, dx_node_limit(dir));
2556
2557                         /* Which index block gets the new entry? */
2558                         if (at - entries >= icount1) {
2559                                 frame->at = at - entries - icount1 + entries2;
2560                                 frame->entries = entries = entries2;
2561                                 swap(frame->bh, bh2);
2562                         }
2563                         dx_insert_block((frame - 1), hash2, newblock);
2564                         dxtrace(dx_show_index("node", frame->entries));
2565                         dxtrace(dx_show_index("node",
2566                                ((struct dx_node *) bh2->b_data)->entries));
2567                         err = ext4_handle_dirty_dx_node(handle, dir, bh2);
2568                         if (err)
2569                                 goto journal_error;
2570                         brelse (bh2);
2571                         err = ext4_handle_dirty_dx_node(handle, dir,
2572                                                    (frame - 1)->bh);
2573                         if (err)
2574                                 goto journal_error;
2575                         err = ext4_handle_dirty_dx_node(handle, dir,
2576                                                         frame->bh);
2577                         if (restart || err)
2578                                 goto journal_error;
2579                 } else {
2580                         struct dx_root *dxroot;
2581                         memcpy((char *) entries2, (char *) entries,
2582                                icount * sizeof(struct dx_entry));
2583                         dx_set_limit(entries2, dx_node_limit(dir));
2584
2585                         /* Set up root */
2586                         dx_set_count(entries, 1);
2587                         dx_set_block(entries + 0, newblock);
2588                         dxroot = (struct dx_root *)frames[0].bh->b_data;
2589                         dxroot->info.indirect_levels += 1;
2590                         dxtrace(printk(KERN_DEBUG
2591                                        "Creating %d level index...\n",
2592                                        dxroot->info.indirect_levels));
2593                         err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
2594                         if (err)
2595                                 goto journal_error;
2596                         err = ext4_handle_dirty_dx_node(handle, dir, bh2);
2597                         brelse(bh2);
2598                         restart = 1;
2599                         goto journal_error;
2600                 }
2601         }
2602         de = do_split(handle, dir, &bh, frame, &fname->hinfo);
2603         if (IS_ERR(de)) {
2604                 err = PTR_ERR(de);
2605                 goto cleanup;
2606         }
2607         err = add_dirent_to_buf(handle, fname, dir, inode, de, bh);
2608         goto cleanup;
2609
2610 journal_error:
2611         ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */
2612 cleanup:
2613         brelse(bh);
2614         dx_release(frames);
2615         /* @restart is true means htree-path has been changed, we need to
2616          * repeat dx_probe() to find out valid htree-path
2617          */
2618         if (restart && err == 0)
2619                 goto again;
2620         return err;
2621 }
2622
2623 /*
2624  * ext4_generic_delete_entry deletes a directory entry by merging it
2625  * with the previous entry
2626  */
2627 int ext4_generic_delete_entry(struct inode *dir,
2628                               struct ext4_dir_entry_2 *de_del,
2629                               struct buffer_head *bh,
2630                               void *entry_buf,
2631                               int buf_size,
2632                               int csum_size)
2633 {
2634         struct ext4_dir_entry_2 *de, *pde;
2635         unsigned int blocksize = dir->i_sb->s_blocksize;
2636         int i;
2637
2638         i = 0;
2639         pde = NULL;
2640         de = entry_buf;
2641         while (i < buf_size - csum_size) {
2642                 if (ext4_check_dir_entry(dir, NULL, de, bh,
2643                                          entry_buf, buf_size, i))
2644                         return -EFSCORRUPTED;
2645                 if (de == de_del)  {
2646                         if (pde) {
2647                                 pde->rec_len = ext4_rec_len_to_disk(
2648                                         ext4_rec_len_from_disk(pde->rec_len,
2649                                                                blocksize) +
2650                                         ext4_rec_len_from_disk(de->rec_len,
2651                                                                blocksize),
2652                                         blocksize);
2653
2654                                 /* wipe entire dir_entry */
2655                                 memset(de, 0, ext4_rec_len_from_disk(de->rec_len,
2656                                                                 blocksize));
2657                         } else {
2658                                 /* wipe dir_entry excluding the rec_len field */
2659                                 de->inode = 0;
2660                                 memset(&de->name_len, 0,
2661                                         ext4_rec_len_from_disk(de->rec_len,
2662                                                                 blocksize) -
2663                                         offsetof(struct ext4_dir_entry_2,
2664                                                                 name_len));
2665                         }
2666
2667                         inode_inc_iversion(dir);
2668                         return 0;
2669                 }
2670                 i += ext4_rec_len_from_disk(de->rec_len, blocksize);
2671                 pde = de;
2672                 de = ext4_next_entry(de, blocksize);
2673         }
2674         return -ENOENT;
2675 }
2676
2677 static int ext4_delete_entry(handle_t *handle,
2678                              struct inode *dir,
2679                              struct ext4_dir_entry_2 *de_del,
2680                              struct buffer_head *bh)
2681 {
2682         int err, csum_size = 0;
2683
2684         if (ext4_has_inline_data(dir)) {
2685                 int has_inline_data = 1;
2686                 err = ext4_delete_inline_entry(handle, dir, de_del, bh,
2687                                                &has_inline_data);
2688                 if (has_inline_data)
2689                         return err;
2690         }
2691
2692         if (ext4_has_metadata_csum(dir->i_sb))
2693                 csum_size = sizeof(struct ext4_dir_entry_tail);
2694
2695         BUFFER_TRACE(bh, "get_write_access");
2696         err = ext4_journal_get_write_access(handle, dir->i_sb, bh,
2697                                             EXT4_JTR_NONE);
2698         if (unlikely(err))
2699                 goto out;
2700
2701         err = ext4_generic_delete_entry(dir, de_del, bh, bh->b_data,
2702                                         dir->i_sb->s_blocksize, csum_size);
2703         if (err)
2704                 goto out;
2705
2706         BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
2707         err = ext4_handle_dirty_dirblock(handle, dir, bh);
2708         if (unlikely(err))
2709                 goto out;
2710
2711         return 0;
2712 out:
2713         if (err != -ENOENT)
2714                 ext4_std_error(dir->i_sb, err);
2715         return err;
2716 }
2717
2718 /*
2719  * Set directory link count to 1 if nlinks > EXT4_LINK_MAX, or if nlinks == 2
2720  * since this indicates that nlinks count was previously 1 to avoid overflowing
2721  * the 16-bit i_links_count field on disk.  Directories with i_nlink == 1 mean
2722  * that subdirectory link counts are not being maintained accurately.
2723  *
2724  * The caller has already checked for i_nlink overflow in case the DIR_LINK
2725  * feature is not enabled and returned -EMLINK.  The is_dx() check is a proxy
2726  * for checking S_ISDIR(inode) (since the INODE_INDEX feature will not be set
2727  * on regular files) and to avoid creating huge/slow non-HTREE directories.
2728  */
2729 static void ext4_inc_count(struct inode *inode)
2730 {
2731         inc_nlink(inode);
2732         if (is_dx(inode) &&
2733             (inode->i_nlink > EXT4_LINK_MAX || inode->i_nlink == 2))
2734                 set_nlink(inode, 1);
2735 }
2736
2737 /*
2738  * If a directory had nlink == 1, then we should let it be 1. This indicates
2739  * directory has >EXT4_LINK_MAX subdirs.
2740  */
2741 static void ext4_dec_count(struct inode *inode)
2742 {
2743         if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
2744                 drop_nlink(inode);
2745 }
2746
2747
2748 /*
2749  * Add non-directory inode to a directory. On success, the inode reference is
2750  * consumed by dentry is instantiation. This is also indicated by clearing of
2751  * *inodep pointer. On failure, the caller is responsible for dropping the
2752  * inode reference in the safe context.
2753  */
2754 static int ext4_add_nondir(handle_t *handle,
2755                 struct dentry *dentry, struct inode **inodep)
2756 {
2757         struct inode *dir = d_inode(dentry->d_parent);
2758         struct inode *inode = *inodep;
2759         int err = ext4_add_entry(handle, dentry, inode);
2760         if (!err) {
2761                 err = ext4_mark_inode_dirty(handle, inode);
2762                 if (IS_DIRSYNC(dir))
2763                         ext4_handle_sync(handle);
2764                 d_instantiate_new(dentry, inode);
2765                 *inodep = NULL;
2766                 return err;
2767         }
2768         drop_nlink(inode);
2769         ext4_orphan_add(handle, inode);
2770         unlock_new_inode(inode);
2771         return err;
2772 }
2773
2774 /*
2775  * By the time this is called, we already have created
2776  * the directory cache entry for the new file, but it
2777  * is so far negative - it has no inode.
2778  *
2779  * If the create succeeds, we fill in the inode information
2780  * with d_instantiate().
2781  */
2782 static int ext4_create(struct user_namespace *mnt_userns, struct inode *dir,
2783                        struct dentry *dentry, umode_t mode, bool excl)
2784 {
2785         handle_t *handle;
2786         struct inode *inode;
2787         int err, credits, retries = 0;
2788
2789         err = dquot_initialize(dir);
2790         if (err)
2791                 return err;
2792
2793         credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2794                    EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
2795 retry:
2796         inode = ext4_new_inode_start_handle(mnt_userns, dir, mode, &dentry->d_name,
2797                                             0, NULL, EXT4_HT_DIR, credits);
2798         handle = ext4_journal_current_handle();
2799         err = PTR_ERR(inode);
2800         if (!IS_ERR(inode)) {
2801                 inode->i_op = &ext4_file_inode_operations;
2802                 inode->i_fop = &ext4_file_operations;
2803                 ext4_set_aops(inode);
2804                 err = ext4_add_nondir(handle, dentry, &inode);
2805                 if (!err)
2806                         ext4_fc_track_create(handle, dentry);
2807         }
2808         if (handle)
2809                 ext4_journal_stop(handle);
2810         if (!IS_ERR_OR_NULL(inode))
2811                 iput(inode);
2812         if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2813                 goto retry;
2814         return err;
2815 }
2816
2817 static int ext4_mknod(struct user_namespace *mnt_userns, struct inode *dir,
2818                       struct dentry *dentry, umode_t mode, dev_t rdev)
2819 {
2820         handle_t *handle;
2821         struct inode *inode;
2822         int err, credits, retries = 0;
2823
2824         err = dquot_initialize(dir);
2825         if (err)
2826                 return err;
2827
2828         credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2829                    EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
2830 retry:
2831         inode = ext4_new_inode_start_handle(mnt_userns, dir, mode, &dentry->d_name,
2832                                             0, NULL, EXT4_HT_DIR, credits);
2833         handle = ext4_journal_current_handle();
2834         err = PTR_ERR(inode);
2835         if (!IS_ERR(inode)) {
2836                 init_special_inode(inode, inode->i_mode, rdev);
2837                 inode->i_op = &ext4_special_inode_operations;
2838                 err = ext4_add_nondir(handle, dentry, &inode);
2839                 if (!err)
2840                         ext4_fc_track_create(handle, dentry);
2841         }
2842         if (handle)
2843                 ext4_journal_stop(handle);
2844         if (!IS_ERR_OR_NULL(inode))
2845                 iput(inode);
2846         if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2847                 goto retry;
2848         return err;
2849 }
2850
2851 static int ext4_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
2852                         struct dentry *dentry, umode_t mode)
2853 {
2854         handle_t *handle;
2855         struct inode *inode;
2856         int err, retries = 0;
2857
2858         err = dquot_initialize(dir);
2859         if (err)
2860                 return err;
2861
2862 retry:
2863         inode = ext4_new_inode_start_handle(mnt_userns, dir, mode,
2864                                             NULL, 0, NULL,
2865                                             EXT4_HT_DIR,
2866                         EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
2867                           4 + EXT4_XATTR_TRANS_BLOCKS);
2868         handle = ext4_journal_current_handle();
2869         err = PTR_ERR(inode);
2870         if (!IS_ERR(inode)) {
2871                 inode->i_op = &ext4_file_inode_operations;
2872                 inode->i_fop = &ext4_file_operations;
2873                 ext4_set_aops(inode);
2874                 d_tmpfile(dentry, inode);
2875                 err = ext4_orphan_add(handle, inode);
2876                 if (err)
2877                         goto err_unlock_inode;
2878                 mark_inode_dirty(inode);
2879                 unlock_new_inode(inode);
2880         }
2881         if (handle)
2882                 ext4_journal_stop(handle);
2883         if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2884                 goto retry;
2885         return err;
2886 err_unlock_inode:
2887         ext4_journal_stop(handle);
2888         unlock_new_inode(inode);
2889         return err;
2890 }
2891
2892 struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
2893                           struct ext4_dir_entry_2 *de,
2894                           int blocksize, int csum_size,
2895                           unsigned int parent_ino, int dotdot_real_len)
2896 {
2897         de->inode = cpu_to_le32(inode->i_ino);
2898         de->name_len = 1;
2899         de->rec_len = ext4_rec_len_to_disk(ext4_dir_rec_len(de->name_len, NULL),
2900                                            blocksize);
2901         strcpy(de->name, ".");
2902         ext4_set_de_type(inode->i_sb, de, S_IFDIR);
2903
2904         de = ext4_next_entry(de, blocksize);
2905         de->inode = cpu_to_le32(parent_ino);
2906         de->name_len = 2;
2907         if (!dotdot_real_len)
2908                 de->rec_len = ext4_rec_len_to_disk(blocksize -
2909                                         (csum_size + ext4_dir_rec_len(1, NULL)),
2910                                         blocksize);
2911         else
2912                 de->rec_len = ext4_rec_len_to_disk(
2913                                         ext4_dir_rec_len(de->name_len, NULL),
2914                                         blocksize);
2915         strcpy(de->name, "..");
2916         ext4_set_de_type(inode->i_sb, de, S_IFDIR);
2917
2918         return ext4_next_entry(de, blocksize);
2919 }
2920
2921 int ext4_init_new_dir(handle_t *handle, struct inode *dir,
2922                              struct inode *inode)
2923 {
2924         struct buffer_head *dir_block = NULL;
2925         struct ext4_dir_entry_2 *de;
2926         ext4_lblk_t block = 0;
2927         unsigned int blocksize = dir->i_sb->s_blocksize;
2928         int csum_size = 0;
2929         int err;
2930
2931         if (ext4_has_metadata_csum(dir->i_sb))
2932                 csum_size = sizeof(struct ext4_dir_entry_tail);
2933
2934         if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
2935                 err = ext4_try_create_inline_dir(handle, dir, inode);
2936                 if (err < 0 && err != -ENOSPC)
2937                         goto out;
2938                 if (!err)
2939                         goto out;
2940         }
2941
2942         inode->i_size = 0;
2943         dir_block = ext4_append(handle, inode, &block);
2944         if (IS_ERR(dir_block))
2945                 return PTR_ERR(dir_block);
2946         de = (struct ext4_dir_entry_2 *)dir_block->b_data;
2947         ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
2948         set_nlink(inode, 2);
2949         if (csum_size)
2950                 ext4_initialize_dirent_tail(dir_block, blocksize);
2951
2952         BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
2953         err = ext4_handle_dirty_dirblock(handle, inode, dir_block);
2954         if (err)
2955                 goto out;
2956         set_buffer_verified(dir_block);
2957 out:
2958         brelse(dir_block);
2959         return err;
2960 }
2961
2962 static int ext4_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
2963                       struct dentry *dentry, umode_t mode)
2964 {
2965         handle_t *handle;
2966         struct inode *inode;
2967         int err, err2 = 0, credits, retries = 0;
2968
2969         if (EXT4_DIR_LINK_MAX(dir))
2970                 return -EMLINK;
2971
2972         err = dquot_initialize(dir);
2973         if (err)
2974                 return err;
2975
2976         credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2977                    EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
2978 retry:
2979         inode = ext4_new_inode_start_handle(mnt_userns, dir, S_IFDIR | mode,
2980                                             &dentry->d_name,
2981                                             0, NULL, EXT4_HT_DIR, credits);
2982         handle = ext4_journal_current_handle();
2983         err = PTR_ERR(inode);
2984         if (IS_ERR(inode))
2985                 goto out_stop;
2986
2987         inode->i_op = &ext4_dir_inode_operations;
2988         inode->i_fop = &ext4_dir_operations;
2989         err = ext4_init_new_dir(handle, dir, inode);
2990         if (err)
2991                 goto out_clear_inode;
2992         err = ext4_mark_inode_dirty(handle, inode);
2993         if (!err)
2994                 err = ext4_add_entry(handle, dentry, inode);
2995         if (err) {
2996 out_clear_inode:
2997                 clear_nlink(inode);
2998                 ext4_orphan_add(handle, inode);
2999                 unlock_new_inode(inode);
3000                 err2 = ext4_mark_inode_dirty(handle, inode);
3001                 if (unlikely(err2))
3002                         err = err2;
3003                 ext4_journal_stop(handle);
3004                 iput(inode);
3005                 goto out_retry;
3006         }
3007         ext4_inc_count(dir);
3008
3009         ext4_update_dx_flag(dir);
3010         err = ext4_mark_inode_dirty(handle, dir);
3011         if (err)
3012                 goto out_clear_inode;
3013         d_instantiate_new(dentry, inode);
3014         ext4_fc_track_create(handle, dentry);
3015         if (IS_DIRSYNC(dir))
3016                 ext4_handle_sync(handle);
3017
3018 out_stop:
3019         if (handle)
3020                 ext4_journal_stop(handle);
3021 out_retry:
3022         if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
3023                 goto retry;
3024         return err;
3025 }
3026
3027 /*
3028  * routine to check that the specified directory is empty (for rmdir)
3029  */
3030 bool ext4_empty_dir(struct inode *inode)
3031 {
3032         unsigned int offset;
3033         struct buffer_head *bh;
3034         struct ext4_dir_entry_2 *de;
3035         struct super_block *sb;
3036
3037         if (ext4_has_inline_data(inode)) {
3038                 int has_inline_data = 1;
3039                 int ret;
3040
3041                 ret = empty_inline_dir(inode, &has_inline_data);
3042                 if (has_inline_data)
3043                         return ret;
3044         }
3045
3046         sb = inode->i_sb;
3047         if (inode->i_size < ext4_dir_rec_len(1, NULL) +
3048                                         ext4_dir_rec_len(2, NULL)) {
3049                 EXT4_ERROR_INODE(inode, "invalid size");
3050                 return false;
3051         }
3052         /* The first directory block must not be a hole,
3053          * so treat it as DIRENT_HTREE
3054          */
3055         bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
3056         if (IS_ERR(bh))
3057                 return false;
3058
3059         de = (struct ext4_dir_entry_2 *) bh->b_data;
3060         if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size,
3061                                  0) ||
3062             le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) {
3063                 ext4_warning_inode(inode, "directory missing '.'");
3064                 brelse(bh);
3065                 return false;
3066         }
3067         offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
3068         de = ext4_next_entry(de, sb->s_blocksize);
3069         if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size,
3070                                  offset) ||
3071             le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
3072                 ext4_warning_inode(inode, "directory missing '..'");
3073                 brelse(bh);
3074                 return false;
3075         }
3076         offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
3077         while (offset < inode->i_size) {
3078                 if (!(offset & (sb->s_blocksize - 1))) {
3079                         unsigned int lblock;
3080                         brelse(bh);
3081                         lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
3082                         bh = ext4_read_dirblock(inode, lblock, EITHER);
3083                         if (bh == NULL) {
3084                                 offset += sb->s_blocksize;
3085                                 continue;
3086                         }
3087                         if (IS_ERR(bh))
3088                                 return false;
3089                 }
3090                 de = (struct ext4_dir_entry_2 *) (bh->b_data +
3091                                         (offset & (sb->s_blocksize - 1)));
3092                 if (ext4_check_dir_entry(inode, NULL, de, bh,
3093                                          bh->b_data, bh->b_size, offset) ||
3094                     le32_to_cpu(de->inode)) {
3095                         brelse(bh);
3096                         return false;
3097                 }
3098                 offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
3099         }
3100         brelse(bh);
3101         return true;
3102 }
3103
3104 static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
3105 {
3106         int retval;
3107         struct inode *inode;
3108         struct buffer_head *bh;
3109         struct ext4_dir_entry_2 *de;
3110         handle_t *handle = NULL;
3111
3112         if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
3113                 return -EIO;
3114
3115         /* Initialize quotas before so that eventual writes go in
3116          * separate transaction */
3117         retval = dquot_initialize(dir);
3118         if (retval)
3119                 return retval;
3120         retval = dquot_initialize(d_inode(dentry));
3121         if (retval)
3122                 return retval;
3123
3124         retval = -ENOENT;
3125         bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
3126         if (IS_ERR(bh))
3127                 return PTR_ERR(bh);
3128         if (!bh)
3129                 goto end_rmdir;
3130
3131         inode = d_inode(dentry);
3132
3133         retval = -EFSCORRUPTED;
3134         if (le32_to_cpu(de->inode) != inode->i_ino)
3135                 goto end_rmdir;
3136
3137         retval = -ENOTEMPTY;
3138         if (!ext4_empty_dir(inode))
3139                 goto end_rmdir;
3140
3141         handle = ext4_journal_start(dir, EXT4_HT_DIR,
3142                                     EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
3143         if (IS_ERR(handle)) {
3144                 retval = PTR_ERR(handle);
3145                 handle = NULL;
3146                 goto end_rmdir;
3147         }
3148
3149         if (IS_DIRSYNC(dir))
3150                 ext4_handle_sync(handle);
3151
3152         retval = ext4_delete_entry(handle, dir, de, bh);
3153         if (retval)
3154                 goto end_rmdir;
3155         if (!EXT4_DIR_LINK_EMPTY(inode))
3156                 ext4_warning_inode(inode,
3157                              "empty directory '%.*s' has too many links (%u)",
3158                              dentry->d_name.len, dentry->d_name.name,
3159                              inode->i_nlink);
3160         inode_inc_iversion(inode);
3161         clear_nlink(inode);
3162         /* There's no need to set i_disksize: the fact that i_nlink is
3163          * zero will ensure that the right thing happens during any
3164          * recovery. */
3165         inode->i_size = 0;
3166         ext4_orphan_add(handle, inode);
3167         inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
3168         retval = ext4_mark_inode_dirty(handle, inode);
3169         if (retval)
3170                 goto end_rmdir;
3171         ext4_dec_count(dir);
3172         ext4_update_dx_flag(dir);
3173         ext4_fc_track_unlink(handle, dentry);
3174         retval = ext4_mark_inode_dirty(handle, dir);
3175
3176 #if IS_ENABLED(CONFIG_UNICODE)
3177         /* VFS negative dentries are incompatible with Encoding and
3178          * Case-insensitiveness. Eventually we'll want avoid
3179          * invalidating the dentries here, alongside with returning the
3180          * negative dentries at ext4_lookup(), when it is better
3181          * supported by the VFS for the CI case.
3182          */
3183         if (IS_CASEFOLDED(dir))
3184                 d_invalidate(dentry);
3185 #endif
3186
3187 end_rmdir:
3188         brelse(bh);
3189         if (handle)
3190                 ext4_journal_stop(handle);
3191         return retval;
3192 }
3193
3194 int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name,
3195                   struct inode *inode)
3196 {
3197         int retval = -ENOENT;
3198         struct buffer_head *bh;
3199         struct ext4_dir_entry_2 *de;
3200         int skip_remove_dentry = 0;
3201
3202         bh = ext4_find_entry(dir, d_name, &de, NULL);
3203         if (IS_ERR(bh))
3204                 return PTR_ERR(bh);
3205
3206         if (!bh)
3207                 return -ENOENT;
3208
3209         if (le32_to_cpu(de->inode) != inode->i_ino) {
3210                 /*
3211                  * It's okay if we find dont find dentry which matches
3212                  * the inode. That's because it might have gotten
3213                  * renamed to a different inode number
3214                  */
3215                 if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
3216                         skip_remove_dentry = 1;
3217                 else
3218                         goto out;
3219         }
3220
3221         if (IS_DIRSYNC(dir))
3222                 ext4_handle_sync(handle);
3223
3224         if (!skip_remove_dentry) {
3225                 retval = ext4_delete_entry(handle, dir, de, bh);
3226                 if (retval)
3227                         goto out;
3228                 dir->i_ctime = dir->i_mtime = current_time(dir);
3229                 ext4_update_dx_flag(dir);
3230                 retval = ext4_mark_inode_dirty(handle, dir);
3231                 if (retval)
3232                         goto out;
3233         } else {
3234                 retval = 0;
3235         }
3236         if (inode->i_nlink == 0)
3237                 ext4_warning_inode(inode, "Deleting file '%.*s' with no links",
3238                                    d_name->len, d_name->name);
3239         else
3240                 drop_nlink(inode);
3241         if (!inode->i_nlink)
3242                 ext4_orphan_add(handle, inode);
3243         inode->i_ctime = current_time(inode);
3244         retval = ext4_mark_inode_dirty(handle, inode);
3245
3246 out:
3247         brelse(bh);
3248         return retval;
3249 }
3250
3251 static int ext4_unlink(struct inode *dir, struct dentry *dentry)
3252 {
3253         handle_t *handle;
3254         int retval;
3255
3256         if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
3257                 return -EIO;
3258
3259         trace_ext4_unlink_enter(dir, dentry);
3260         /*
3261          * Initialize quotas before so that eventual writes go
3262          * in separate transaction
3263          */
3264         retval = dquot_initialize(dir);
3265         if (retval)
3266                 goto out_trace;
3267         retval = dquot_initialize(d_inode(dentry));
3268         if (retval)
3269                 goto out_trace;
3270
3271         handle = ext4_journal_start(dir, EXT4_HT_DIR,
3272                                     EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
3273         if (IS_ERR(handle)) {
3274                 retval = PTR_ERR(handle);
3275                 goto out_trace;
3276         }
3277
3278         retval = __ext4_unlink(handle, dir, &dentry->d_name, d_inode(dentry));
3279         if (!retval)
3280                 ext4_fc_track_unlink(handle, dentry);
3281 #if IS_ENABLED(CONFIG_UNICODE)
3282         /* VFS negative dentries are incompatible with Encoding and
3283          * Case-insensitiveness. Eventually we'll want avoid
3284          * invalidating the dentries here, alongside with returning the
3285          * negative dentries at ext4_lookup(), when it is  better
3286          * supported by the VFS for the CI case.
3287          */
3288         if (IS_CASEFOLDED(dir))
3289                 d_invalidate(dentry);
3290 #endif
3291         if (handle)
3292                 ext4_journal_stop(handle);
3293
3294 out_trace:
3295         trace_ext4_unlink_exit(dentry, retval);
3296         return retval;
3297 }
3298
3299 static int ext4_init_symlink_block(handle_t *handle, struct inode *inode,
3300                                    struct fscrypt_str *disk_link)
3301 {
3302         struct buffer_head *bh;
3303         char *kaddr;
3304         int err = 0;
3305
3306         bh = ext4_bread(handle, inode, 0, EXT4_GET_BLOCKS_CREATE);
3307         if (IS_ERR(bh))
3308                 return PTR_ERR(bh);
3309
3310         BUFFER_TRACE(bh, "get_write_access");
3311         err = ext4_journal_get_write_access(handle, inode->i_sb, bh, EXT4_JTR_NONE);
3312         if (err)
3313                 goto out;
3314
3315         kaddr = (char *)bh->b_data;
3316         memcpy(kaddr, disk_link->name, disk_link->len);
3317         inode->i_size = disk_link->len - 1;
3318         EXT4_I(inode)->i_disksize = inode->i_size;
3319         err = ext4_handle_dirty_metadata(handle, inode, bh);
3320 out:
3321         brelse(bh);
3322         return err;
3323 }
3324
3325 static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir,
3326                         struct dentry *dentry, const char *symname)
3327 {
3328         handle_t *handle;
3329         struct inode *inode;
3330         int err, len = strlen(symname);
3331         int credits;
3332         struct fscrypt_str disk_link;
3333         int retries = 0;
3334
3335         if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
3336                 return -EIO;
3337
3338         err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
3339                                       &disk_link);
3340         if (err)
3341                 return err;
3342
3343         err = dquot_initialize(dir);
3344         if (err)
3345                 return err;
3346
3347         /*
3348          * EXT4_INDEX_EXTRA_TRANS_BLOCKS for addition of entry into the
3349          * directory. +3 for inode, inode bitmap, group descriptor allocation.
3350          * EXT4_DATA_TRANS_BLOCKS for the data block allocation and
3351          * modification.
3352          */
3353         credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
3354                   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3;
3355 retry:
3356         inode = ext4_new_inode_start_handle(mnt_userns, dir, S_IFLNK|S_IRWXUGO,
3357                                             &dentry->d_name, 0, NULL,
3358                                             EXT4_HT_DIR, credits);
3359         handle = ext4_journal_current_handle();
3360         if (IS_ERR(inode)) {
3361                 if (handle)
3362                         ext4_journal_stop(handle);
3363                 err = PTR_ERR(inode);
3364                 goto out_retry;
3365         }
3366
3367         if (IS_ENCRYPTED(inode)) {
3368                 err = fscrypt_encrypt_symlink(inode, symname, len, &disk_link);
3369                 if (err)
3370                         goto err_drop_inode;
3371                 inode->i_op = &ext4_encrypted_symlink_inode_operations;
3372         } else {
3373                 if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
3374                         inode->i_op = &ext4_symlink_inode_operations;
3375                 } else {
3376                         inode->i_op = &ext4_fast_symlink_inode_operations;
3377                         inode->i_link = (char *)&EXT4_I(inode)->i_data;
3378                 }
3379         }
3380
3381         if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
3382                 /* alloc symlink block and fill it */
3383                 err = ext4_init_symlink_block(handle, inode, &disk_link);
3384                 if (err)
3385                         goto err_drop_inode;
3386         } else {
3387                 /* clear the extent format for fast symlink */
3388                 ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
3389                 memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name,
3390                        disk_link.len);
3391                 inode->i_size = disk_link.len - 1;
3392                 EXT4_I(inode)->i_disksize = inode->i_size;
3393         }
3394         err = ext4_add_nondir(handle, dentry, &inode);
3395         if (handle)
3396                 ext4_journal_stop(handle);
3397         iput(inode);
3398         goto out_retry;
3399
3400 err_drop_inode:
3401         clear_nlink(inode);
3402         ext4_orphan_add(handle, inode);
3403         unlock_new_inode(inode);
3404         if (handle)
3405                 ext4_journal_stop(handle);
3406         iput(inode);
3407 out_retry:
3408         if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
3409                 goto retry;
3410         if (disk_link.name != (unsigned char *)symname)
3411                 kfree(disk_link.name);
3412         return err;
3413 }
3414
3415 int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry)
3416 {
3417         handle_t *handle;
3418         int err, retries = 0;
3419 retry:
3420         handle = ext4_journal_start(dir, EXT4_HT_DIR,
3421                 (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
3422                  EXT4_INDEX_EXTRA_TRANS_BLOCKS) + 1);
3423         if (IS_ERR(handle))
3424                 return PTR_ERR(handle);
3425
3426         if (IS_DIRSYNC(dir))
3427                 ext4_handle_sync(handle);
3428
3429         inode->i_ctime = current_time(inode);
3430         ext4_inc_count(inode);
3431         ihold(inode);
3432
3433         err = ext4_add_entry(handle, dentry, inode);
3434         if (!err) {
3435                 err = ext4_mark_inode_dirty(handle, inode);
3436                 /* this can happen only for tmpfile being
3437                  * linked the first time
3438                  */
3439                 if (inode->i_nlink == 1)
3440                         ext4_orphan_del(handle, inode);
3441                 d_instantiate(dentry, inode);
3442                 ext4_fc_track_link(handle, dentry);
3443         } else {
3444                 drop_nlink(inode);
3445                 iput(inode);
3446         }
3447         ext4_journal_stop(handle);
3448         if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
3449                 goto retry;
3450         return err;
3451 }
3452
3453 static int ext4_link(struct dentry *old_dentry,
3454                      struct inode *dir, struct dentry *dentry)
3455 {
3456         struct inode *inode = d_inode(old_dentry);
3457         int err;
3458
3459         if (inode->i_nlink >= EXT4_LINK_MAX)
3460                 return -EMLINK;
3461
3462         err = fscrypt_prepare_link(old_dentry, dir, dentry);
3463         if (err)
3464                 return err;
3465
3466         if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
3467             (!projid_eq(EXT4_I(dir)->i_projid,
3468                         EXT4_I(old_dentry->d_inode)->i_projid)))
3469                 return -EXDEV;
3470
3471         err = dquot_initialize(dir);
3472         if (err)
3473                 return err;
3474         return __ext4_link(dir, inode, dentry);
3475 }
3476
3477 /*
3478  * Try to find buffer head where contains the parent block.
3479  * It should be the inode block if it is inlined or the 1st block
3480  * if it is a normal dir.
3481  */
3482 static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
3483                                         struct inode *inode,
3484                                         int *retval,
3485                                         struct ext4_dir_entry_2 **parent_de,
3486                                         int *inlined)
3487 {
3488         struct buffer_head *bh;
3489
3490         if (!ext4_has_inline_data(inode)) {
3491                 struct ext4_dir_entry_2 *de;
3492                 unsigned int offset;
3493
3494                 /* The first directory block must not be a hole, so
3495                  * treat it as DIRENT_HTREE
3496                  */
3497                 bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
3498                 if (IS_ERR(bh)) {
3499                         *retval = PTR_ERR(bh);
3500                         return NULL;
3501                 }
3502
3503                 de = (struct ext4_dir_entry_2 *) bh->b_data;
3504                 if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data,
3505                                          bh->b_size, 0) ||
3506                     le32_to_cpu(de->inode) != inode->i_ino ||
3507                     strcmp(".", de->name)) {
3508                         EXT4_ERROR_INODE(inode, "directory missing '.'");
3509                         brelse(bh);
3510                         *retval = -EFSCORRUPTED;
3511                         return NULL;
3512                 }
3513                 offset = ext4_rec_len_from_disk(de->rec_len,
3514                                                 inode->i_sb->s_blocksize);
3515                 de = ext4_next_entry(de, inode->i_sb->s_blocksize);
3516                 if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data,
3517                                          bh->b_size, offset) ||
3518                     le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
3519                         EXT4_ERROR_INODE(inode, "directory missing '..'");
3520                         brelse(bh);
3521                         *retval = -EFSCORRUPTED;
3522                         return NULL;
3523                 }
3524                 *parent_de = de;
3525
3526                 return bh;
3527         }
3528
3529         *inlined = 1;
3530         return ext4_get_first_inline_block(inode, parent_de, retval);
3531 }
3532
/*
 * One side (source or destination) of a rename: the directory, the dentry
 * and inode being named, plus the cached on-disk locations that the rename
 * helpers read and modify.
 */
3533 struct ext4_renament {
3534         struct inode *dir;              /* directory containing the entry */
3535         struct dentry *dentry;          /* dentry whose d_name is looked up in dir */
3536         struct inode *inode;            /* inode behind dentry */
3537         bool is_dir;                    /* NOTE(review): presumably "inode is a directory" - confirm at the use sites */
3538         int dir_nlink_delta;            /* 0: leave dir's link count alone; -1: decrement; otherwise: increment */
3539
3540         /* entry for "dentry": buffer and record found by ext4_find_entry() */
3541         struct buffer_head *bh;
3542         struct ext4_dir_entry_2 *de;
3543         int inlined;                    /* non-zero: ext4_setent() skips dirtying bh as a directory block */
3544
3545         /* entry for ".." in inode if it's a directory */
3546         struct buffer_head *dir_bh;
3547         struct ext4_dir_entry_2 *parent_de;
3548         int dir_inlined;                /* set to 1 by ext4_get_first_dir_block() for inline-data directories */
3549 };
3550
3551 static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent)
3552 {
3553         int retval;
3554
3555         ent->dir_bh = ext4_get_first_dir_block(handle, ent->inode,
3556                                               &retval, &ent->parent_de,
3557                                               &ent->dir_inlined);
3558         if (!ent->dir_bh)
3559                 return retval;
3560         if (le32_to_cpu(ent->parent_de->inode) != ent->dir->i_ino)
3561                 return -EFSCORRUPTED;
3562         BUFFER_TRACE(ent->dir_bh, "get_write_access");
3563         return ext4_journal_get_write_access(handle, ent->dir->i_sb,
3564                                              ent->dir_bh, EXT4_JTR_NONE);
3565 }
3566
3567 static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent,
3568                                   unsigned dir_ino)
3569 {
3570         int retval;
3571
3572         ent->parent_de->inode = cpu_to_le32(dir_ino);
3573         BUFFER_TRACE(ent->dir_bh, "call ext4_handle_dirty_metadata");
3574         if (!ent->dir_inlined) {
3575                 if (is_dx(ent->inode)) {
3576                         retval = ext4_handle_dirty_dx_node(handle,
3577                                                            ent->inode,
3578                                                            ent->dir_bh);
3579                 } else {
3580                         retval = ext4_handle_dirty_dirblock(handle, ent->inode,
3581                                                             ent->dir_bh);
3582                 }
3583         } else {
3584                 retval = ext4_mark_inode_dirty(handle, ent->inode);
3585         }
3586         if (retval) {
3587                 ext4_std_error(ent->dir->i_sb, retval);
3588                 return retval;
3589         }
3590         return 0;
3591 }
3592
3593 static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
3594                        unsigned ino, unsigned file_type)
3595 {
3596         int retval, retval2;
3597
3598         BUFFER_TRACE(ent->bh, "get write access");
3599         retval = ext4_journal_get_write_access(handle, ent->dir->i_sb, ent->bh,
3600                                                EXT4_JTR_NONE);
3601         if (retval)
3602                 return retval;
3603         ent->de->inode = cpu_to_le32(ino);
3604         if (ext4_has_feature_filetype(ent->dir->i_sb))
3605                 ent->de->file_type = file_type;
3606         inode_inc_iversion(ent->dir);
3607         ent->dir->i_ctime = ent->dir->i_mtime =
3608                 current_time(ent->dir);
3609         retval = ext4_mark_inode_dirty(handle, ent->dir);
3610         BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata");
3611         if (!ent->inlined) {
3612                 retval2 = ext4_handle_dirty_dirblock(handle, ent->dir, ent->bh);
3613                 if (unlikely(retval2)) {
3614                         ext4_std_error(ent->dir->i_sb, retval2);
3615                         return retval2;
3616                 }
3617         }
3618         return retval;
3619 }
3620
3621 static void ext4_resetent(handle_t *handle, struct ext4_renament *ent,
3622                           unsigned ino, unsigned file_type)
3623 {
3624         struct ext4_renament old = *ent;
3625         int retval = 0;
3626
3627         /*
3628          * old->de could have moved from under us during make indexed dir,
3629          * so the old->de may no longer valid and need to find it again
3630          * before reset old inode info.
3631          */
3632         old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
3633         if (IS_ERR(old.bh))
3634                 retval = PTR_ERR(old.bh);
3635         if (!old.bh)
3636                 retval = -ENOENT;
3637         if (retval) {
3638                 ext4_std_error(old.dir->i_sb, retval);
3639                 return;
3640         }
3641
3642         ext4_setent(handle, &old, ino, file_type);
3643         brelse(old.bh);
3644 }
3645
3646 static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
3647                                   const struct qstr *d_name)
3648 {
3649         int retval = -ENOENT;
3650         struct buffer_head *bh;
3651         struct ext4_dir_entry_2 *de;
3652
3653         bh = ext4_find_entry(dir, d_name, &de, NULL);
3654         if (IS_ERR(bh))
3655                 return PTR_ERR(bh);
3656         if (bh) {
3657                 retval = ext4_delete_entry(handle, dir, de, bh);
3658                 brelse(bh);
3659         }
3660         return retval;
3661 }
3662
3663 static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent,
3664                                int force_reread)
3665 {
3666         int retval;
3667         /*
3668          * ent->de could have moved from under us during htree split, so make
3669          * sure that we are deleting the right entry.  We might also be pointing
3670          * to a stale entry in the unused part of ent->bh so just checking inum
3671          * and the name isn't enough.
3672          */
3673         if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino ||
3674             ent->de->name_len != ent->dentry->d_name.len ||
3675             strncmp(ent->de->name, ent->dentry->d_name.name,
3676                     ent->de->name_len) ||
3677             force_reread) {
3678                 retval = ext4_find_delete_entry(handle, ent->dir,
3679                                                 &ent->dentry->d_name);
3680         } else {
3681                 retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh);
3682                 if (retval == -ENOENT) {
3683                         retval = ext4_find_delete_entry(handle, ent->dir,
3684                                                         &ent->dentry->d_name);
3685                 }
3686         }
3687
3688         if (retval) {
3689                 ext4_warning_inode(ent->dir,
3690                                    "Deleting old file: nlink %d, error=%d",
3691                                    ent->dir->i_nlink, retval);
3692         }
3693 }
3694
3695 static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent)
3696 {
3697         if (ent->dir_nlink_delta) {
3698                 if (ent->dir_nlink_delta == -1)
3699                         ext4_dec_count(ent->dir);
3700                 else
3701                         ext4_inc_count(ent->dir);
3702                 ext4_mark_inode_dirty(handle, ent->dir);
3703         }
3704 }
3705
3706 static struct inode *ext4_whiteout_for_rename(struct user_namespace *mnt_userns,
3707                                               struct ext4_renament *ent,
3708                                               int credits, handle_t **h)
3709 {
3710         struct inode *wh;
3711         handle_t *handle;
3712         int retries = 0;
3713
3714         /*
3715          * for inode block, sb block, group summaries,
3716          * and inode bitmap
3717          */
3718         credits += (EXT4_MAXQUOTAS_TRANS_BLOCKS(ent->dir->i_sb) +
3719                     EXT4_XATTR_TRANS_BLOCKS + 4);
3720 retry:
3721         wh = ext4_new_inode_start_handle(mnt_userns, ent->dir,
3722                                          S_IFCHR | WHITEOUT_MODE,
3723                                          &ent->dentry->d_name, 0, NULL,
3724                                          EXT4_HT_DIR, credits);
3725
3726         handle = ext4_journal_current_handle();
3727         if (IS_ERR(wh)) {
3728                 if (handle)
3729                         ext4_journal_stop(handle);
3730                 if (PTR_ERR(wh) == -ENOSPC &&
3731                     ext4_should_retry_alloc(ent->dir->i_sb, &retries))
3732                         goto retry;
3733         } else {
3734                 *h = handle;
3735                 init_special_inode(wh, wh->i_mode, WHITEOUT_DEV);
3736                 wh->i_op = &ext4_special_inode_operations;
3737         }
3738         return wh;
3739 }
3740
3741 /*
3742  * Anybody can rename anything with this: the permission checks are left to the
3743  * higher-level routines.
3744  *
3745  * n.b.  old_{dentry,inode} refers to the source dentry/inode
3746  * while new_{dentry,inode} refers to the destination dentry/inode
3747  * This comes from rename(const char *oldpath, const char *newpath)
3748  */
3749 static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
3750                        struct dentry *old_dentry, struct inode *new_dir,
3751                        struct dentry *new_dentry, unsigned int flags)
3752 {
3753         handle_t *handle = NULL;
3754         struct ext4_renament old = {
3755                 .dir = old_dir,
3756                 .dentry = old_dentry,
3757                 .inode = d_inode(old_dentry),
3758         };
3759         struct ext4_renament new = {
3760                 .dir = new_dir,
3761                 .dentry = new_dentry,
3762                 .inode = d_inode(new_dentry),
3763         };
3764         int force_reread;
3765         int retval;
3766         struct inode *whiteout = NULL;
3767         int credits;
3768         u8 old_file_type;
3769
3770         if (new.inode && new.inode->i_nlink == 0) {
3771                 EXT4_ERROR_INODE(new.inode,
3772                                  "target of rename is already freed");
3773                 return -EFSCORRUPTED;
3774         }
3775
3776         if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) &&
3777             (!projid_eq(EXT4_I(new_dir)->i_projid,
3778                         EXT4_I(old_dentry->d_inode)->i_projid)))
3779                 return -EXDEV;
3780
3781         retval = dquot_initialize(old.dir);
3782         if (retval)
3783                 return retval;
3784         retval = dquot_initialize(new.dir);
3785         if (retval)
3786                 return retval;
3787
3788         /* Initialize quotas before so that eventual writes go
3789          * in separate transaction */
3790         if (new.inode) {
3791                 retval = dquot_initialize(new.inode);
3792                 if (retval)
3793                         return retval;
3794         }
3795
3796         old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
3797         if (IS_ERR(old.bh))
3798                 return PTR_ERR(old.bh);
3799         /*
3800          *  Check for inode number is _not_ due to possible IO errors.
3801          *  We might rmdir the source, keep it as pwd of some process
3802          *  and merrily kill the link to whatever was created under the
3803          *  same name. Goodbye sticky bit ;-<
3804          */
3805         retval = -ENOENT;
3806         if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
3807                 goto release_bh;
3808
3809         new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
3810                                  &new.de, &new.inlined);
3811         if (IS_ERR(new.bh)) {
3812                 retval = PTR_ERR(new.bh);
3813                 new.bh = NULL;
3814                 goto release_bh;
3815         }
3816         if (new.bh) {
3817                 if (!new.inode) {
3818                         brelse(new.bh);
3819                         new.bh = NULL;
3820                 }
3821         }
3822         if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC))
3823                 ext4_alloc_da_blocks(old.inode);
3824
3825         credits = (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
3826                    EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
3827         if (!(flags & RENAME_WHITEOUT)) {
3828                 handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits);
3829                 if (IS_ERR(handle)) {
3830                         retval = PTR_ERR(handle);
3831                         goto release_bh;
3832                 }
3833         } else {
3834                 whiteout = ext4_whiteout_for_rename(mnt_userns, &old, credits, &handle);
3835                 if (IS_ERR(whiteout)) {
3836                         retval = PTR_ERR(whiteout);
3837                         goto release_bh;
3838                 }
3839         }
3840
3841         old_file_type = old.de->file_type;
3842         if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
3843                 ext4_handle_sync(handle);
3844
3845         if (S_ISDIR(old.inode->i_mode)) {
3846                 if (new.inode) {
3847                         retval = -ENOTEMPTY;
3848                         if (!ext4_empty_dir(new.inode))
3849                                 goto end_rename;
3850                 } else {
3851                         retval = -EMLINK;
3852                         if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir))
3853                                 goto end_rename;
3854                 }
3855                 retval = ext4_rename_dir_prepare(handle, &old);
3856                 if (retval)
3857                         goto end_rename;
3858         }
3859         /*
3860          * If we're renaming a file within an inline_data dir and adding or
3861          * setting the new dirent causes a conversion from inline_data to
3862          * extents/blockmap, we need to force the dirent delete code to
3863          * re-read the directory, or else we end up trying to delete a dirent
3864          * from what is now the extent tree root (or a block map).
3865          */
3866         force_reread = (new.dir->i_ino == old.dir->i_ino &&
3867                         ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));
3868
3869         if (whiteout) {
3870                 /*
3871                  * Do this before adding a new entry, so the old entry is sure
3872                  * to be still pointing to the valid old entry.
3873                  */
3874                 retval = ext4_setent(handle, &old, whiteout->i_ino,
3875                                      EXT4_FT_CHRDEV);
3876                 if (retval)
3877                         goto end_rename;
3878                 retval = ext4_mark_inode_dirty(handle, whiteout);
3879                 if (unlikely(retval))
3880                         goto end_rename;
3881
3882         }
3883         if (!new.bh) {
3884                 retval = ext4_add_entry(handle, new.dentry, old.inode);
3885                 if (retval)
3886                         goto end_rename;
3887         } else {
3888                 retval = ext4_setent(handle, &new,
3889                                      old.inode->i_ino, old_file_type);
3890                 if (retval)
3891                         goto end_rename;
3892         }
3893         if (force_reread)
3894                 force_reread = !ext4_test_inode_flag(new.dir,
3895                                                      EXT4_INODE_INLINE_DATA);
3896
3897         /*
3898          * Like most other Unix systems, set the ctime for inodes on a
3899          * rename.
3900          */
3901         old.inode->i_ctime = current_time(old.inode);
3902         retval = ext4_mark_inode_dirty(handle, old.inode);
3903         if (unlikely(retval))
3904                 goto end_rename;
3905
3906         if (!whiteout) {
3907                 /*
3908                  * ok, that's it
3909                  */
3910                 ext4_rename_delete(handle, &old, force_reread);
3911         }
3912
3913         if (new.inode) {
3914                 ext4_dec_count(new.inode);
3915                 new.inode->i_ctime = current_time(new.inode);
3916         }
3917         old.dir->i_ctime = old.dir->i_mtime = current_time(old.dir);
3918         ext4_update_dx_flag(old.dir);
3919         if (old.dir_bh) {
3920                 retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
3921                 if (retval)
3922                         goto end_rename;
3923
3924                 ext4_dec_count(old.dir);
3925                 if (new.inode) {
3926                         /* checked ext4_empty_dir above, can't have another
3927                          * parent, ext4_dec_count() won't work for many-linked
3928                          * dirs */
3929                         clear_nlink(new.inode);
3930                 } else {
3931                         ext4_inc_count(new.dir);
3932                         ext4_update_dx_flag(new.dir);
3933                         retval = ext4_mark_inode_dirty(handle, new.dir);
3934                         if (unlikely(retval))
3935                                 goto end_rename;
3936                 }
3937         }
3938         retval = ext4_mark_inode_dirty(handle, old.dir);
3939         if (unlikely(retval))
3940                 goto end_rename;
3941
3942         if (S_ISDIR(old.inode->i_mode)) {
3943                 /*
3944                  * We disable fast commits here that's because the
3945                  * replay code is not yet capable of changing dot dot
3946                  * dirents in directories.
3947                  */
3948                 ext4_fc_mark_ineligible(old.inode->i_sb,
3949                         EXT4_FC_REASON_RENAME_DIR, handle);
3950         } else {
3951                 struct super_block *sb = old.inode->i_sb;
3952
3953                 if (new.inode)
3954                         ext4_fc_track_unlink(handle, new.dentry);
3955                 if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
3956                     !(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) &&
3957                     !(ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE))) {
3958                         __ext4_fc_track_link(handle, old.inode, new.dentry);
3959                         __ext4_fc_track_unlink(handle, old.inode, old.dentry);
3960                         if (whiteout)
3961                                 __ext4_fc_track_create(handle, whiteout,
3962                                                        old.dentry);
3963                 }
3964         }
3965
3966         if (new.inode) {
3967                 retval = ext4_mark_inode_dirty(handle, new.inode);
3968                 if (unlikely(retval))
3969                         goto end_rename;
3970                 if (!new.inode->i_nlink)
3971                         ext4_orphan_add(handle, new.inode);
3972         }
3973         retval = 0;
3974
3975 end_rename:
3976         if (whiteout) {
3977                 if (retval) {
3978                         ext4_resetent(handle, &old,
3979                                       old.inode->i_ino, old_file_type);
3980                         drop_nlink(whiteout);
3981                         ext4_orphan_add(handle, whiteout);
3982                 }
3983                 unlock_new_inode(whiteout);
3984                 ext4_journal_stop(handle);
3985                 iput(whiteout);
3986         } else {
3987                 ext4_journal_stop(handle);
3988         }
3989 release_bh:
3990         brelse(old.dir_bh);
3991         brelse(old.bh);
3992         brelse(new.bh);
3993         return retval;
3994 }
3995
3996 static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
3997                              struct inode *new_dir, struct dentry *new_dentry)
3998 {
3999         handle_t *handle = NULL;
4000         struct ext4_renament old = {
4001                 .dir = old_dir,
4002                 .dentry = old_dentry,
4003                 .inode = d_inode(old_dentry),
4004         };
4005         struct ext4_renament new = {
4006                 .dir = new_dir,
4007                 .dentry = new_dentry,
4008                 .inode = d_inode(new_dentry),
4009         };
4010         u8 new_file_type;
4011         int retval;
4012         struct timespec64 ctime;
4013
4014         if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
4015              !projid_eq(EXT4_I(new_dir)->i_projid,
4016                         EXT4_I(old_dentry->d_inode)->i_projid)) ||
4017             (ext4_test_inode_flag(old_dir, EXT4_INODE_PROJINHERIT) &&
4018              !projid_eq(EXT4_I(old_dir)->i_projid,
4019                         EXT4_I(new_dentry->d_inode)->i_projid)))
4020                 return -EXDEV;
4021
4022         retval = dquot_initialize(old.dir);
4023         if (retval)
4024                 return retval;
4025         retval = dquot_initialize(new.dir);
4026         if (retval)
4027                 return retval;
4028
4029         old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
4030                                  &old.de, &old.inlined);
4031         if (IS_ERR(old.bh))
4032                 return PTR_ERR(old.bh);
4033         /*
4034          *  Check for inode number is _not_ due to possible IO errors.
4035          *  We might rmdir the source, keep it as pwd of some process
4036          *  and merrily kill the link to whatever was created under the
4037          *  same name. Goodbye sticky bit ;-<
4038          */
4039         retval = -ENOENT;
4040         if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
4041                 goto end_rename;
4042
4043         new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
4044                                  &new.de, &new.inlined);
4045         if (IS_ERR(new.bh)) {
4046                 retval = PTR_ERR(new.bh);
4047                 new.bh = NULL;
4048                 goto end_rename;
4049         }
4050
4051         /* RENAME_EXCHANGE case: old *and* new must both exist */
4052         if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino)
4053                 goto end_rename;
4054
4055         handle = ext4_journal_start(old.dir, EXT4_HT_DIR,
4056                 (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
4057                  2 * EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
4058         if (IS_ERR(handle)) {
4059                 retval = PTR_ERR(handle);
4060                 handle = NULL;
4061                 goto end_rename;
4062         }
4063
4064         if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
4065                 ext4_handle_sync(handle);
4066
4067         if (S_ISDIR(old.inode->i_mode)) {
4068                 old.is_dir = true;
4069                 retval = ext4_rename_dir_prepare(handle, &old);
4070                 if (retval)
4071                         goto end_rename;
4072         }
4073         if (S_ISDIR(new.inode->i_mode)) {
4074                 new.is_dir = true;
4075                 retval = ext4_rename_dir_prepare(handle, &new);
4076                 if (retval)
4077                         goto end_rename;
4078         }
4079
4080         /*
4081          * Other than the special case of overwriting a directory, parents'
4082          * nlink only needs to be modified if this is a cross directory rename.
4083          */
4084         if (old.dir != new.dir && old.is_dir != new.is_dir) {
4085                 old.dir_nlink_delta = old.is_dir ? -1 : 1;
4086                 new.dir_nlink_delta = -old.dir_nlink_delta;
4087                 retval = -EMLINK;
4088                 if ((old.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(old.dir)) ||
4089                     (new.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(new.dir)))
4090                         goto end_rename;
4091         }
4092
4093         new_file_type = new.de->file_type;
4094         retval = ext4_setent(handle, &new, old.inode->i_ino, old.de->file_type);
4095         if (retval)
4096                 goto end_rename;
4097
4098         retval = ext4_setent(handle, &old, new.inode->i_ino, new_file_type);
4099         if (retval)
4100                 goto end_rename;
4101
4102         /*
4103          * Like most other Unix systems, set the ctime for inodes on a
4104          * rename.
4105          */
4106         ctime = current_time(old.inode);
4107         old.inode->i_ctime = ctime;
4108         new.inode->i_ctime = ctime;
4109         retval = ext4_mark_inode_dirty(handle, old.inode);
4110         if (unlikely(retval))
4111                 goto end_rename;
4112         retval = ext4_mark_inode_dirty(handle, new.inode);
4113         if (unlikely(retval))
4114                 goto end_rename;
4115         ext4_fc_mark_ineligible(new.inode->i_sb,
4116                                 EXT4_FC_REASON_CROSS_RENAME, handle);
4117         if (old.dir_bh) {
4118                 retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
4119                 if (retval)
4120                         goto end_rename;
4121         }
4122         if (new.dir_bh) {
4123                 retval = ext4_rename_dir_finish(handle, &new, old.dir->i_ino);
4124                 if (retval)
4125                         goto end_rename;
4126         }
4127         ext4_update_dir_count(handle, &old);
4128         ext4_update_dir_count(handle, &new);
4129         retval = 0;
4130
4131 end_rename:
4132         brelse(old.dir_bh);
4133         brelse(new.dir_bh);
4134         brelse(old.bh);
4135         brelse(new.bh);
4136         if (handle)
4137                 ext4_journal_stop(handle);
4138         return retval;
4139 }
4140
4141 static int ext4_rename2(struct user_namespace *mnt_userns,
4142                         struct inode *old_dir, struct dentry *old_dentry,
4143                         struct inode *new_dir, struct dentry *new_dentry,
4144                         unsigned int flags)
4145 {
4146         int err;
4147
4148         if (unlikely(ext4_forced_shutdown(EXT4_SB(old_dir->i_sb))))
4149                 return -EIO;
4150
4151         if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
4152                 return -EINVAL;
4153
4154         err = fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry,
4155                                      flags);
4156         if (err)
4157                 return err;
4158
4159         if (flags & RENAME_EXCHANGE) {
4160                 return ext4_cross_rename(old_dir, old_dentry,
4161                                          new_dir, new_dentry);
4162         }
4163
4164         return ext4_rename(mnt_userns, old_dir, old_dentry, new_dir, new_dentry, flags);
4165 }
4166
4167 /*
4168  * directories can handle most operations...
4169  */
4170 const struct inode_operations ext4_dir_inode_operations = {
4171         .create         = ext4_create,
4172         .lookup         = ext4_lookup,
4173         .link           = ext4_link,
4174         .unlink         = ext4_unlink,
4175         .symlink        = ext4_symlink,
4176         .mkdir          = ext4_mkdir,
4177         .rmdir          = ext4_rmdir,
4178         .mknod          = ext4_mknod,
4179         .tmpfile        = ext4_tmpfile,
4180         .rename         = ext4_rename2,
4181         .setattr        = ext4_setattr,
4182         .getattr        = ext4_getattr,
4183         .listxattr      = ext4_listxattr,
4184         .get_acl        = ext4_get_acl,
4185         .set_acl        = ext4_set_acl,
4186         .fiemap         = ext4_fiemap,
4187         .fileattr_get   = ext4_fileattr_get,
4188         .fileattr_set   = ext4_fileattr_set,
4189 };
4190
4191 const struct inode_operations ext4_special_inode_operations = {
4192         .setattr        = ext4_setattr,
4193         .getattr        = ext4_getattr,
4194         .listxattr      = ext4_listxattr,
4195         .get_acl        = ext4_get_acl,
4196         .set_acl        = ext4_set_acl,
4197 };