GNU Linux-libre 6.8.9-gnu
fs/erofs/zmap.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018-2019 HUAWEI, Inc.
 *             https://www.huawei.com/
 */
#include "internal.h"
#include <asm/unaligned.h>
#include <trace/events/erofs.h>

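/*
 * Per-lookup state shared by the lcluster parsing helpers below: the inode
 * and map being filled, the mapped metadata buffer, the current logical
 * cluster number, and the fields decoded from its on-disk index.
 */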
struct z_erofs_maprecorder {
        struct inode *inode;
        struct erofs_map_blocks *map;
        void *kaddr;

        unsigned long lcn;
        /* compression extent information gathered */
        u8  type, headtype;
        u16 clusterofs;
        u16 delta[2];
        erofs_blk_t pblk, compressedblks;
        erofs_off_t nextpackoff;
        bool partialref;
};

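/*
 * Read the full (non-compacted) on-disk lcluster index for @lcn and decode
 * its type, cluster offset and delta[]/blkaddr fields into @m.
 */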
static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m,
                                      unsigned long lcn)
{
        struct inode *const inode = m->inode;
        struct erofs_inode *const vi = EROFS_I(inode);
        const erofs_off_t pos = Z_EROFS_FULL_INDEX_ALIGN(erofs_iloc(inode) +
                        vi->inode_isize + vi->xattr_isize) +
                        lcn * sizeof(struct z_erofs_lcluster_index);
        struct z_erofs_lcluster_index *di;
        unsigned int advise, type;

        m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb,
                                      erofs_blknr(inode->i_sb, pos), EROFS_KMAP);
        if (IS_ERR(m->kaddr))
                return PTR_ERR(m->kaddr);

        m->nextpackoff = pos + sizeof(struct z_erofs_lcluster_index);
        m->lcn = lcn;
        di = m->kaddr + erofs_blkoff(inode->i_sb, pos);

        advise = le16_to_cpu(di->di_advise);
        type = (advise >> Z_EROFS_LI_LCLUSTER_TYPE_BIT) &
                ((1 << Z_EROFS_LI_LCLUSTER_TYPE_BITS) - 1);
        switch (type) {
        case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
                m->clusterofs = 1 << vi->z_logical_clusterbits;
                m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
                if (m->delta[0] & Z_EROFS_LI_D0_CBLKCNT) {
                        if (!(vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
                                        Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
                                DBG_BUGON(1);
                                return -EFSCORRUPTED;
                        }
                        m->compressedblks = m->delta[0] &
                                ~Z_EROFS_LI_D0_CBLKCNT;
                        m->delta[0] = 1;
                }
                m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
                break;
        case Z_EROFS_LCLUSTER_TYPE_PLAIN:
        case Z_EROFS_LCLUSTER_TYPE_HEAD1:
        case Z_EROFS_LCLUSTER_TYPE_HEAD2:
                if (advise & Z_EROFS_LI_PARTIAL_REF)
                        m->partialref = true;
                m->clusterofs = le16_to_cpu(di->di_clusterofs);
                if (m->clusterofs >= 1 << vi->z_logical_clusterbits) {
                        DBG_BUGON(1);
                        return -EFSCORRUPTED;
                }
                m->pblk = le32_to_cpu(di->di_u.blkaddr);
                break;
        default:
                DBG_BUGON(1);
                return -EOPNOTSUPP;
        }
        m->type = type;
        return 0;
}

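/*
 * Extract one compacted index entry starting at bit position @pos in @in:
 * return the low @lobits bits and store the 2-bit lcluster type in *type.
 */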
static unsigned int decode_compactedbits(unsigned int lobits,
                                         u8 *in, unsigned int pos, u8 *type)
{
        const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7);
        const unsigned int lo = v & ((1 << lobits) - 1);

        *type = (v >> lobits) & 3;
        return lo;
}

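/*
 * Walk forward from entry @i and count consecutive NONHEAD entries to work
 * out the lookahead distance (delta[1]) of the current lcluster.
 */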
static int get_compacted_la_distance(unsigned int lobits,
                                     unsigned int encodebits,
                                     unsigned int vcnt, u8 *in, int i)
{
        unsigned int lo, d1 = 0;
        u8 type;

        DBG_BUGON(i >= vcnt);

        do {
                lo = decode_compactedbits(lobits, in, encodebits * i, &type);

                if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD)
                        return d1;
                ++d1;
        } while (++i < vcnt);

        /* the last item (vcnt - 1) is a Z_EROFS_LCLUSTER_TYPE_NONHEAD one */
        if (!(lo & Z_EROFS_LI_D0_CBLKCNT))
                d1 += lo - 1;
        return d1;
}

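/*
 * Decode the compacted index pack that contains the lcluster at @pos.  Each
 * pack holds @vcnt entries of @encodebits bits followed by a trailing __le32
 * base blkaddr; the pblk of a HEAD lcluster is recovered by counting the
 * compressed blocks of the preceding entries in the same pack.
 */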
static int unpack_compacted_index(struct z_erofs_maprecorder *m,
                                  unsigned int amortizedshift,
                                  erofs_off_t pos, bool lookahead)
{
        struct erofs_inode *const vi = EROFS_I(m->inode);
        const unsigned int lclusterbits = vi->z_logical_clusterbits;
        unsigned int vcnt, base, lo, lobits, encodebits, nblk, eofs;
        int i;
        u8 *in, type;
        bool big_pcluster;

        if (1 << amortizedshift == 4 && lclusterbits <= 14)
                vcnt = 2;
        else if (1 << amortizedshift == 2 && lclusterbits <= 12)
                vcnt = 16;
        else
                return -EOPNOTSUPP;

        /* note that this does not equal round_up(..) */
        m->nextpackoff = round_down(pos, vcnt << amortizedshift) +
                         (vcnt << amortizedshift);
        big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
        lobits = max(lclusterbits, ilog2(Z_EROFS_LI_D0_CBLKCNT) + 1U);
        encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
        eofs = erofs_blkoff(m->inode->i_sb, pos);
        base = round_down(eofs, vcnt << amortizedshift);
        in = m->kaddr + base;

        i = (eofs - base) >> amortizedshift;

        lo = decode_compactedbits(lobits, in, encodebits * i, &type);
        m->type = type;
        if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
                m->clusterofs = 1 << lclusterbits;

                /* figure out lookahead_distance: delta[1] if needed */
                if (lookahead)
                        m->delta[1] = get_compacted_la_distance(lobits,
                                                encodebits, vcnt, in, i);
                if (lo & Z_EROFS_LI_D0_CBLKCNT) {
                        if (!big_pcluster) {
                                DBG_BUGON(1);
                                return -EFSCORRUPTED;
                        }
                        m->compressedblks = lo & ~Z_EROFS_LI_D0_CBLKCNT;
                        m->delta[0] = 1;
                        return 0;
                } else if (i + 1 != (int)vcnt) {
                        m->delta[0] = lo;
                        return 0;
                }
                /*
                 * The last lcluster in the pack is special: its lo field
                 * stores delta[1] rather than delta[0].  Hence, derive
                 * delta[0] from the previous lcluster indirectly.
                 */
                lo = decode_compactedbits(lobits, in,
                                          encodebits * (i - 1), &type);
                if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD)
                        lo = 0;
                else if (lo & Z_EROFS_LI_D0_CBLKCNT)
                        lo = 1;
                m->delta[0] = lo + 1;
                return 0;
        }
        m->clusterofs = lo;
        m->delta[0] = 0;
        /* figure out blkaddr (pblk) for HEAD lclusters */
        if (!big_pcluster) {
                nblk = 1;
                while (i > 0) {
                        --i;
                        lo = decode_compactedbits(lobits, in,
                                                  encodebits * i, &type);
                        if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD)
                                i -= lo;

                        if (i >= 0)
                                ++nblk;
                }
        } else {
                nblk = 0;
                while (i > 0) {
                        --i;
                        lo = decode_compactedbits(lobits, in,
                                                  encodebits * i, &type);
                        if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
                                if (lo & Z_EROFS_LI_D0_CBLKCNT) {
                                        --i;
                                        nblk += lo & ~Z_EROFS_LI_D0_CBLKCNT;
                                        continue;
                                }
                                /* bigpcluster shouldn't have plain d0 == 1 */
                                if (lo <= 1) {
                                        DBG_BUGON(1);
                                        return -EFSCORRUPTED;
                                }
                                i -= lo - 2;
                                continue;
                        }
                        ++nblk;
                }
        }
        in += (vcnt << amortizedshift) - sizeof(__le32);
        m->pblk = le32_to_cpu(*(__le32 *)in) + nblk;
        return 0;
}

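/*
 * Locate the compacted index pack containing @lcn, accounting for the
 * optional initial 4-byte-amortized region and the 2-byte-amortized region,
 * then hand the pack over to unpack_compacted_index().
 */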
static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m,
                                         unsigned long lcn, bool lookahead)
{
        struct inode *const inode = m->inode;
        struct erofs_inode *const vi = EROFS_I(inode);
        const erofs_off_t ebase = sizeof(struct z_erofs_map_header) +
                ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
        unsigned int totalidx = erofs_iblks(inode);
        unsigned int compacted_4b_initial, compacted_2b;
        unsigned int amortizedshift;
        erofs_off_t pos;

        if (lcn >= totalidx)
                return -EINVAL;

        m->lcn = lcn;
        /* used to reach the 32-byte alignment required by compacted_2b */
        compacted_4b_initial = (32 - ebase % 32) / 4;
        if (compacted_4b_initial == 32 / 4)
                compacted_4b_initial = 0;

        if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) &&
            compacted_4b_initial < totalidx)
                compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);
        else
                compacted_2b = 0;

        pos = ebase;
        if (lcn < compacted_4b_initial) {
                amortizedshift = 2;
                goto out;
        }
        pos += compacted_4b_initial * 4;
        lcn -= compacted_4b_initial;

        if (lcn < compacted_2b) {
                amortizedshift = 1;
                goto out;
        }
        pos += compacted_2b * 2;
        lcn -= compacted_2b;
        amortizedshift = 2;
out:
        pos += lcn * (1 << amortizedshift);
        m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb,
                                      erofs_blknr(inode->i_sb, pos), EROFS_KMAP);
        if (IS_ERR(m->kaddr))
                return PTR_ERR(m->kaddr);
        return unpack_compacted_index(m, amortizedshift, pos, lookahead);
}

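/* dispatch to the full or compacted index loader based on the datalayout */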
static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m,
                                           unsigned int lcn, bool lookahead)
{
        switch (EROFS_I(m->inode)->datalayout) {
        case EROFS_INODE_COMPRESSED_FULL:
                return z_erofs_load_full_lcluster(m, lcn);
        case EROFS_INODE_COMPRESSED_COMPACT:
                return z_erofs_load_compact_lcluster(m, lcn, lookahead);
        default:
                return -EINVAL;
        }
}

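/*
 * Walk backwards by delta[0] hops from a NONHEAD lcluster until the HEAD
 * lcluster of the extent is reached, recording its type and logical start.
 */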
static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
                                   unsigned int lookback_distance)
{
        struct super_block *sb = m->inode->i_sb;
        struct erofs_inode *const vi = EROFS_I(m->inode);
        const unsigned int lclusterbits = vi->z_logical_clusterbits;

        while (m->lcn >= lookback_distance) {
                unsigned long lcn = m->lcn - lookback_distance;
                int err;

                err = z_erofs_load_lcluster_from_disk(m, lcn, false);
                if (err)
                        return err;

                switch (m->type) {
                case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
                        lookback_distance = m->delta[0];
                        if (!lookback_distance)
                                goto err_bogus;
                        continue;
                case Z_EROFS_LCLUSTER_TYPE_PLAIN:
                case Z_EROFS_LCLUSTER_TYPE_HEAD1:
                case Z_EROFS_LCLUSTER_TYPE_HEAD2:
                        m->headtype = m->type;
                        m->map->m_la = (lcn << lclusterbits) | m->clusterofs;
                        return 0;
                default:
                        erofs_err(sb, "unknown type %u @ lcn %lu of nid %llu",
                                  m->type, lcn, vi->nid);
                        DBG_BUGON(1);
                        return -EOPNOTSUPP;
                }
        }
err_bogus:
        erofs_err(sb, "bogus lookback distance %u @ lcn %lu of nid %llu",
                  lookback_distance, m->lcn, vi->nid);
        DBG_BUGON(1);
        return -EFSCORRUPTED;
}

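/*
 * Work out the compressed (physical) length of the current extent: one
 * lcluster for non-big pclusters, otherwise taken from the CBLKCNT stored
 * in the 1st NONHEAD lcluster following the HEAD lcluster.
 */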
static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
                                            unsigned int initial_lcn)
{
        struct super_block *sb = m->inode->i_sb;
        struct erofs_inode *const vi = EROFS_I(m->inode);
        struct erofs_map_blocks *const map = m->map;
        const unsigned int lclusterbits = vi->z_logical_clusterbits;
        unsigned long lcn;
        int err;

        DBG_BUGON(m->type != Z_EROFS_LCLUSTER_TYPE_PLAIN &&
                  m->type != Z_EROFS_LCLUSTER_TYPE_HEAD1 &&
                  m->type != Z_EROFS_LCLUSTER_TYPE_HEAD2);
        DBG_BUGON(m->type != m->headtype);

        if (m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
            ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1) &&
             !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) ||
            ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) &&
             !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
                map->m_plen = 1ULL << lclusterbits;
                return 0;
        }
        lcn = m->lcn + 1;
        if (m->compressedblks)
                goto out;

        err = z_erofs_load_lcluster_from_disk(m, lcn, false);
        if (err)
                return err;

        /*
         * If the 1st NONHEAD lcluster was already handled initially without
         * valid compressedblks, it must not be a CBLKCNT lcluster; otherwise
         * an internal implementation error has been detected.
         *
         * The following code can handle it properly anyway, but let's
         * BUG_ON in debugging mode only so that developers notice it.
         */
        DBG_BUGON(lcn == initial_lcn &&
                  m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD);

        switch (m->type) {
        case Z_EROFS_LCLUSTER_TYPE_PLAIN:
        case Z_EROFS_LCLUSTER_TYPE_HEAD1:
        case Z_EROFS_LCLUSTER_TYPE_HEAD2:
                /*
                 * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
                 * rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
                 */
                m->compressedblks = 1 << (lclusterbits - sb->s_blocksize_bits);
                break;
        case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
                if (m->delta[0] != 1)
                        goto err_bonus_cblkcnt;
                if (m->compressedblks)
                        break;
                fallthrough;
        default:
                erofs_err(sb, "cannot find CBLKCNT @ lcn %lu of nid %llu", lcn,
                          vi->nid);
                DBG_BUGON(1);
                return -EFSCORRUPTED;
        }
out:
        map->m_plen = erofs_pos(sb, m->compressedblks);
        return 0;
err_bonus_cblkcnt:
        erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid);
        DBG_BUGON(1);
        return -EFSCORRUPTED;
}

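/*
 * Extend map->m_llen forward to the next HEAD lcluster (or to EOF) by
 * following the delta[1] lookahead distances, so that callers such as
 * fiemap and readmore get the full decompressed extent length.
 */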
static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
{
        struct inode *inode = m->inode;
        struct erofs_inode *vi = EROFS_I(inode);
        struct erofs_map_blocks *map = m->map;
        unsigned int lclusterbits = vi->z_logical_clusterbits;
        u64 lcn = m->lcn, headlcn = map->m_la >> lclusterbits;
        int err;

        do {
                /* handle the last EOF pcluster (no next HEAD lcluster) */
                if ((lcn << lclusterbits) >= inode->i_size) {
                        map->m_llen = inode->i_size - map->m_la;
                        return 0;
                }

                err = z_erofs_load_lcluster_from_disk(m, lcn, true);
                if (err)
                        return err;

                if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
                        DBG_BUGON(!m->delta[1] &&
                                  m->clusterofs != 1 << lclusterbits);
                } else if (m->type == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
                           m->type == Z_EROFS_LCLUSTER_TYPE_HEAD1 ||
                           m->type == Z_EROFS_LCLUSTER_TYPE_HEAD2) {
                        /* go on until the next HEAD lcluster */
                        if (lcn != headlcn)
                                break;
                        m->delta[1] = 1;
                } else {
                        erofs_err(inode->i_sb, "unknown type %u @ lcn %llu of nid %llu",
                                  m->type, lcn, vi->nid);
                        DBG_BUGON(1);
                        return -EOPNOTSUPP;
                }
                lcn += m->delta[1];
        } while (m->delta[1]);

        map->m_llen = (lcn << lclusterbits) + m->clusterofs - map->m_la;
        return 0;
}

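/*
 * Core mapping routine: load the lcluster covering map->m_la, look back to
 * its HEAD lcluster if necessary, and fill in the logical/physical extent
 * (m_la, m_llen, m_pa, m_plen) together with the compression format flags.
 */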
static int z_erofs_do_map_blocks(struct inode *inode,
                                 struct erofs_map_blocks *map, int flags)
{
        struct erofs_inode *const vi = EROFS_I(inode);
        bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER;
        bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
        struct z_erofs_maprecorder m = {
                .inode = inode,
                .map = map,
        };
        int err = 0;
        unsigned int lclusterbits, endoff, afmt;
        unsigned long initial_lcn;
        unsigned long long ofs, end;

        lclusterbits = vi->z_logical_clusterbits;
        ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ? inode->i_size - 1 : map->m_la;
        initial_lcn = ofs >> lclusterbits;
        endoff = ofs & ((1 << lclusterbits) - 1);

        err = z_erofs_load_lcluster_from_disk(&m, initial_lcn, false);
        if (err)
                goto unmap_out;

        if (ztailpacking && (flags & EROFS_GET_BLOCKS_FINDTAIL))
                vi->z_idataoff = m.nextpackoff;

        map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED;
        end = (m.lcn + 1ULL) << lclusterbits;

        switch (m.type) {
        case Z_EROFS_LCLUSTER_TYPE_PLAIN:
        case Z_EROFS_LCLUSTER_TYPE_HEAD1:
        case Z_EROFS_LCLUSTER_TYPE_HEAD2:
                if (endoff >= m.clusterofs) {
                        m.headtype = m.type;
                        map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
                        /*
                         * For ztailpacking files, in order to inline data more
                         * effectively, special EOF lclusters are now supported
                         * which can have three parts at most.
                         */
                        if (ztailpacking && end > inode->i_size)
                                end = inode->i_size;
                        break;
                }
                /* m.lcn should be >= 1 if endoff < m.clusterofs */
                if (!m.lcn) {
                        erofs_err(inode->i_sb,
                                  "invalid logical cluster 0 at nid %llu",
                                  vi->nid);
                        err = -EFSCORRUPTED;
                        goto unmap_out;
                }
                end = (m.lcn << lclusterbits) | m.clusterofs;
                map->m_flags |= EROFS_MAP_FULL_MAPPED;
                m.delta[0] = 1;
                fallthrough;
        case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
                /* get the corresponding first chunk */
                err = z_erofs_extent_lookback(&m, m.delta[0]);
                if (err)
                        goto unmap_out;
                break;
        default:
                erofs_err(inode->i_sb,
                          "unknown type %u @ offset %llu of nid %llu",
                          m.type, ofs, vi->nid);
                err = -EOPNOTSUPP;
                goto unmap_out;
        }
        if (m.partialref)
                map->m_flags |= EROFS_MAP_PARTIAL_REF;
        map->m_llen = end - map->m_la;

        if (flags & EROFS_GET_BLOCKS_FINDTAIL) {
                vi->z_tailextent_headlcn = m.lcn;
                /* for non-compact indexes, fragmentoff is 64 bits */
                if (fragment && vi->datalayout == EROFS_INODE_COMPRESSED_FULL)
                        vi->z_fragmentoff |= (u64)m.pblk << 32;
        }
        if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
                map->m_flags |= EROFS_MAP_META;
                map->m_pa = vi->z_idataoff;
                map->m_plen = vi->z_idata_size;
        } else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
                map->m_flags |= EROFS_MAP_FRAGMENT;
        } else {
                map->m_pa = erofs_pos(inode->i_sb, m.pblk);
                err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
                if (err)
                        goto unmap_out;
        }

        if (m.headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN) {
                if (map->m_llen > map->m_plen) {
                        DBG_BUGON(1);
                        err = -EFSCORRUPTED;
                        goto unmap_out;
                }
                afmt = vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER ?
                        Z_EROFS_COMPRESSION_INTERLACED :
                        Z_EROFS_COMPRESSION_SHIFTED;
        } else {
                afmt = m.headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2 ?
                        vi->z_algorithmtype[1] : vi->z_algorithmtype[0];
                if (!(EROFS_I_SB(inode)->available_compr_algs & (1 << afmt))) {
                        erofs_err(inode->i_sb, "inconsistent algorithmtype %u for nid %llu",
                                  afmt, vi->nid);
                        err = -EFSCORRUPTED;
                        goto unmap_out;
                }
        }
        map->m_algorithmformat = afmt;

        if ((flags & EROFS_GET_BLOCKS_FIEMAP) ||
            ((flags & EROFS_GET_BLOCKS_READMORE) &&
             (map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA ||
              map->m_algorithmformat == Z_EROFS_COMPRESSION_DEFLATE) &&
              map->m_llen >= i_blocksize(inode))) {
                err = z_erofs_get_extent_decompressedlen(&m);
                if (!err)
                        map->m_flags |= EROFS_MAP_FULL_MAPPED;
        }

unmap_out:
        erofs_unmap_metabuf(&m.map->buf);
        return err;
}

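/*
 * Parse the z_erofs map header of @inode on first use, caching the results
 * in the erofs_inode; concurrent initializers are serialized with
 * EROFS_I_BL_Z_BIT and completion is published via EROFS_I_Z_INITED_BIT.
 */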
static int z_erofs_fill_inode_lazy(struct inode *inode)
{
        struct erofs_inode *const vi = EROFS_I(inode);
        struct super_block *const sb = inode->i_sb;
        int err, headnr;
        erofs_off_t pos;
        struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
        void *kaddr;
        struct z_erofs_map_header *h;

        if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) {
                /*
                 * paired with smp_mb() at the end of the function to ensure
                 * fields will only be observed after the bit is set.
                 */
                smp_mb();
                return 0;
        }

        if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_Z_BIT, TASK_KILLABLE))
                return -ERESTARTSYS;

        err = 0;
        if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags))
                goto out_unlock;

        pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
        kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(sb, pos), EROFS_KMAP);
        if (IS_ERR(kaddr)) {
                err = PTR_ERR(kaddr);
                goto out_unlock;
        }

        h = kaddr + erofs_blkoff(sb, pos);
        /*
         * If the highest bit of the 8-byte map header is set, the whole file
         * is stored in the packed inode.  The remaining bits keep
         * z_fragmentoff.
         */
        if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) {
                vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
                vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63);
                vi->z_tailextent_headlcn = 0;
                goto done;
        }
        vi->z_advise = le16_to_cpu(h->h_advise);
        vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
        vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;

        headnr = 0;
        if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX ||
            vi->z_algorithmtype[++headnr] >= Z_EROFS_COMPRESSION_MAX) {
                erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel",
                          headnr + 1, vi->z_algorithmtype[headnr], vi->nid);
                err = -EOPNOTSUPP;
                goto out_put_metabuf;
        }

        vi->z_logical_clusterbits = sb->s_blocksize_bits + (h->h_clusterbits & 7);
        if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
            vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
                            Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
                erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu",
                          vi->nid);
                err = -EFSCORRUPTED;
                goto out_put_metabuf;
        }
        if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT &&
            !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^
            !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
                erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu",
                          vi->nid);
                err = -EFSCORRUPTED;
                goto out_put_metabuf;
        }

        if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) {
                struct erofs_map_blocks map = {
                        .buf = __EROFS_BUF_INITIALIZER
                };

                vi->z_idata_size = le16_to_cpu(h->h_idata_size);
                err = z_erofs_do_map_blocks(inode, &map,
                                            EROFS_GET_BLOCKS_FINDTAIL);
                erofs_put_metabuf(&map.buf);

                if (!map.m_plen ||
                    erofs_blkoff(sb, map.m_pa) + map.m_plen > sb->s_blocksize) {
                        erofs_err(sb, "invalid tail-packing pclustersize %llu",
                                  map.m_plen);
                        err = -EFSCORRUPTED;
                }
                if (err < 0)
                        goto out_put_metabuf;
        }

        if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER &&
            !(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) {
                struct erofs_map_blocks map = {
                        .buf = __EROFS_BUF_INITIALIZER
                };

                vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff);
                err = z_erofs_do_map_blocks(inode, &map,
                                            EROFS_GET_BLOCKS_FINDTAIL);
                erofs_put_metabuf(&map.buf);
                if (err < 0)
                        goto out_put_metabuf;
        }
done:
        /* paired with smp_mb() at the beginning of the function */
        smp_mb();
        set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
out_put_metabuf:
        erofs_put_metabuf(&buf);
out_unlock:
        clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
        return err;
}

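/*
 * Public entry point for mapping a compressed inode: handle post-EOF and
 * whole-file fragment cases directly, otherwise defer to
 * z_erofs_do_map_blocks() once the map header has been initialized.
 */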
int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
                            int flags)
{
        struct erofs_inode *const vi = EROFS_I(inode);
        int err = 0;

        trace_z_erofs_map_blocks_iter_enter(inode, map, flags);

        /* when trying to read beyond EOF, leave it unmapped */
        if (map->m_la >= inode->i_size) {
                map->m_llen = map->m_la + 1 - inode->i_size;
                map->m_la = inode->i_size;
                map->m_flags = 0;
                goto out;
        }

        err = z_erofs_fill_inode_lazy(inode);
        if (err)
                goto out;

        if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) &&
            !vi->z_tailextent_headlcn) {
                map->m_la = 0;
                map->m_llen = inode->i_size;
                map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED |
                                EROFS_MAP_FRAGMENT;
                goto out;
        }

        err = z_erofs_do_map_blocks(inode, map, flags);
out:
        trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
        return err;
}

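/*
 * iomap "report" hook: translate an erofs_map_blocks lookup (done with
 * EROFS_GET_BLOCKS_FIEMAP) into an iomap extent description for callers
 * such as fiemap.
 */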
static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
                                loff_t length, unsigned int flags,
                                struct iomap *iomap, struct iomap *srcmap)
{
        int ret;
        struct erofs_map_blocks map = { .m_la = offset };

        ret = z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP);
        erofs_put_metabuf(&map.buf);
        if (ret < 0)
                return ret;

        iomap->bdev = inode->i_sb->s_bdev;
        iomap->offset = map.m_la;
        iomap->length = map.m_llen;
        if (map.m_flags & EROFS_MAP_MAPPED) {
                iomap->type = IOMAP_MAPPED;
                iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ?
                              IOMAP_NULL_ADDR : map.m_pa;
        } else {
                iomap->type = IOMAP_HOLE;
                iomap->addr = IOMAP_NULL_ADDR;
                /*
                 * There is no strict rule on how to describe post-EOF extents,
                 * but we have to do it as below; otherwise, iomap itself will
                 * get into an endless loop on post-EOF accesses.
                 *
                 * Calculate the effective offset by subtracting the extent
                 * start (map.m_la) from the requested offset, and add it to
                 * the length.  (NB: offset >= map.m_la always holds.)
                 */
                if (iomap->offset >= inode->i_size)
                        iomap->length = length + offset - map.m_la;
        }
        iomap->flags = 0;
        return 0;
}

const struct iomap_ops z_erofs_iomap_report_ops = {
        .iomap_begin = z_erofs_iomap_begin_report,
};