fs/ntfs3/lib/decompress_common.h

   1 /* SPDX-License-Identifier: GPL-2.0-or-later */
   2 /*
   3  * decompress_common.h - Code shared by the XPRESS and LZX decompressors
   4  *
   5  * Copyright (C) 2015 Eric Biggers
   6  */
   7
   8 #ifndef _LINUX_NTFS3_LIB_DECOMPRESS_COMMON_H
   9 #define _LINUX_NTFS3_LIB_DECOMPRESS_COMMON_H
  10
  11 #include <linux/string.h>
  12 #include <linux/compiler.h>
  13 #include <linux/types.h>
  14 #include <linux/slab.h>
  15 #include <asm/unaligned.h>
  16
  17
  18 /* "Force inline" macro (not required, but helpful for performance)  */
  19 #define forceinline __always_inline
  20
  21 /* Enable whole-word match copying on selected architectures  */
  22 #if defined(__i386__) || defined(__x86_64__) || defined(__ARM_FEATURE_UNALIGNED)
  23 #  define FAST_UNALIGNED_ACCESS
  24 #endif
  25
  26 /* Size of a machine word  */
  27 #define WORDBYTES (sizeof(size_t))
  28
  29 static forceinline void
  30 copy_unaligned_word(const void *src, void *dst)
  31 {
  32         put_unaligned(get_unaligned((const size_t *)src), (size_t *)dst);
  33 }
  34
  35
  36 /* Generate a "word" with platform-dependent size whose bytes all contain the
  37  * value 'b'.
  38  */
  39 static forceinline size_t repeat_byte(u8 b)
  40 {
  41         size_t v;
  42
  43         v = b;
  44         v |= v << 8;
  45         v |= v << 16;
  46         v |= v << ((WORDBYTES == 8) ? 32 : 0);
  47         return v;
  48 }
  49
  50 /* Structure that encapsulates a block of in-memory data being interpreted as a
  51  * stream of bits, optionally with interwoven literal bytes.  Bits are assumed
  52  * to be stored in little endian 16-bit coding units, with the bits ordered high
  53  * to low.
  54  */
  55 struct input_bitstream {
  56
  57         /* Bits that have been read from the input buffer.  The bits are
  58          * left-justified; the next bit is always bit 31.
  59          */
  60         u32 bitbuf;
  61
  62         /* Number of bits currently held in @bitbuf.  */
  63         u32 bitsleft;
  64
  65         /* Pointer to the next byte to be retrieved from the input buffer.  */
  66         const u8 *next;
  67
  68         /* Pointer to just past the end of the input buffer.  */
  69         const u8 *end;
  70 };
  71
  72 /* Initialize a bitstream to read from the specified input buffer.  */
  73 static forceinline void init_input_bitstream(struct input_bitstream *is,
  74                                              const void *buffer, u32 size)
  75 {
  76         is->bitbuf = 0;
  77         is->bitsleft = 0;
  78         is->next = buffer;
  79         is->end = is->next + size;
  80 }
  81
  82 /* Ensure the bit buffer variable for the bitstream contains at least @num_bits
  83  * bits.  Following this, bitstream_peek_bits() and/or bitstream_remove_bits()
  84  * may be called on the bitstream to peek or remove up to @num_bits bits.  Note
  85  * that @num_bits must be <= 16.
  86  */
  87 static forceinline void bitstream_ensure_bits(struct input_bitstream *is,
  88                                               u32 num_bits)
  89 {
  90         if (is->bitsleft < num_bits) {
  91                 if (is->end - is->next >= 2) {
  92                         is->bitbuf |= (u32)get_unaligned_le16(is->next)
  93                                         << (16 - is->bitsleft);
  94                         is->next += 2;
  95                 }
  96                 is->bitsleft += 16;
  97         }
  98 }
  99
 100 /* Return the next @num_bits bits from the bitstream, without removing them.
 101  * There must be at least @num_bits remaining in the buffer variable, from a
 102  * previous call to bitstream_ensure_bits().
 103  */
 104 static forceinline u32
 105 bitstream_peek_bits(const struct input_bitstream *is, const u32 num_bits)
 106 {
 107         return (is->bitbuf >> 1) >> (sizeof(is->bitbuf) * 8 - num_bits - 1);
 108 }
 109
 110 /* Remove @num_bits from the bitstream.  There must be at least @num_bits
 111  * remaining in the buffer variable, from a previous call to
 112  * bitstream_ensure_bits().
 113  */
 114 static forceinline void
 115 bitstream_remove_bits(struct input_bitstream *is, u32 num_bits)
 116 {
 117         is->bitbuf <<= num_bits;
 118         is->bitsleft -= num_bits;
 119 }
 120
 121 /* Remove and return @num_bits bits from the bitstream.  There must be at least
 122  * @num_bits remaining in the buffer variable, from a previous call to
 123  * bitstream_ensure_bits().
 124  */
 125 static forceinline u32
 126 bitstream_pop_bits(struct input_bitstream *is, u32 num_bits)
 127 {
 128         u32 bits = bitstream_peek_bits(is, num_bits);
 129
 130         bitstream_remove_bits(is, num_bits);
 131         return bits;
 132 }
 133
 134 /* Read and return the next @num_bits bits from the bitstream.  */
 135 static forceinline u32
 136 bitstream_read_bits(struct input_bitstream *is, u32 num_bits)
 137 {
 138         bitstream_ensure_bits(is, num_bits);
 139         return bitstream_pop_bits(is, num_bits);
 140 }
 141
 142 /* Read and return the next literal byte embedded in the bitstream.  */
 143 static forceinline u8
 144 bitstream_read_byte(struct input_bitstream *is)
 145 {
 146         if (unlikely(is->end == is->next))
 147                 return 0;
 148         return *is->next++;
 149 }
 150
 151 /* Read and return the next 16-bit integer embedded in the bitstream.  */
 152 static forceinline u16
 153 bitstream_read_u16(struct input_bitstream *is)
 154 {
 155         u16 v;
 156
 157         if (unlikely(is->end - is->next < 2))
 158                 return 0;
 159         v = get_unaligned_le16(is->next);
 160         is->next += 2;
 161         return v;
 162 }
 163
 164 /* Read and return the next 32-bit integer embedded in the bitstream.  */
 165 static forceinline u32
 166 bitstream_read_u32(struct input_bitstream *is)
 167 {
 168         u32 v;
 169
 170         if (unlikely(is->end - is->next < 4))
 171                 return 0;
 172         v = get_unaligned_le32(is->next);
 173         is->next += 4;
 174         return v;
 175 }
 176
 177 /* Read into @dst_buffer an array of literal bytes embedded in the bitstream.
 178  * Return either a pointer to the byte past the last written, or NULL if the
 179  * read overflows the input buffer.
 180  */
 181 static forceinline void *bitstream_read_bytes(struct input_bitstream *is,
 182                                               void *dst_buffer, size_t count)
 183 {
 184         if ((size_t)(is->end - is->next) < count)
 185                 return NULL;
 186         memcpy(dst_buffer, is->next, count);
 187         is->next += count;
 188         return (u8 *)dst_buffer + count;
 189 }
 190
 191 /* Align the input bitstream on a coding-unit boundary.  */
 192 static forceinline void bitstream_align(struct input_bitstream *is)
 193 {
 194         is->bitsleft = 0;
 195         is->bitbuf = 0;
 196 }
 197
 198 extern int make_huffman_decode_table(u16 decode_table[], const u32 num_syms,
 199                                      const u32 num_bits, const u8 lens[],
 200                                      const u32 max_codeword_len,
 201                                      u16 working_space[]);
 202
 203
 204 /* Reads and returns the next Huffman-encoded symbol from a bitstream.  If the
 205  * input data is exhausted, the Huffman symbol is decoded as if the missing bits
 206  * are all zeroes.
 207  */
 208 static forceinline u32 read_huffsym(struct input_bitstream *istream,
 209                                          const u16 decode_table[],
 210                                          u32 table_bits,
 211                                          u32 max_codeword_len)
 212 {
 213         u32 entry;
 214         u32 key_bits;
 215
 216         bitstream_ensure_bits(istream, max_codeword_len);
 217
 218         /* Index the decode table by the next table_bits bits of the input.  */
 219         key_bits = bitstream_peek_bits(istream, table_bits);
 220         entry = decode_table[key_bits];
 221         if (entry < 0xC000) {
 222                 /* Fast case: The decode table directly provided the
 223                  * symbol and codeword length.  The low 11 bits are the
 224                  * symbol, and the high 5 bits are the codeword length.
 225                  */
 226                 bitstream_remove_bits(istream, entry >> 11);
 227                 return entry & 0x7FF;
 228         }
 229         /* Slow case: The codeword for the symbol is longer than
 230          * table_bits, so the symbol does not have an entry
 231          * directly in the first (1 << table_bits) entries of the
 232          * decode table.  Traverse the appropriate binary tree
 233          * bit-by-bit to decode the symbol.
 234          */
 235         bitstream_remove_bits(istream, table_bits);
 236         do {
 237                 key_bits = (entry & 0x3FFF) + bitstream_pop_bits(istream, 1);
 238         } while ((entry = decode_table[key_bits]) >= 0xC000);
 239         return entry;
 240 }
 241
 242 /*
 243  * Copy an LZ77 match at (dst - offset) to dst.
 244  *
 245  * The length and offset must be already validated --- that is, (dst - offset)
 246  * can't underrun the output buffer, and (dst + length) can't overrun the output
 247  * buffer.  Also, the length cannot be 0.
 248  *
 249  * @bufend points to the byte past the end of the output buffer.  This function
 250  * won't write any data beyond this position.
 251  *
 252  * Returns dst + length.
 253  */
 254 static forceinline u8 *lz_copy(u8 *dst, u32 length, u32 offset, const u8 *bufend,
 255                                u32 min_length)
 256 {
 257         const u8 *src = dst - offset;
 258
 259         /*
 260          * Try to copy one machine word at a time.  On i386 and x86_64 this is
 261          * faster than copying one byte at a time, unless the data is
 262          * near-random and all the matches have very short lengths.  Note that
 263          * since this requires unaligned memory accesses, it won't necessarily
 264          * be faster on every architecture.
 265          *
 266          * Also note that we might copy more than the length of the match.  For
 267          * example, if a word is 8 bytes and the match is of length 5, then
 268          * we'll simply copy 8 bytes.  This is okay as long as we don't write
 269          * beyond the end of the output buffer, hence the check for (bufend -
 270          * end >= WORDBYTES - 1).
 271          */
 272 #ifdef FAST_UNALIGNED_ACCESS
 273         u8 * const end = dst + length;
 274
 275         if (bufend - end >= (ptrdiff_t)(WORDBYTES - 1)) {
 276
 277                 if (offset >= WORDBYTES) {
 278                         /* The source and destination words don't overlap.  */
 279
 280                         /* To improve branch prediction, one iteration of this
 281                          * loop is unrolled.  Most matches are short and will
 282                          * fail the first check.  But if that check passes, then
 283                          * it becomes increasing likely that the match is long
 284                          * and we'll need to continue copying.
 285                          */
 286
 287                         copy_unaligned_word(src, dst);
 288                         src += WORDBYTES;
 289                         dst += WORDBYTES;
 290
 291                         if (dst < end) {
 292                                 do {
 293                                         copy_unaligned_word(src, dst);
 294                                         src += WORDBYTES;
 295                                         dst += WORDBYTES;
 296                                 } while (dst < end);
 297                         }
 298                         return end;
 299                 } else if (offset == 1) {
 300
 301                         /* Offset 1 matches are equivalent to run-length
 302                          * encoding of the previous byte.  This case is common
 303                          * if the data contains many repeated bytes.
 304                          */
 305                         size_t v = repeat_byte(*(dst - 1));
 306
 307                         do {
 308                                 put_unaligned(v, (size_t *)dst);
 309                                 src += WORDBYTES;
 310                                 dst += WORDBYTES;
 311                         } while (dst < end);
 312                         return end;
 313                 }
 314                 /*
 315                  * We don't bother with special cases for other 'offset <
 316                  * WORDBYTES', which are usually rarer than 'offset == 1'.  Extra
 317                  * checks will just slow things down.  Actually, it's possible
 318                  * to handle all the 'offset < WORDBYTES' cases using the same
 319                  * code, but it still becomes more complicated doesn't seem any
 320                  * faster overall; it definitely slows down the more common
 321                  * 'offset == 1' case.
 322                  */
 323         }
 324 #endif /* FAST_UNALIGNED_ACCESS */
 325
 326         /* Fall back to a bytewise copy.  */
 327
 328         if (min_length >= 2) {
 329                 *dst++ = *src++;
 330                 length--;
 331         }
 332         if (min_length >= 3) {
 333                 *dst++ = *src++;
 334                 length--;
 335         }
 336         do {
 337                 *dst++ = *src++;
 338         } while (--length);
 339
 340         return dst;
 341 }
 342
 343 #endif /* _LINUX_NTFS3_LIB_DECOMPRESS_COMMON_H */