/* Constants for the LZX data compression format.  See the comments in
 * lzx-compress.c and lzx-decompress.c for more information about this format.
 */
8 #include "wimlib/assert.h"
9 #include "wimlib/util.h"
10 #include "wimlib/types.h"
/* Define ENABLE_LZX_DEBUG (for example by uncommenting the next line) to map
 * LZX_DEBUG to DEBUG and LZX_ASSERT to wimlib_assert.  When it is not defined,
 * both macros expand to nothing, so their arguments are never evaluated and
 * must not carry side effects the program depends on. */
//#define ENABLE_LZX_DEBUG
#ifdef ENABLE_LZX_DEBUG
#  define LZX_DEBUG DEBUG
#  define LZX_ASSERT wimlib_assert
#else
#  define LZX_DEBUG(format, ...)
#  define LZX_ASSERT(...)
#endif
/* Constants, most of which are defined by the LZX specification: */

/* The smallest and largest allowed match lengths. */
#define LZX_MIN_MATCH_LEN	2
#define LZX_MAX_MATCH_LEN	257

/* Number of values an uncompressed literal byte can represent. */
#define LZX_NUM_CHARS		256
/* Each LZX block begins with 3 bits that determine the block type.  Below are
 * the valid block types.  Values 0, and 4 through 7, are invalid. */
#define LZX_BLOCKTYPE_VERBATIM		1
#define LZX_BLOCKTYPE_ALIGNED		2
#define LZX_BLOCKTYPE_UNCOMPRESSED	3

/* NOTE(review): presumably the number of match lengths that can be encoded
 * directly in a main code symbol, with longer matches going through the length
 * code -- confirm against the LZX specification. */
#define LZX_NUM_PRIMARY_LENS		7
/* The number of position slots varies from 30 to 51 depending on the window
 * size (see comment in lzx-decompress.c).  This is the upper bound, used to
 * size the per-slot tables declared in this header. */
#define LZX_MAX_POSITION_SLOTS	51

/* Supported window sizes are powers of two; the "order" is the base-2
 * logarithm of the window size in bytes. */
#define LZX_MIN_WINDOW_ORDER	15
#define LZX_MAX_WINDOW_ORDER	21
#define LZX_MIN_WINDOW_SIZE	(1U << LZX_MIN_WINDOW_ORDER)  /* 32768   */
#define LZX_MAX_WINDOW_SIZE	(1U << LZX_MAX_WINDOW_ORDER)  /* 2097152 */
/* Read the LZX specification for information about the Huffman trees used in
 * the LZX compression format.  Basically there are 4 of them:  The main tree,
 * the length tree, the pre tree, and the aligned tree.  The main tree and
 * length tree are given at the beginning of VERBATIM and ALIGNED blocks as a
 * list of *_NUM_SYMBOLS code length values.  They are read using the
 * read_code_lens() function and built using the make_decode_table() function.
 * The decode table is not a real tree but rather a table that we can index by
 * some number of bits (*_TABLEBITS) of the input to quickly look up the symbol
 * corresponding to a Huffman code.
 *
 * The ALIGNED tree is only present on ALIGNED blocks.
 *
 * A PRECODE (pre tree) is used to encode the code lengths for the main tree
 * and the length tree.  There is a separate pretree for each half of the main
 * tree. */

/* Upper bound on the main code alphabet: one symbol per literal byte value
 * plus 8 symbols for each possible position slot. */
#define LZX_MAINCODE_MAX_NUM_SYMBOLS	(LZX_NUM_CHARS + (LZX_MAX_POSITION_SLOTS << 3))
#define LZX_MAINCODE_TABLEBITS		11

#define LZX_LENCODE_NUM_SYMBOLS		249
#define LZX_LENCODE_TABLEBITS		10

/* NOTE(review): *_ELEMENT_SIZE is presumably the number of bits used to store
 * each code length of that tree in the block header -- confirm against the
 * LZX specification. */
#define LZX_PRECODE_NUM_SYMBOLS		20
#define LZX_PRECODE_TABLEBITS		6
#define LZX_PRECODE_ELEMENT_SIZE	4

#define LZX_ALIGNEDCODE_NUM_SYMBOLS	8
#define LZX_ALIGNEDCODE_TABLEBITS	7
#define LZX_ALIGNEDCODE_ELEMENT_SIZE	3
/* Maximum allowed length, in bits, of Huffman codewords in each of the four
 * codes (main, length, pre, aligned). */
#define LZX_MAX_MAIN_CODEWORD_LEN	16
#define LZX_MAX_LEN_CODEWORD_LEN	16
#define LZX_MAX_PRE_CODEWORD_LEN	16
#define LZX_MAX_ALIGNED_CODEWORD_LEN	8
/* For the LZX-compressed blocks in WIM files, this value is always used as the
 * filesize parameter for the call instruction (0xe8 byte) preprocessing, even
 * though the blocks themselves are not this size, and the size of the actual
 * file resource in the WIM file is very likely to be something entirely
 * different as well. */
#define LZX_WIM_MAGIC_FILESIZE	12000000

/* Assumed LZX block size when the encoded block size begins with a 0 bit. */
#define LZX_DEFAULT_BLOCK_SIZE	32768
92 #define USE_LZX_EXTRA_BITS_ARRAY
94 #ifdef USE_LZX_EXTRA_BITS_ARRAY
95 extern const u8 lzx_extra_bits[LZX_MAX_POSITION_SLOTS];
98 /* Given the number of a LZX position slot, return the number of extra bits that
99 * are needed to encode the match offset. */
100 static inline unsigned
101 lzx_get_num_extra_bits(unsigned position_slot)
103 #ifdef USE_LZX_EXTRA_BITS_ARRAY
105 return lzx_extra_bits[position_slot];
107 /* Calculate directly using a shift and subtraction. */
108 LZX_ASSERT(position_slot >= 2 && position_slot <= 37);
109 return (position_slot >> 1) - 1;
113 extern const u32 lzx_position_base[LZX_MAX_POSITION_SLOTS];
115 /* Returns the LZX position slot that corresponds to a given formatted offset.
117 * Logically, this returns the smallest i such that
118 * formatted_offset >= lzx_position_base[i].
120 * The actual implementation below takes advantage of the regularity of the
121 * numbers in the lzx_position_base array to calculate the slot directly from
122 * the formatted offset without actually looking at the array.
124 static inline unsigned
125 lzx_get_position_slot_raw(unsigned formatted_offset)
127 if (formatted_offset >= 196608) {
128 return (formatted_offset >> 17) + 34;
130 LZX_ASSERT(2 <= formatted_offset && formatted_offset < 655360);
131 unsigned mssb_idx = bsr32(formatted_offset);
132 return (mssb_idx << 1) |
133 ((formatted_offset >> (mssb_idx - 1)) & 1);
137 extern bool lzx_window_size_valid(size_t window_size);
138 extern unsigned lzx_get_num_main_syms(u32 window_size);
140 #define LZX_NUM_RECENT_OFFSETS 3
142 /* Least-recently used queue for match offsets. */
143 struct lzx_lru_queue {
144 u32 R[LZX_NUM_RECENT_OFFSETS];
147 /* In the LZX format, an offset of n bytes is actually encoded
148 * as (n + LZX_OFFSET_OFFSET). */
149 #define LZX_OFFSET_OFFSET (LZX_NUM_RECENT_OFFSETS - 1)
152 lzx_lru_queue_init(struct lzx_lru_queue *queue)
154 for (unsigned i = 0; i < LZX_NUM_RECENT_OFFSETS; i++)
158 #endif /* _WIMLIB_LZX_H */