X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=include%2Fwimlib%2Flzx.h;h=da0c55143c4bc9a569bdc5550b51c0e16c16d80b;hb=dd3f804966f577b104eda649f572e8e765edc7ef;hp=6061e730e7b63f9e45ee867aac3f81f510f7e721;hpb=2254a0fc3f1d7af1151ee83f3458f44339b5028b;p=wimlib diff --git a/include/wimlib/lzx.h b/include/wimlib/lzx.h index 6061e730..da0c5514 100644 --- a/include/wimlib/lzx.h +++ b/include/wimlib/lzx.h @@ -1,111 +1,73 @@ +/* + * lzx.h + * + * Declarations shared between LZX compression and decompression. + */ + #ifndef _WIMLIB_LZX_H #define _WIMLIB_LZX_H #include "wimlib/assert.h" +#include "wimlib/compiler.h" +#include "wimlib/lzx_constants.h" +#include "wimlib/util.h" #include "wimlib/types.h" //#define ENABLE_LZX_DEBUG #ifdef ENABLE_LZX_DEBUG -# define LZX_DEBUG DEBUG # define LZX_ASSERT wimlib_assert #else -# define LZX_DEBUG(format, ...) # define LZX_ASSERT(...) #endif -/* Constants, most of which are defined by the LZX specification: */ - -/* The smallest and largest allowed match lengths. */ -#define LZX_MIN_MATCH 2 -#define LZX_MAX_MATCH 257 - -/* Number of values an uncompressed literal byte can represent. */ -#define LZX_NUM_CHARS 256 - -/* Each LZX block begins with 3 bits that determines the block type. Below are - * the valid block types. Values 0, and 4 through 7, are invalid. */ -#define LZX_BLOCKTYPE_VERBATIM 1 -#define LZX_BLOCKTYPE_ALIGNED 2 -#define LZX_BLOCKTYPE_UNCOMPRESSED 3 - -#define LZX_NUM_PRIMARY_LENS 7 /* this one missing from spec! */ +extern const u32 lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS]; -/* NOTE: There are really 51 position slots in the LZX format as a whole, but - * only 30 are needed to allow for the window to be up to 32768 bytes long, - * which is the maximum in the WIM format. */ -#define LZX_NUM_POSITION_SLOTS 30 +extern const u8 lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS]; -/* Read the LZX specification for information about the Huffman trees used in - * the LZX compression format. 
Basically there are 4 of them: The main tree, - * the length tree, the pre tree, and the aligned tree. The main tree and - * length tree are given at the beginning of VERBATIM and ALIGNED blocks as a - * list of *_NUM_SYMBOLS code length values. They are read using the - * read_code_lens() function and built using the make_decode_table() function. - * The decode table is not a real tree but rather a table that we can index by - * some number of bits (*_TABLEBITS) of the input to quickly look up the symbol - * corresponding to a Huffman code. +/* Returns the LZX offset slot that corresponds to a given adjusted offset. * - * The ALIGNED tree is only present on ALIGNED blocks. + * Logically, this returns the largest i such that + * adjusted_offset >= lzx_offset_slot_base[i]. * - * A PRETREE is used to encode the code lengths for the main tree and the length - * tree. There is a separate pretree for each half of the main tree. */ - -#define LZX_MAINTREE_NUM_SYMBOLS (LZX_NUM_CHARS + \ - (LZX_NUM_POSITION_SLOTS << 3)) -#define LZX_MAINTREE_TABLEBITS 11 - -#define LZX_LENTREE_NUM_SYMBOLS 249 -#define LZX_LENTREE_TABLEBITS 10 - -#define LZX_PRETREE_NUM_SYMBOLS 20 -#define LZX_PRETREE_TABLEBITS 6 -#define LZX_PRETREE_ELEMENT_SIZE 4 - -#define LZX_ALIGNEDTREE_NUM_SYMBOLS 8 -#define LZX_ALIGNEDTREE_TABLEBITS 7 -#define LZX_ALIGNEDTREE_ELEMENT_SIZE 3 - -/* Maximum allowed length of a Huffman code. */ -#define LZX_MAX_CODEWORD_LEN 16 - -/* For the LZX-compressed blocks in WIM files, this value is always used as the - * filesize parameter for the call instruction (0xe8 byte) preprocessing, even - * though the blocks themselves are not this size, and the size of the actual - * file resource in the WIM file is very likely to be something entirely - * different as well. 
*/ -#define LZX_WIM_MAGIC_FILESIZE 12000000 + * The actual implementation below takes advantage of the regularity of the + * numbers in the lzx_offset_slot_base array to calculate the slot directly from + * the adjusted offset without actually looking at the array. + */ +static inline unsigned +lzx_get_offset_slot_raw(u32 adjusted_offset) +{ + if (adjusted_offset >= 196608) { + return (adjusted_offset >> 17) + 34; + } else { + LZX_ASSERT(2 <= adjusted_offset && adjusted_offset < 655360); + unsigned mssb_idx = bsr32(adjusted_offset); + return (mssb_idx << 1) | + ((adjusted_offset >> (mssb_idx - 1)) & 1); + } +} -#define LZX_BLOCKTYPE_NBITS 3 -#define LZX_BLOCKSIZE_NBITS 16 +extern unsigned lzx_get_window_order(size_t max_block_size); -#define USE_LZX_EXTRA_BITS_ARRAY +extern unsigned lzx_get_num_main_syms(unsigned window_order); -#ifdef USE_LZX_EXTRA_BITS_ARRAY -extern const u8 lzx_extra_bits[LZX_NUM_POSITION_SLOTS]; -#endif +/* Least-recently used queue for match offsets. */ +struct lzx_lru_queue { + u32 R[LZX_NUM_RECENT_OFFSETS]; +} _aligned_attribute(sizeof(unsigned long)); -/* Given the number of a LZX position slot, return the number of extra bits that - * are needed to encode the match offset. */ -static inline unsigned -lzx_get_num_extra_bits(unsigned position_slot) +/* Initialize the LZX least-recently-used match offset queue at the beginning of + * a new window for either decompression or compression. */ +static inline void +lzx_lru_queue_init(struct lzx_lru_queue *queue) { -#ifdef USE_LZX_EXTRA_BITS_ARRAY - /* Use a table */ - return lzx_extra_bits[position_slot]; -#else - /* Calculate directly using a shift and subtraction. 
*/ - wimlib_assert(position_slot >= 2 && position_slot <= 37); - return (position_slot >> 1) - 1; -#endif + for (unsigned i = 0; i < LZX_NUM_RECENT_OFFSETS; i++) + queue->R[i] = 1; } -extern const u32 lzx_position_base[LZX_NUM_POSITION_SLOTS]; +extern void +lzx_do_e8_preprocessing(u8 *data, u32 size); -/* Least-recently used queue for match offsets. */ -struct lzx_lru_queue { - u32 R0; - u32 R1; - u32 R2; -}; +extern void +lzx_undo_e8_preprocessing(u8 *data, u32 size); #endif /* _WIMLIB_LZX_H */