X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=include%2Fwimlib%2Flzx.h;h=715ec16bfefa62fef98ca43c07bd1964f7856c5c;hb=86a852652706cfe53087ef839840171bed597ffd;hp=a3db788a8b76b221b3bc368abce9fe38782926fb;hpb=e8c3ca2d1d0cac3d64985b45a9f654d2029a7518;p=wimlib diff --git a/include/wimlib/lzx.h b/include/wimlib/lzx.h index a3db788a..715ec16b 100644 --- a/include/wimlib/lzx.h +++ b/include/wimlib/lzx.h @@ -1,21 +1,28 @@ #ifndef _WIMLIB_LZX_H #define _WIMLIB_LZX_H +/* Constants for the LZX data compression format. See the comments in + * lzx-compress.c and lzx-decompress.c for more information about this format. + * */ + #include "wimlib/assert.h" +#include "wimlib/util.h" #include "wimlib/types.h" //#define ENABLE_LZX_DEBUG #ifdef ENABLE_LZX_DEBUG # define LZX_DEBUG DEBUG +# define LZX_ASSERT wimlib_assert #else # define LZX_DEBUG(format, ...) +# define LZX_ASSERT(...) #endif /* Constants, most of which are defined by the LZX specification: */ /* The smallest and largest allowed match lengths. */ -#define LZX_MIN_MATCH 2 -#define LZX_MAX_MATCH 257 +#define LZX_MIN_MATCH_LEN 2 +#define LZX_MAX_MATCH_LEN 257 /* Number of values an uncompressed literal byte can represent. */ #define LZX_NUM_CHARS 256 @@ -26,12 +33,16 @@ #define LZX_BLOCKTYPE_ALIGNED 2 #define LZX_BLOCKTYPE_UNCOMPRESSED 3 -#define LZX_NUM_PRIMARY_LENS 7 /* this one missing from spec! */ +#define LZX_NUM_PRIMARY_LENS 7 + +/* The number of position slots varies from 30 to 51 depending on the window + * size (see comment in lzx-decompress.c). */ +#define LZX_MAX_POSITION_SLOTS 51 -/* NOTE: There are really 51 position slots in the LZX format as a whole, but - * only 30 are needed to allow for the window to be up to 32768 bytes long, - * which is the maximum in the WIM format. 
*/ -#define LZX_NUM_POSITION_SLOTS 30 +#define LZX_MIN_WINDOW_ORDER 15 +#define LZX_MAX_WINDOW_ORDER 21 +#define LZX_MIN_WINDOW_SIZE (1U << LZX_MIN_WINDOW_ORDER) /* 32768 */ +#define LZX_MAX_WINDOW_SIZE (1U << LZX_MAX_WINDOW_ORDER) /* 2097152 */ /* Read the LZX specification for information about the Huffman trees used in * the LZX compression format. Basically there are 4 of them: The main tree, @@ -45,26 +56,28 @@ * * The ALIGNED tree is only present on ALIGNED blocks. * - * A PRETREE is used to encode the code lengths for the main tree and the length + * A PRECODE is used to encode the code lengths for the main tree and the length * tree. There is a separate pretree for each half of the main tree. */ -#define LZX_MAINTREE_NUM_SYMBOLS (LZX_NUM_CHARS + \ - (LZX_NUM_POSITION_SLOTS << 3)) -#define LZX_MAINTREE_TABLEBITS 11 +#define LZX_MAINCODE_MAX_NUM_SYMBOLS (LZX_NUM_CHARS + (LZX_MAX_POSITION_SLOTS << 3)) +#define LZX_MAINCODE_TABLEBITS 11 -#define LZX_LENTREE_NUM_SYMBOLS 249 -#define LZX_LENTREE_TABLEBITS 10 +#define LZX_LENCODE_NUM_SYMBOLS 249 +#define LZX_LENCODE_TABLEBITS 10 -#define LZX_PRETREE_NUM_SYMBOLS 20 -#define LZX_PRETREE_TABLEBITS 6 -#define LZX_PRETREE_ELEMENT_SIZE 4 +#define LZX_PRECODE_NUM_SYMBOLS 20 +#define LZX_PRECODE_TABLEBITS 6 +#define LZX_PRECODE_ELEMENT_SIZE 4 -#define LZX_ALIGNEDTREE_NUM_SYMBOLS 8 -#define LZX_ALIGNEDTREE_TABLEBITS 7 -#define LZX_ALIGNEDTREE_ELEMENT_SIZE 3 +#define LZX_ALIGNEDCODE_NUM_SYMBOLS 8 +#define LZX_ALIGNEDCODE_TABLEBITS 7 +#define LZX_ALIGNEDCODE_ELEMENT_SIZE 3 -/* Maximum allowed length of a Huffman code. */ -#define LZX_MAX_CODEWORD_LEN 16 +/* Maximum allowed length of Huffman codewords. 
*/ +#define LZX_MAX_MAIN_CODEWORD_LEN 16 +#define LZX_MAX_LEN_CODEWORD_LEN 16 +#define LZX_MAX_PRE_CODEWORD_LEN 16 +#define LZX_MAX_ALIGNED_CODEWORD_LEN 8 /* For the LZX-compressed blocks in WIM files, this value is always used as the * filesize parameter for the call instruction (0xe8 byte) preprocessing, even @@ -73,10 +86,13 @@ * different as well. */ #define LZX_WIM_MAGIC_FILESIZE 12000000 +/* Assumed LZX block size when the encoded block size begins with a 0 bit. */ +#define LZX_DEFAULT_BLOCK_SIZE 32768 + #define USE_LZX_EXTRA_BITS_ARRAY #ifdef USE_LZX_EXTRA_BITS_ARRAY -extern const u8 lzx_extra_bits[LZX_NUM_POSITION_SLOTS]; +extern const u8 lzx_extra_bits[LZX_MAX_POSITION_SLOTS]; #endif /* Given the number of a LZX position slot, return the number of extra bits that @@ -89,18 +105,56 @@ lzx_get_num_extra_bits(unsigned position_slot) return lzx_extra_bits[position_slot]; #else /* Calculate directly using a shift and subtraction. */ - wimlib_assert(position_slot >= 2 && position_slot <= 37); + LZX_ASSERT(position_slot >= 2 && position_slot <= 37); return (position_slot >> 1) - 1; #endif } -extern const u32 lzx_position_base[LZX_NUM_POSITION_SLOTS]; +extern const u32 lzx_position_base[LZX_MAX_POSITION_SLOTS]; -/* Least-recently used queue for match offsets. */ -struct lru_queue { - u32 R0; - u32 R1; - u32 R2; +/* Returns the LZX position slot that corresponds to a given formatted offset. + * + * Logically, this returns the largest i such that + * formatted_offset >= lzx_position_base[i]. + * + * The actual implementation below takes advantage of the regularity of the + * numbers in the lzx_position_base array to calculate the slot directly from + * the formatted offset without actually looking at the array.
+ */ +static inline unsigned +lzx_get_position_slot_raw(unsigned formatted_offset) +{ + /* 196608 == 3 << 16. At or above this value the result is (formatted_offset >> 17) + 34, so it grows by one for every 131072 (1 << 17) added to formatted_offset. */ if (formatted_offset >= 196608) { + return (formatted_offset >> 17) + 34; + } else { + /* Only formatted_offset < 196608 reaches this branch, so the lower bound (>= 2) is the part of this check that can actually fail here. LZX_ASSERT expands to nothing unless ENABLE_LZX_DEBUG is defined. */ LZX_ASSERT(2 <= formatted_offset && formatted_offset < 655360); + /* NOTE(review): bsr32() is not defined in this header; presumably it returns the index of the most significant set bit -- confirm against its definition. */ unsigned mssb_idx = bsr32(formatted_offset); + /* Result: twice mssb_idx, plus the value of the bit at index (mssb_idx - 1) of formatted_offset. */ return (mssb_idx << 1) | + ((formatted_offset >> (mssb_idx - 1)) & 1); + } +} + +/* Declarations only; the definitions of these two functions are not in this header. */ extern bool lzx_window_size_valid(size_t window_size); +extern unsigned lzx_get_num_main_syms(u32 window_size); + +/* Number of entries in the R[] array of struct lzx_lru_queue. */ #define LZX_NUM_RECENT_OFFSETS 3 + +/* Least-recently used queue for match offsets. It stores + * LZX_NUM_RECENT_OFFSETS offsets in the array R[]. */ +struct lzx_lru_queue { + u32 R[LZX_NUM_RECENT_OFFSETS]; }; +/* In the LZX format, an offset of n bytes is actually encoded + * as (n + LZX_OFFSET_OFFSET). With LZX_NUM_RECENT_OFFSETS equal to 3, + * LZX_OFFSET_OFFSET evaluates to 2. */ +#define LZX_OFFSET_OFFSET (LZX_NUM_RECENT_OFFSETS - 1) + +/* Initialize the LZX least-recently-used match offset queue at the beginning of + * a new window for either decompression or compression. Every one of the + * LZX_NUM_RECENT_OFFSETS entries of queue->R is set to 1; queue is + * dereferenced unconditionally. */ +static inline void +lzx_lru_queue_init(struct lzx_lru_queue *queue) +{ + for (unsigned i = 0; i < LZX_NUM_RECENT_OFFSETS; i++) + queue->R[i] = 1; +} + #endif /* _WIMLIB_LZX_H */