X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flzms-decompress.c;h=1ddc3f19dfdce40e9b0bc8e8833505654fd95595;hp=e64ce2db1c1667ff5e2e886a459c8756a19dfcbf;hb=500874ebbf2839d2c93db2b4094817c219946f29;hpb=2e6b4c5e5343353b1edaa695fff0ee4143e65af6 diff --git a/src/lzms-decompress.c b/src/lzms-decompress.c index e64ce2db..1ddc3f19 100644 --- a/src/lzms-decompress.c +++ b/src/lzms-decompress.c @@ -1,7 +1,5 @@ /* * lzms-decompress.c - * - * A decompressor for the LZMS compression format. */ /* @@ -37,7 +35,7 @@ * is a container format from which the locations and sizes (both compressed and * uncompressed) of the constituent blocks can be determined. * - * A LZMS-compressed block must be read in 16-bit little endian units from both + * An LZMS-compressed block must be read in 16-bit little endian units from both * directions. One logical bitstream starts at the front of the block and * proceeds forwards. Another logical bitstream starts at the end of the block * and proceeds backwards. Bits read from the forwards bitstream constitute @@ -46,7 +44,7 @@ * of the bits within the 16-bit coding units is such that the first bit is the * high-order bit and the last bit is the low-order bit. * - * From these two logical bitstreams, a LZMS decompressor can reconstitute the + * From these two logical bitstreams, an LZMS decompressor can reconstitute the * series of items that make up the LZMS data representation. Each such item * may be a literal byte or a match. Matches may be either traditional LZ77 * matches or "delta" matches, either of which can have its offset encoded @@ -56,7 +54,7 @@ * sequence of bytes beginning at the current position and extending for the * length is exactly equal to the equal-length sequence of bytes at the offset * back in the window. On the other hand, a delta match consists of a length, - * raw offset, and power. It asserts that the sequence of bytes of beginning at + * raw offset, and power. It asserts that the sequence of bytes beginning at * the current position and extending for the length is equal to the bytewise * sum of the two equal-length sequences of bytes (2**power) and (raw_offset * * 2**power) bytes before the current position, minus bytewise the sequence of @@ -67,7 +65,7 @@ * offset is 1, regardless of match length. * * For LZ matches, up to 3 repeat offsets are allowed, similar to some other - * LZ-based formats such as LZX and LZMA. They must updated in a LRU fashion, + * LZ-based formats such as LZX and LZMA. They must updated in an LRU fashion, * except for a quirk: updates to the queue must be delayed by one LZMS item, * except for the removal of a repeat match. As a result, 4 entries are * actually needed in the queue, even though it is only possible to decode @@ -88,17 +86,17 @@ * filled in with the next 16 bits from the forwards bitstream. * * To decode each bit, the range decoder requires a probability that is - * logically a real number between 0 and 1. Multiplying this - * probability by the current range and taking the floor gives the bound between - * the 0-bit region of the range and the 1-bit region of the range. However, in - * LZMS, probabilities are restricted to values of n/64 where n is an integer is + * logically a real number between 0 and 1. Multiplying this probability by the + * current range and taking the floor gives the bound between the 0-bit region + * of the range and the 1-bit region of the range. However, in LZMS, + * probabilities are restricted to values of n/64 where n is an integer is * between 1 and 63 inclusively, so the implementation may use integer * operations instead. Following calculation of the bound, if the current code * is in the 0-bit region, the new range becomes the current code and the * decoded bit is 0; otherwise, the bound must be subtracted from both the range * and the code, and the decoded bit is 1. More information about range coding - * can be found https://en.wikipedia.org/wiki/Range_encoding. Furthermore, note - * that the LZMA format also uses range coding and has public domain code + * can be found at https://en.wikipedia.org/wiki/Range_encoding. Furthermore, + * note that the LZMA format also uses range coding and has public domain code * available for it. * * The probability used to range-decode each bit must be taken from a table, of @@ -129,8 +127,8 @@ * bitstream. For this, there are 5 different Huffman codes used: * * - The literal code, used for decoding literal bytes. Each of the 256 - * symbols represents literal byte. This code must be rebuilt whenever 1024 - * symbols have been decoded with it. + * symbols represents a literal byte. This code must be rebuilt whenever + * 1024 symbols have been decoded with it. * * - The LZ offset code, used for decoding the offsets of standard LZ77 * matches. Each symbol represents a position slot, which corresponds to a @@ -172,11 +170,11 @@ * * Codewords in all the LZMS Huffman codes are limited to 15 bits. If the * canonical code for a given set of symbol frequencies has any codewords longer - * than 15 bits, all frequencies must be divided by 2, rounding up, and the code - * construction must be attempted again. + * than 15 bits, then all frequencies must be divided by 2, rounding up, and the + * code construction must be attempted again. * - * A LZMS-compressed block seemingly cannot have a size greater than or equal to - * the original uncompressed size. In such cases the block must be stored + * An LZMS-compressed block seemingly cannot have a compressed size greater than + * or equal to the uncompressed size. In such cases the block must be stored * uncompressed. * * After all LZMS items have been decoded, the data must be postprocessed to @@ -193,14 +191,13 @@ #endif #include "wimlib.h" -#include "wimlib/compress.h" -#include "wimlib/decompress.h" -#include "wimlib/error.h" +#include "wimlib/compress_common.h" +#include "wimlib/decompressor_ops.h" +#include "wimlib/decompress_common.h" #include "wimlib/lzms.h" #include "wimlib/util.h" #include -#include #define LZMS_DECODE_TABLE_BITS 10 @@ -235,7 +232,7 @@ struct lzms_input_bitstream { * at a time, this needs to be 64 bits rather than 32 bits. */ u64 bitbuf; - /* Number of bits in @bitbuf that are are used. */ + /* Number of bits in @bitbuf that are used. */ unsigned num_filled_bits; /* Pointer to the one past the next little-endian 16-bit integer in the @@ -247,22 +244,6 @@ struct lzms_input_bitstream { size_t num_le16_remaining; }; -/* Probability entry for use by the range decoder when in a specific state. */ -struct lzms_probability_entry { - - /* Number of zeroes in the most recent LZMS_PROBABILITY_MAX bits that - * have been decoded using this probability entry. This is a cached - * value because it can be computed as LZMS_PROBABILITY_MAX minus the - * Hamming weight of the low-order LZMS_PROBABILITY_MAX bits of - * @recent_bits. */ - u32 num_recent_zero_bits; - - /* The most recent LZMS_PROBABILITY_MAX bits that have been decoded - * using this probability entry. The size of this variable, in bits, - * must be at least LZMS_PROBABILITY_MAX. */ - u64 recent_bits; -}; - /* Structure used for range decoding. This wraps around `struct * lzms_range_decoder_raw' to use and maintain probability entries. */ struct lzms_range_decoder { @@ -302,6 +283,8 @@ struct lzms_huffman_decoder { * read using this decoder. */ const u32 *slot_base_tab; + const u8 *extra_bits_tab; + /* Number of symbols that have been read using this code far. Reset to * 0 whenever the code is rebuilt. */ u32 num_syms_read; @@ -321,7 +304,7 @@ struct lzms_huffman_decoder { u8 lens[LZMS_MAX_NUM_SYMS]; /* The codeword of each symbol in the Huffman code. */ - u16 codewords[LZMS_MAX_NUM_SYMS]; + u32 codewords[LZMS_MAX_NUM_SYMS]; /* A table for quickly decoding symbols encoded using the Huffman code. */ @@ -364,112 +347,13 @@ struct lzms_decompressor { struct lzms_huffman_decoder delta_power_decoder; struct lzms_huffman_decoder delta_offset_decoder; - /* LRU (least-recently-used) queue of LZ match offsets. */ - u64 recent_lz_offsets[LZMS_NUM_RECENT_OFFSETS + 1]; - - /* LRU (least-recently-used) queue of delta match powers. */ - u32 recent_delta_powers[LZMS_NUM_RECENT_OFFSETS + 1]; + /* LRU (least-recently-used) queues for match information. */ + struct lzms_lru_queues lru; - /* LRU (least-recently-used) queue of delta match offsets. */ - u32 recent_delta_offsets[LZMS_NUM_RECENT_OFFSETS + 1]; - - /* These variables are used to delay updates to the LRU queues by one - * decoded item. */ - u32 prev_lz_offset; - u32 prev_delta_power; - u32 prev_delta_offset; - u32 upcoming_lz_offset; - u32 upcoming_delta_power; - u32 upcoming_delta_offset; + /* Used for postprocessing. */ + s32 last_target_usages[65536]; }; -/* A table that maps position slots to their base values. These are constants - * computed at runtime by lzms_compute_slot_bases(). */ -static u32 lzms_position_slot_base[LZMS_MAX_NUM_OFFSET_SYMS + 1]; - -/* A table that maps length slots to their base values. These are constants - * computed at runtime by lzms_compute_slot_bases(). */ -static u32 lzms_length_slot_base[LZMS_NUM_LEN_SYMS + 1]; - -static void -lzms_decode_delta_rle_slot_bases(u32 slot_bases[], - const u8 delta_run_lens[], size_t num_run_lens) -{ - u32 delta = 1; - u32 base = 0; - size_t slot = 0; - for (size_t i = 0; i < num_run_lens; i++) { - u8 run_len = delta_run_lens[i]; - while (run_len--) { - base += delta; - slot_bases[slot++] = base; - } - delta <<= 1; - } -} - -/* Initialize the global position and length slot tables. */ -static void -lzms_compute_slot_bases(void) -{ - /* If an explicit formula that maps LZMS position and length slots to - * slot bases exists, then it could be used here. But until one is - * found, the following code fills in the slots using the observation - * that the increase from one slot base to the next is an increasing - * power of 2. Therefore, run-length encoding of the delta of adjacent - * entries can be used. */ - static const u8 position_slot_delta_run_lens[] = { - 9, 0, 9, 7, 10, 15, 15, 20, - 20, 30, 33, 40, 42, 45, 60, 73, - 80, 85, 95, 105, 6, - }; - - static const u8 length_slot_delta_run_lens[] = { - 27, 4, 6, 4, 5, 2, 1, 1, - 1, 1, 1, 0, 0, 0, 0, 0, - 1, - }; - - lzms_decode_delta_rle_slot_bases(lzms_position_slot_base, - position_slot_delta_run_lens, - ARRAY_LEN(position_slot_delta_run_lens)); - - lzms_position_slot_base[LZMS_MAX_NUM_OFFSET_SYMS] = 0x7fffffff; - - lzms_decode_delta_rle_slot_bases(lzms_length_slot_base, - length_slot_delta_run_lens, - ARRAY_LEN(length_slot_delta_run_lens)); - - lzms_length_slot_base[LZMS_NUM_LEN_SYMS] = 0x400108ab; -} - -/* Initialize the global position length slot tables if not done so already. */ -static void -lzms_init_slot_bases(void) -{ - static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - static bool already_computed = false; - - if (unlikely(!already_computed)) { - pthread_mutex_lock(&mutex); - if (!already_computed) { - lzms_compute_slot_bases(); - already_computed = true; - } - pthread_mutex_unlock(&mutex); - } -} - -/* Return the position slot for the specified offset. */ -static u32 -lzms_get_position_slot_raw(u32 offset) -{ - u32 position_slot = 0; - while (lzms_position_slot_base[position_slot + 1] <= offset) - position_slot++; - return position_slot; -} - /* Initialize the input bitstream @is to read forwards from the specified * compressed data buffer @in that is @in_limit 16-bit integers long. */ static void @@ -658,10 +542,9 @@ lzms_range_decode_bit(struct lzms_range_decoder *dec) static void lzms_rebuild_adaptive_huffman_code(struct lzms_huffman_decoder *dec) { - int ret; - /* XXX: This implementation that makes use of code already implemented - * for the XPRESS and LZX compression formats. However, since for the + /* XXX: This implementation makes use of code already implemented for + * the XPRESS and LZX compression formats. However, since for the * adaptive codes used in LZMS we don't actually need the explicit codes * themselves, only the decode tables, it may be possible to optimize * this by somehow directly building or updating the Huffman decode @@ -671,20 +554,25 @@ lzms_rebuild_adaptive_huffman_code(struct lzms_huffman_decoder *dec) dec->num_syms); make_canonical_huffman_code(dec->num_syms, LZMS_MAX_CODEWORD_LEN, dec->sym_freqs, dec->lens, dec->codewords); - ret = make_huffman_decode_table(dec->decode_table, dec->num_syms, - LZMS_DECODE_TABLE_BITS, dec->lens, - LZMS_MAX_CODEWORD_LEN); +#if defined(ENABLE_LZMS_DEBUG) + int ret = +#endif + make_huffman_decode_table(dec->decode_table, dec->num_syms, + LZMS_DECODE_TABLE_BITS, dec->lens, + LZMS_MAX_CODEWORD_LEN); LZMS_ASSERT(ret == 0); } /* Decode and return the next Huffman-encoded symbol from the LZMS-compressed * block using the specified Huffman decoder. */ static u32 -lzms_decode_huffman_symbol(struct lzms_huffman_decoder *dec) +lzms_huffman_decode_symbol(struct lzms_huffman_decoder *dec) { - const u8 *lens = dec->lens; const u16 *decode_table = dec->decode_table; struct lzms_input_bitstream *is = dec->is; + u16 entry; + u16 key_bits; + u16 sym; /* The Huffman codes used in LZMS are adaptive and must be rebuilt * whenever a certain number of symbols have been read. Each such @@ -702,28 +590,31 @@ lzms_decode_huffman_symbol(struct lzms_huffman_decoder *dec) dec->num_syms_read = 0; } - /* In the following Huffman decoding implementation, the first - * LZMS_DECODE_TABLE_BITS of the input are used as an offset into a - * decode table. The entry will either provide the decoded symbol - * directly, or else a "real" Huffman binary tree will be searched to - * decode the symbol. */ - + /* XXX: Copied from read_huffsym() (decompress_common.h), since this + * uses a different input bitstream type. Should unify the + * implementations. */ lzms_input_bitstream_ensure_bits(is, LZMS_MAX_CODEWORD_LEN); - u16 key_bits = lzms_input_bitstream_peek_bits(is, LZMS_DECODE_TABLE_BITS); - u16 sym = decode_table[key_bits]; - - if (sym < dec->num_syms) { - /* Fast case: The decode table directly provided the symbol. */ - lzms_input_bitstream_remove_bits(is, lens[sym]); + /* Index the decode table by the next table_bits bits of the input. */ + key_bits = lzms_input_bitstream_peek_bits(is, LZMS_DECODE_TABLE_BITS); + entry = decode_table[key_bits]; + if (likely(entry < 0xC000)) { + /* Fast case: The decode table directly provided the symbol and + * codeword length. The low 11 bits are the symbol, and the + * high 5 bits are the codeword length. */ + lzms_input_bitstream_remove_bits(is, entry >> 11); + sym = entry & 0x7FF; } else { - /* Slow case: The symbol took too many bits to include directly - * in the decode table, so search for it in a binary tree at the - * end of the decode table. */ + /* Slow case: The codeword for the symbol is longer than + * table_bits, so the symbol does not have an entry directly in + * the first (1 << table_bits) entries of the decode table. + * Traverse the appropriate binary tree bit-by-bit in order to + * decode the symbol. */ lzms_input_bitstream_remove_bits(is, LZMS_DECODE_TABLE_BITS); do { - key_bits = sym + lzms_input_bitstream_pop_bits(is, 1); - } while ((sym = decode_table[key_bits]) >= dec->num_syms); + key_bits = (entry & 0x3FFF) + lzms_input_bitstream_pop_bits(is, 1); + } while ((entry = decode_table[key_bits]) >= 0xC000); + sym = entry; } /* Tally and return the decoded symbol. */ @@ -743,19 +634,19 @@ lzms_decode_value(struct lzms_huffman_decoder *dec) unsigned num_extra_bits; u32 extra_bits; + LZMS_ASSERT(dec->slot_base_tab != NULL); + LZMS_ASSERT(dec->extra_bits_tab != NULL); + /* Read the slot (position slot, length slot, etc.), which is encoded as * a Huffman symbol. */ - slot = lzms_decode_huffman_symbol(dec); - - LZMS_ASSERT(dec->slot_base_tab != NULL); + slot = lzms_huffman_decode_symbol(dec); /* Get the number of extra bits needed to represent the range of values * that share the slot. */ - num_extra_bits = bsr32(dec->slot_base_tab[slot + 1] - - dec->slot_base_tab[slot]); + num_extra_bits = dec->extra_bits_tab[slot]; - /* Read the number of extra bits and add them to the slot to form the - * final decoded value. */ + /* Read the number of extra bits and add them to the slot base to form + * the final decoded value. */ extra_bits = lzms_input_bitstream_read_bits(dec->is, num_extra_bits); return dec->slot_base_tab[slot] + extra_bits; } @@ -768,7 +659,7 @@ lzms_copy_literal(struct lzms_decompressor *ctx, u8 literal) return 0; } -/* Validate a LZ match and copy it to the output buffer. */ +/* Validate an LZ match and copy it to the output buffer. */ static int lzms_copy_lz_match(struct lzms_decompressor *ctx, u32 length, u32 offset) { @@ -849,17 +740,17 @@ lzms_decode_lz_match(struct lzms_decompressor *ctx) if (!lzms_range_decode_bit(&ctx->lz_repeat_match_range_decoders[i])) break; - offset = ctx->recent_lz_offsets[i]; + offset = ctx->lru.lz.recent_offsets[i]; for (; i < LZMS_NUM_RECENT_OFFSETS; i++) - ctx->recent_lz_offsets[i] = ctx->recent_lz_offsets[i + 1]; + ctx->lru.lz.recent_offsets[i] = ctx->lru.lz.recent_offsets[i + 1]; } /* Decode match length, which is always given explicitly (there is no * LRU queue for repeat lengths). */ length = lzms_decode_value(&ctx->length_decoder); - ctx->upcoming_lz_offset = offset; + ctx->lru.lz.upcoming_offset = offset; LZMS_DEBUG("Decoded %s LZ match: length=%u, offset=%u", (bit ? "repeat" : "explicit"), length, offset); @@ -880,7 +771,7 @@ lzms_decode_delta_match(struct lzms_decompressor *ctx) bit = lzms_range_decode_bit(&ctx->delta_match_range_decoder); if (bit == 0) { - power = lzms_decode_huffman_symbol(&ctx->delta_power_decoder); + power = lzms_huffman_decode_symbol(&ctx->delta_power_decoder); raw_offset = lzms_decode_value(&ctx->delta_offset_decoder); } else { int i; @@ -889,19 +780,19 @@ lzms_decode_delta_match(struct lzms_decompressor *ctx) if (!lzms_range_decode_bit(&ctx->delta_repeat_match_range_decoders[i])) break; - power = ctx->recent_delta_powers[i]; - raw_offset = ctx->recent_delta_offsets[i]; + power = ctx->lru.delta.recent_powers[i]; + raw_offset = ctx->lru.delta.recent_offsets[i]; for (; i < LZMS_NUM_RECENT_OFFSETS; i++) { - ctx->recent_delta_powers[i] = ctx->recent_delta_powers[i + 1]; - ctx->recent_delta_offsets[i] = ctx->recent_delta_offsets[i + 1]; + ctx->lru.delta.recent_powers[i] = ctx->lru.delta.recent_powers[i + 1]; + ctx->lru.delta.recent_offsets[i] = ctx->lru.delta.recent_offsets[i + 1]; } } length = lzms_decode_value(&ctx->length_decoder); - ctx->upcoming_delta_power = power; - ctx->upcoming_delta_offset = raw_offset; + ctx->lru.delta.upcoming_power = power; + ctx->lru.delta.upcoming_offset = raw_offset; LZMS_DEBUG("Decoded %s delta match: length=%u, power=%u, raw_offset=%u", (bit ? "repeat" : "explicit"), length, power, raw_offset); @@ -910,6 +801,7 @@ lzms_decode_delta_match(struct lzms_decompressor *ctx) return lzms_copy_delta_match(ctx, length, power, raw_offset); } +/* Decode an LZ or delta match. */ static int lzms_decode_match(struct lzms_decompressor *ctx) { @@ -923,7 +815,7 @@ lzms_decode_match(struct lzms_decompressor *ctx) static int lzms_decode_literal(struct lzms_decompressor *ctx) { - u8 literal = lzms_decode_huffman_symbol(&ctx->literal_decoder); + u8 literal = lzms_huffman_decode_symbol(&ctx->literal_decoder); LZMS_DEBUG("Decoded literal: 0x%02x", literal); return lzms_copy_literal(ctx, literal); } @@ -934,9 +826,9 @@ lzms_decode_item(struct lzms_decompressor *ctx) { int ret; - ctx->upcoming_delta_offset = 0; - ctx->upcoming_lz_offset = 0; - ctx->upcoming_delta_power = 0; + ctx->lru.lz.upcoming_offset = 0; + ctx->lru.delta.upcoming_power = 0; + ctx->lru.delta.upcoming_offset = 0; if (lzms_range_decode_bit(&ctx->main_range_decoder)) ret = lzms_decode_match(ctx); @@ -946,26 +838,7 @@ lzms_decode_item(struct lzms_decompressor *ctx) if (ret) return ret; - /* Update LRU queues */ - if (ctx->prev_lz_offset != 0) { - for (int i = LZMS_NUM_RECENT_OFFSETS - 1; i >= 0; i--) - ctx->recent_lz_offsets[i + 1] = ctx->recent_lz_offsets[i]; - ctx->recent_lz_offsets[0] = ctx->prev_lz_offset; - - } - - if (ctx->prev_delta_offset != 0) { - for (int i = LZMS_NUM_RECENT_OFFSETS - 1; i >= 0; i--) { - ctx->recent_delta_powers[i + 1] = ctx->recent_delta_powers[i]; - ctx->recent_delta_offsets[i + 1] = ctx->recent_delta_offsets[i]; - } - ctx->recent_delta_powers[0] = ctx->prev_delta_power; - ctx->recent_delta_offsets[0] = ctx->prev_delta_offset; - } - - ctx->prev_lz_offset = ctx->upcoming_lz_offset; - ctx->prev_delta_offset = ctx->upcoming_delta_offset; - ctx->prev_delta_power = ctx->upcoming_delta_power; + lzms_update_lru_queues(&ctx->lru); return 0; } @@ -985,11 +858,14 @@ lzms_init_range_decoder(struct lzms_range_decoder *dec, static void lzms_init_huffman_decoder(struct lzms_huffman_decoder *dec, struct lzms_input_bitstream *is, - const u32 *slot_base_tab, unsigned num_syms, + const u32 *slot_base_tab, + const u8 *extra_bits_tab, + unsigned num_syms, unsigned rebuild_freq) { dec->is = is; dec->slot_base_tab = slot_base_tab; + dec->extra_bits_tab = extra_bits_tab; dec->num_syms = num_syms; dec->num_syms_read = rebuild_freq; dec->rebuild_freq = rebuild_freq; @@ -997,7 +873,7 @@ lzms_init_huffman_decoder(struct lzms_huffman_decoder *dec, dec->sym_freqs[i] = 1; } -/* Prepare to decode items from a LZMS-compressed block. */ +/* Prepare to decode items from an LZMS-compressed block. */ static void lzms_init_decompressor(struct lzms_decompressor *ctx, const void *cdata, unsigned clen, @@ -1019,46 +895,43 @@ lzms_init_decompressor(struct lzms_decompressor *ctx, * backwards) */ lzms_input_bitstream_init(&ctx->is, cdata, clen / 2); - /* Initialize position and length slot bases if not done already. */ - lzms_init_slot_bases(); - - /* Like in other compression formats such as LZX and DEFLATE, match - * offsets in LZMS are represented as a position slot, which corresponds - * to a fixed lesser or equal match offset, followed by a - * position-slot-dependent number of extra bits that gives an additional - * offset from that position slot. Because the full number of position - * slots may exceed the length of the compressed block, here we - * calculate the number of position slots that will actually be used in - * the compressed representation. */ - num_position_slots = lzms_get_position_slot_raw(ulen - 1) + 1; + /* Calculate the number of position slots needed for this compressed + * block. */ + num_position_slots = lzms_get_position_slot(ulen - 1) + 1; LZMS_DEBUG("Using %u position slots", num_position_slots); /* Initialize Huffman decoders for each alphabet used in the compressed * representation. */ lzms_init_huffman_decoder(&ctx->literal_decoder, &ctx->is, - NULL, LZMS_NUM_LITERAL_SYMS, + NULL, NULL, LZMS_NUM_LITERAL_SYMS, LZMS_LITERAL_CODE_REBUILD_FREQ); lzms_init_huffman_decoder(&ctx->lz_offset_decoder, &ctx->is, - lzms_position_slot_base, num_position_slots, + lzms_position_slot_base, + lzms_extra_position_bits, + num_position_slots, LZMS_LZ_OFFSET_CODE_REBUILD_FREQ); lzms_init_huffman_decoder(&ctx->length_decoder, &ctx->is, - lzms_length_slot_base, LZMS_NUM_LEN_SYMS, + lzms_length_slot_base, + lzms_extra_length_bits, + LZMS_NUM_LEN_SYMS, LZMS_LENGTH_CODE_REBUILD_FREQ); lzms_init_huffman_decoder(&ctx->delta_offset_decoder, &ctx->is, - lzms_position_slot_base, num_position_slots, + lzms_position_slot_base, + lzms_extra_position_bits, + num_position_slots, LZMS_DELTA_OFFSET_CODE_REBUILD_FREQ); lzms_init_huffman_decoder(&ctx->delta_power_decoder, &ctx->is, - NULL, LZMS_NUM_DELTA_POWER_SYMS, + NULL, NULL, LZMS_NUM_DELTA_POWER_SYMS, LZMS_DELTA_POWER_CODE_REBUILD_FREQ); - /* Initialize range decoders (all of which wrap around the same - * lzms_range_decoder_raw). */ + /* Initialize range decoders, all of which wrap around the same + * lzms_range_decoder_raw. */ lzms_init_range_decoder(&ctx->main_range_decoder, &ctx->rd, LZMS_NUM_MAIN_STATES); @@ -1079,21 +952,8 @@ lzms_init_decompressor(struct lzms_decompressor *ctx, lzms_init_range_decoder(&ctx->delta_repeat_match_range_decoders[i], &ctx->rd, LZMS_NUM_DELTA_REPEAT_MATCH_STATES); - - /* Initialize the LRU queue for recent match offsets. */ - for (size_t i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++) - ctx->recent_lz_offsets[i] = i + 1; - - for (size_t i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++) { - ctx->recent_delta_powers[i] = 0; - ctx->recent_delta_offsets[i] = i + 1; - } - ctx->prev_lz_offset = 0; - ctx->prev_delta_offset = 0; - ctx->prev_delta_power = 0; - ctx->upcoming_lz_offset = 0; - ctx->upcoming_delta_offset = 0; - ctx->upcoming_delta_power = 0; + /* Initialize LRU match information. */ + lzms_init_lru_queues(&ctx->lru); LZMS_DEBUG("Decompressor successfully initialized"); } @@ -1101,193 +961,101 @@ lzms_init_decompressor(struct lzms_decompressor *ctx, /* Decode the series of literals and matches from the LZMS-compressed data. * Returns 0 on success; nonzero if the compressed data is invalid. */ static int -lzms_decode_items(const u8 *cdata, size_t clen, u8 *ubuf, size_t ulen) +lzms_decode_items(const u8 *cdata, size_t clen, u8 *ubuf, size_t ulen, + struct lzms_decompressor *ctx) { - /* XXX: The context could be allocated on the heap. */ - struct lzms_decompressor ctx; - /* Initialize the LZMS decompressor. */ - lzms_init_decompressor(&ctx, cdata, clen, ubuf, ulen); + lzms_init_decompressor(ctx, cdata, clen, ubuf, ulen); /* Decode the sequence of items. */ - while (ctx.out_next != ctx.out_end) { - LZMS_DEBUG("Position %u", ctx.out_next - ctx.out_begin); - if (lzms_decode_item(&ctx)) + while (ctx->out_next != ctx->out_end) { + LZMS_DEBUG("Position %u", ctx->out_next - ctx->out_begin); + if (lzms_decode_item(ctx)) return -1; } return 0; } -static s32 -lzms_try_x86_translation(u8 *ubuf, s32 i, s32 num_op_bytes, - s32 *closest_target_usage_p, s32 last_target_usages[], - s32 max_trans_offset) -{ - u16 pos; - - if (i - *closest_target_usage_p <= max_trans_offset) { - LZMS_DEBUG("Performed x86 translation at position %d " - "(opcode 0x%02x)", i, ubuf[i]); - le32 *p32 = (le32*)&ubuf[i + num_op_bytes]; - u32 n = le32_to_cpu(*p32); - *p32 = cpu_to_le32(n - i); - } - - pos = i + le16_to_cpu(*(const le16*)&ubuf[i + num_op_bytes]); - - i += num_op_bytes + sizeof(le32) - 1; - - if (i - last_target_usages[pos] <= LZMS_X86_MAX_GOOD_TARGET_OFFSET) - *closest_target_usage_p = i; - - last_target_usages[pos] = i; - - return i + 1; -} - -static s32 -lzms_process_x86_translation(u8 *ubuf, s32 i, s32 *closest_target_usage_p, - s32 last_target_usages[]) -{ - /* Switch on first byte of the opcode, assuming it is really an x86 - * instruction. */ - switch (ubuf[i]) { - case 0x48: - if (ubuf[i + 1] == 0x8b) { - if (ubuf[i + 2] == 0x5 || ubuf[i + 2] == 0xd) { - /* Load relative (x86_64) */ - return lzms_try_x86_translation(ubuf, i, 3, - closest_target_usage_p, - last_target_usages, - LZMS_X86_MAX_TRANSLATION_OFFSET); - } - } else if (ubuf[i + 1] == 0x8d) { - if ((ubuf[i + 2] & 0x7) == 0x5) { - /* Load effective address relative (x86_64) */ - return lzms_try_x86_translation(ubuf, i, 3, - closest_target_usage_p, - last_target_usages, - LZMS_X86_MAX_TRANSLATION_OFFSET); - } - } - break; - - case 0x4c: - if (ubuf[i + 1] == 0x8d) { - if ((ubuf[i + 2] & 0x7) == 0x5) { - /* Load effective address relative (x86_64) */ - return lzms_try_x86_translation(ubuf, i, 3, - closest_target_usage_p, - last_target_usages, - LZMS_X86_MAX_TRANSLATION_OFFSET); - } - } - break; - - case 0xe8: - /* Call relative */ - return lzms_try_x86_translation(ubuf, i, 1, closest_target_usage_p, - last_target_usages, - LZMS_X86_MAX_TRANSLATION_OFFSET / 2); - - case 0xe9: - /* Jump relative */ - return i + 5; - - case 0xf0: - if (ubuf[i + 1] == 0x83 && ubuf[i + 2] == 0x05) { - /* Lock add relative */ - return lzms_try_x86_translation(ubuf, i, 3, - closest_target_usage_p, - last_target_usages, - LZMS_X86_MAX_TRANSLATION_OFFSET); - } - break; - - case 0xff: - if (ubuf[i + 1] == 0x15) { - /* Call indirect */ - return lzms_try_x86_translation(ubuf, i, 2, - closest_target_usage_p, - last_target_usages, - LZMS_X86_MAX_TRANSLATION_OFFSET); - } - break; - } - return i + 1; -} - -/* Postprocess the uncompressed data by undoing the translation of relative - * addresses embedded in x86 instructions into absolute addresses. - * - * There does not appear to be any way to check to see if this postprocessing - * actually needs to be done (or to plug in alternate filters, like in LZMA), - * and the corresponding preprocessing seems to be done unconditionally. */ -static void -lzms_postprocess_data(u8 *ubuf, s32 ulen) +static int +lzms_decompress(const void *compressed_data, size_t compressed_size, + void *uncompressed_data, size_t uncompressed_size, void *_ctx) { - /* Offset (from beginning of buffer) of the most recent reference to a - * seemingly valid target address. */ - s32 closest_target_usage = -LZMS_X86_MAX_TRANSLATION_OFFSET - 1; - - /* Offset (from beginning of buffer) of the most recently used target - * address beginning with two bytes equal to the array index. - * - * XXX: This array could be allocated on the heap. */ - s32 last_target_usages[65536]; - for (s32 i = 0; i < 65536; i++) - last_target_usages[i] = -LZMS_X86_MAX_GOOD_TARGET_OFFSET - 1; - - /* Check each byte in the buffer for an x86 opcode for which a - * translation may be possible. No translations are done on any - * instructions starting in the last 11 bytes of the buffer. */ - for (s32 i = 0; i < ulen - 11; ) - i = lzms_process_x86_translation(ubuf, i, &closest_target_usage, - last_target_usages); -} + struct lzms_decompressor *ctx = _ctx; -/* API function documented in wimlib.h */ -WIMLIBAPI int -wimlib_lzms_decompress(const void *cdata, unsigned clen, - void *ubuf, unsigned ulen) -{ /* The range decoder requires that a minimum of 4 bytes of compressed * data be initially available. */ - if (clen < 4) { - LZMS_DEBUG("Compressed length too small (got %u, expected >= 4)", - clen); + if (compressed_size < 4) { + LZMS_DEBUG("Compressed size too small (got %zu, expected >= 4)", + compressed_size); return -1; } - /* A LZMS-compressed data block should be evenly divisible into 16-bit + /* An LZMS-compressed data block should be evenly divisible into 16-bit * integers. */ - if (clen % 2 != 0) { - LZMS_DEBUG("Compressed length not divisible by 2 (got %u)", clen); + if (compressed_size % 2 != 0) { + LZMS_DEBUG("Compressed size not divisible by 2 (got %zu)", + compressed_size); return -1; } /* Handle the trivial case where nothing needs to be decompressed. * (Necessary because a window of size 0 does not have a valid position * slot.) */ - if (ulen == 0) + if (uncompressed_size == 0) return 0; /* The x86 post-processor requires that the uncompressed length fit into * a signed 32-bit integer. Also, the position slot table cannot be * searched for a position of INT32_MAX or greater. */ - if (ulen >= INT32_MAX) { + if (uncompressed_size >= INT32_MAX) { LZMS_DEBUG("Uncompressed length too large " - "(got %u, expected < INT32_MAX)", ulen); + "(got %zu, expected < INT32_MAX)", + uncompressed_size); return -1; } /* Decode the literals and matches. */ - if (lzms_decode_items(cdata, clen, ubuf, ulen)) + if (lzms_decode_items(compressed_data, compressed_size, + uncompressed_data, uncompressed_size, ctx)) return -1; /* Postprocess the data. */ - lzms_postprocess_data(ubuf, ulen); + lzms_x86_filter(uncompressed_data, uncompressed_size, + ctx->last_target_usages, true); LZMS_DEBUG("Decompression successful."); return 0; } + +static void +lzms_free_decompressor(void *_ctx) +{ + struct lzms_decompressor *ctx = _ctx; + + ALIGNED_FREE(ctx); +} + +static int +lzms_create_decompressor(size_t max_block_size, + const struct wimlib_decompressor_params_header *params, + void **ctx_ret) +{ + struct lzms_decompressor *ctx; + + ctx = ALIGNED_MALLOC(sizeof(struct lzms_decompressor), + DECODE_TABLE_ALIGNMENT); + if (ctx == NULL) + return WIMLIB_ERR_NOMEM; + + /* Initialize position and length slot data if not done already. */ + lzms_init_slots(); + + *ctx_ret = ctx; + return 0; +} + +const struct decompressor_ops lzms_decompressor_ops = { + .create_decompressor = lzms_create_decompressor, + .decompress = lzms_decompress, + .free_decompressor = lzms_free_decompressor, +};