+ unsigned slot;
+ unsigned num_extra_bits;
+ u32 extra_bits;
+
+ /* Read the slot (position slot, length slot, etc.), which is encoded as
+ * a Huffman symbol. */
+ slot = lzms_decode_huffman_symbol(dec);
+
+ LZMS_ASSERT(dec->slot_base_tab != NULL);
+
+ /* Get the number of extra bits needed to represent the range of values
+ * that share the slot. */
+ num_extra_bits = bsr32(dec->slot_base_tab[slot + 1] -
+ dec->slot_base_tab[slot]);
+
+ /* Read the number of extra bits and add them to the slot to form the
+ * final decoded value. */
+ extra_bits = lzms_input_bitstream_read_bits(dec->is, num_extra_bits);
+ return dec->slot_base_tab[slot] + extra_bits;
+}
+
+/* Copy a literal to the output buffer. */
+static int
+lzms_copy_literal(struct lzms_decompressor *ctx, u8 literal)
+{
+ *ctx->out_next++ = literal;
+ return 0;
+}
+
+/* Validate an LZ match and copy it to the output buffer. */
+static int
+lzms_copy_lz_match(struct lzms_decompressor *ctx, u32 length, u32 offset)
+{
+ u8 *out_next;
+ u8 *matchptr;
+
+ if (length > ctx->out_end - ctx->out_next) {
+ LZMS_DEBUG("Match overrun!");
+ return -1;
+ }
+ if (offset > ctx->out_next - ctx->out_begin) {
+ LZMS_DEBUG("Match underrun!");
+ return -1;
+ }
+
+ out_next = ctx->out_next;
+ matchptr = out_next - offset;
+ while (length--)
+ *out_next++ = *matchptr++;
+
+ ctx->out_next = out_next;
+ return 0;
+}
+
+/* Validate a delta match and copy it to the output buffer. */
+static int
+lzms_copy_delta_match(struct lzms_decompressor *ctx, u32 length,
+ u32 power, u32 raw_offset)
+{
+ u32 offset1 = 1U << power;
+ u32 offset2 = raw_offset << power;
+ u32 offset = offset1 + offset2;
+ u8 *out_next;
+ u8 *matchptr1;
+ u8 *matchptr2;
+ u8 *matchptr;
+
+ if (length > ctx->out_end - ctx->out_next) {
+ LZMS_DEBUG("Match overrun!");
+ return -1;
+ }
+ if (offset > ctx->out_next - ctx->out_begin) {
+ LZMS_DEBUG("Match underrun!");
+ return -1;
+ }
+
+ out_next = ctx->out_next;
+ matchptr1 = out_next - offset1;
+ matchptr2 = out_next - offset2;
+ matchptr = out_next - offset;
+
+ while (length--)
+ *out_next++ = *matchptr1++ + *matchptr2++ - *matchptr++;
+
+ ctx->out_next = out_next;
+ return 0;
+}
+
+/* Decode a (length, offset) pair from the input. */
+static int
+lzms_decode_lz_match(struct lzms_decompressor *ctx)
+{
+ int bit;
+ u32 length, offset;
+
+ /* Decode the match offset. The next range-encoded bit indicates
+ * whether it's a repeat offset or an explicit offset. */
+
+ bit = lzms_range_decode_bit(&ctx->lz_match_range_decoder);
+ if (bit == 0) {
+ /* Explicit offset. */
+ offset = lzms_decode_value(&ctx->lz_offset_decoder);
+ } else {
+ /* Repeat offset. */
+ int i;
+
+ for (i = 0; i < LZMS_NUM_RECENT_OFFSETS - 1; i++)
+ if (!lzms_range_decode_bit(&ctx->lz_repeat_match_range_decoders[i]))
+ break;
+
+ offset = ctx->recent_lz_offsets[i];
+
+ for (; i < LZMS_NUM_RECENT_OFFSETS; i++)
+ ctx->recent_lz_offsets[i] = ctx->recent_lz_offsets[i + 1];
+ }
+
+ /* Decode match length, which is always given explicitly (there is no
+ * LRU queue for repeat lengths). */
+ length = lzms_decode_value(&ctx->length_decoder);
+
+ ctx->upcoming_lz_offset = offset;
+
+ LZMS_DEBUG("Decoded %s LZ match: length=%u, offset=%u",
+ (bit ? "repeat" : "explicit"), length, offset);
+
+ /* Validate the match and copy it to the output. */
+ return lzms_copy_lz_match(ctx, length, offset);
+}
+
+/* Decodes a "delta" match from the input. */
+static int
+lzms_decode_delta_match(struct lzms_decompressor *ctx)
+{
+ int bit;
+ u32 length, power, raw_offset;
+
+ /* Decode the match power and raw offset. The next range-encoded bit
+ * indicates whether these data are a repeat, or given explicitly. */
+
+ bit = lzms_range_decode_bit(&ctx->delta_match_range_decoder);
+ if (bit == 0) {
+ power = lzms_decode_huffman_symbol(&ctx->delta_power_decoder);
+ raw_offset = lzms_decode_value(&ctx->delta_offset_decoder);
+ } else {
+ int i;
+
+ for (i = 0; i < LZMS_NUM_RECENT_OFFSETS - 1; i++)
+ if (!lzms_range_decode_bit(&ctx->delta_repeat_match_range_decoders[i]))
+ break;
+
+ power = ctx->recent_delta_powers[i];
+ raw_offset = ctx->recent_delta_offsets[i];
+
+ for (; i < LZMS_NUM_RECENT_OFFSETS; i++) {
+ ctx->recent_delta_powers[i] = ctx->recent_delta_powers[i + 1];
+ ctx->recent_delta_offsets[i] = ctx->recent_delta_offsets[i + 1];
+ }
+ }
+
+ length = lzms_decode_value(&ctx->length_decoder);
+
+ ctx->upcoming_delta_power = power;
+ ctx->upcoming_delta_offset = raw_offset;
+
+ LZMS_DEBUG("Decoded %s delta match: length=%u, power=%u, raw_offset=%u",
+ (bit ? "repeat" : "explicit"), length, power, raw_offset);
+
+ /* Validate the match and copy it to the output. */
+ return lzms_copy_delta_match(ctx, length, power, raw_offset);
+}
+
+static int
+lzms_decode_match(struct lzms_decompressor *ctx)
+{
+ if (!lzms_range_decode_bit(&ctx->match_range_decoder))
+ return lzms_decode_lz_match(ctx);
+ else
+ return lzms_decode_delta_match(ctx);
+}
+
+/* Decode a literal byte encoded using the literal Huffman code. */
+static int
+lzms_decode_literal(struct lzms_decompressor *ctx)
+{
+ u8 literal = lzms_decode_huffman_symbol(&ctx->literal_decoder);
+ LZMS_DEBUG("Decoded literal: 0x%02x", literal);
+ return lzms_copy_literal(ctx, literal);
+}
+
+/* Decode the next LZMS match or literal. */
+static int
+lzms_decode_item(struct lzms_decompressor *ctx)
+{
+ int ret;
+
+ ctx->upcoming_delta_offset = 0;
+ ctx->upcoming_lz_offset = 0;
+ ctx->upcoming_delta_power = 0;
+
+ if (lzms_range_decode_bit(&ctx->main_range_decoder))
+ ret = lzms_decode_match(ctx);
+ else
+ ret = lzms_decode_literal(ctx);
+
+ if (ret)
+ return ret;
+
+ /* Update LRU queues */
+ if (ctx->prev_lz_offset != 0) {
+ for (int i = LZMS_NUM_RECENT_OFFSETS - 1; i >= 0; i--)
+ ctx->recent_lz_offsets[i + 1] = ctx->recent_lz_offsets[i];
+ ctx->recent_lz_offsets[0] = ctx->prev_lz_offset;
+ }
+
+ if (ctx->prev_delta_offset != 0) {
+ for (int i = LZMS_NUM_RECENT_OFFSETS - 1; i >= 0; i--) {
+ ctx->recent_delta_powers[i + 1] = ctx->recent_delta_powers[i];
+ ctx->recent_delta_offsets[i + 1] = ctx->recent_delta_offsets[i];
+ }
+ ctx->recent_delta_powers[0] = ctx->prev_delta_power;
+ ctx->recent_delta_offsets[0] = ctx->prev_delta_offset;
+ }
+
+ ctx->prev_lz_offset = ctx->upcoming_lz_offset;
+ ctx->prev_delta_offset = ctx->upcoming_delta_offset;
+ ctx->prev_delta_power = ctx->upcoming_delta_power;
+ return 0;
+}
+
+static void
+lzms_init_range_decoder(struct lzms_range_decoder *dec,
+ struct lzms_range_decoder_raw *rd, u32 num_states)
+{
+ dec->rd = rd;
+ dec->state = 0;
+ dec->mask = num_states - 1;
+ for (u32 i = 0; i < num_states; i++) {
+ dec->prob_entries[i].num_recent_zero_bits = LZMS_INITIAL_PROBABILITY;
+ dec->prob_entries[i].recent_bits = LZMS_INITIAL_RECENT_BITS;
+ }
+}
+
+static void
+lzms_init_huffman_decoder(struct lzms_huffman_decoder *dec,
+ struct lzms_input_bitstream *is,
+ const u32 *slot_base_tab, unsigned num_syms,
+ unsigned rebuild_freq)
+{
+ dec->is = is;
+ dec->slot_base_tab = slot_base_tab;
+ dec->num_syms = num_syms;
+ dec->num_syms_read = rebuild_freq;
+ dec->rebuild_freq = rebuild_freq;
+ for (unsigned i = 0; i < num_syms; i++)
+ dec->sym_freqs[i] = 1;
+}
+
+/* Prepare to decode items from an LZMS-compressed block. */
+static void
+lzms_init_decompressor(struct lzms_decompressor *ctx,
+ const void *cdata, unsigned clen,
+ void *ubuf, unsigned ulen)
+{
+ unsigned num_position_slots;
+
+ LZMS_DEBUG("Initializing decompressor (clen=%u, ulen=%u)", clen, ulen);
+
+ /* Initialize output pointers. */
+ ctx->out_begin = ubuf;
+ ctx->out_next = ubuf;
+ ctx->out_end = (u8*)ubuf + ulen;
+
+ /* Initialize the raw range decoder (reading forwards). */
+ lzms_range_decoder_raw_init(&ctx->rd, cdata, clen / 2);
+
+ /* Initialize the input bitstream for Huffman symbols (reading
+ * backwards) */
+ lzms_input_bitstream_init(&ctx->is, cdata, clen / 2);
+
+ /* Initialize position and length slot bases if not done already. */
+ lzms_init_slot_bases();
+
+ /* Calculate the number of position slots needed for this compressed
+ * block. */
+ num_position_slots = lzms_get_position_slot_raw(ulen - 1) + 1;
+
+ LZMS_DEBUG("Using %u position slots", num_position_slots);
+
+ /* Initialize Huffman decoders for each alphabet used in the compressed
+ * representation. */
+ lzms_init_huffman_decoder(&ctx->literal_decoder, &ctx->is,
+ NULL, LZMS_NUM_LITERAL_SYMS,
+ LZMS_LITERAL_CODE_REBUILD_FREQ);
+
+ lzms_init_huffman_decoder(&ctx->lz_offset_decoder, &ctx->is,
+ lzms_position_slot_base, num_position_slots,
+ LZMS_LZ_OFFSET_CODE_REBUILD_FREQ);
+
+ lzms_init_huffman_decoder(&ctx->length_decoder, &ctx->is,
+ lzms_length_slot_base, LZMS_NUM_LEN_SYMS,
+ LZMS_LENGTH_CODE_REBUILD_FREQ);
+
+ lzms_init_huffman_decoder(&ctx->delta_offset_decoder, &ctx->is,
+ lzms_position_slot_base, num_position_slots,
+ LZMS_DELTA_OFFSET_CODE_REBUILD_FREQ);
+
+ lzms_init_huffman_decoder(&ctx->delta_power_decoder, &ctx->is,
+ NULL, LZMS_NUM_DELTA_POWER_SYMS,
+ LZMS_DELTA_POWER_CODE_REBUILD_FREQ);
+
+
+ /* Initialize range decoders, all of which wrap around the same
+ * lzms_range_decoder_raw. */
+ lzms_init_range_decoder(&ctx->main_range_decoder,
+ &ctx->rd, LZMS_NUM_MAIN_STATES);
+
+ lzms_init_range_decoder(&ctx->match_range_decoder,
+ &ctx->rd, LZMS_NUM_MATCH_STATES);
+
+ lzms_init_range_decoder(&ctx->lz_match_range_decoder,
+ &ctx->rd, LZMS_NUM_LZ_MATCH_STATES);
+
+ for (size_t i = 0; i < ARRAY_LEN(ctx->lz_repeat_match_range_decoders); i++)
+ lzms_init_range_decoder(&ctx->lz_repeat_match_range_decoders[i],
+ &ctx->rd, LZMS_NUM_LZ_REPEAT_MATCH_STATES);
+
+ lzms_init_range_decoder(&ctx->delta_match_range_decoder,
+ &ctx->rd, LZMS_NUM_DELTA_MATCH_STATES);
+
+ for (size_t i = 0; i < ARRAY_LEN(ctx->delta_repeat_match_range_decoders); i++)
+ lzms_init_range_decoder(&ctx->delta_repeat_match_range_decoders[i],
+ &ctx->rd, LZMS_NUM_DELTA_REPEAT_MATCH_STATES);
+
+ /* Initialize the LRU queue for recent match offsets. */
+ for (size_t i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++)
+ ctx->recent_lz_offsets[i] = i + 1;
+
+ for (size_t i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++) {
+ ctx->recent_delta_powers[i] = 0;
+ ctx->recent_delta_offsets[i] = i + 1;
+ }
+ ctx->prev_lz_offset = 0;
+ ctx->prev_delta_offset = 0;
+ ctx->prev_delta_power = 0;
+ ctx->upcoming_lz_offset = 0;
+ ctx->upcoming_delta_offset = 0;
+ ctx->upcoming_delta_power = 0;
+
+ LZMS_DEBUG("Decompressor successfully initialized");
+}
+
+/* Decode the series of literals and matches from the LZMS-compressed data.
+ * Returns 0 on success; nonzero if the compressed data is invalid. */
+static int
+lzms_decode_items(const u8 *cdata, size_t clen, u8 *ubuf, size_t ulen)
+{
+ /* XXX: The context could be allocated on the heap. */
+ struct lzms_decompressor ctx;
+
+ /* Initialize the LZMS decompressor. */
+ lzms_init_decompressor(&ctx, cdata, clen, ubuf, ulen);
+
+ /* Decode the sequence of items. */
+ while (ctx.out_next != ctx.out_end) {
+ LZMS_DEBUG("Position %u", ctx.out_next - ctx.out_begin);
+ if (lzms_decode_item(&ctx))
+ return -1;
+ }
+ return 0;
+}
+
/* Attempt one x86 address translation at buffer offset @i, where an opcode of
 * @num_op_bytes bytes has been matched by the caller.  Undoes the
 * compressor's preprocessing by converting the 32-bit value following the
 * opcode from an absolute form back to a position-relative one, but only when
 * the heuristic below deems it likely the value really is a translated
 * address.  Returns the offset at which scanning should resume. */
static s32
lzms_try_x86_translation(u8 *ubuf, s32 i, s32 num_op_bytes,
			 s32 *closest_target_usage_p, s32 last_target_usages[],
			 s32 max_trans_offset)
{
	u16 pos;

	/* Translate only if a seemingly valid target address was referenced
	 * recently enough (within @max_trans_offset bytes). */
	if (i - *closest_target_usage_p <= max_trans_offset) {
		LZMS_DEBUG("Performed x86 translation at position %d "
			   "(opcode 0x%02x)", i, ubuf[i]);
		/* Subtract the instruction's position from the little-endian
		 * 32-bit operand.  NOTE(review): the le32* cast performs an
		 * unaligned, type-punned access -- confirm the le32 helpers
		 * are defined to make this safe on all targets. */
		le32 *p32 = (le32*)&ubuf[i + num_op_bytes];
		u32 n = le32_to_cpu(*p32);
		*p32 = cpu_to_le32(n - i);
	}

	/* Index into last_target_usages[] by i plus the low 16 bits of the
	 * (possibly just translated) operand; the truncation to u16 is
	 * intentional, as the table has exactly 65536 entries. */
	pos = i + le16_to_cpu(*(const le16*)&ubuf[i + num_op_bytes]);

	/* Advance past the opcode and its 32-bit operand (minus 1, since the
	 * function returns i + 1 below). */
	i += num_op_bytes + sizeof(le32) - 1;

	/* If a target with these low two bytes was used recently, remember
	 * this position as the closest "good" target usage. */
	if (i - last_target_usages[pos] <= LZMS_X86_MAX_GOOD_TARGET_OFFSET)
		*closest_target_usage_p = i;

	last_target_usages[pos] = i;

	return i + 1;
}
+
/* Examine the bytes at ubuf[i...] for one of the fixed set of x86/x86-64
 * instruction encodings whose operands the LZMS compressor may have
 * translated, and dispatch to lzms_try_x86_translation() on a match.
 * Returns the position at which scanning should resume (at least i + 1).
 *
 * NOTE(review): up to 2 bytes past @i are read before any match is
 * confirmed, so the caller must stop scanning short of the buffer end (see
 * lzms_postprocess_data(), which stops 11 bytes before the end). */
static s32
lzms_process_x86_translation(u8 *ubuf, s32 i, s32 *closest_target_usage_p,
			     s32 last_target_usages[])
{
	/* Switch on first byte of the opcode, assuming it is really an x86
	 * instruction. */
	switch (ubuf[i]) {
	case 0x48:
		if (ubuf[i + 1] == 0x8b) {
			if (ubuf[i + 2] == 0x5 || ubuf[i + 2] == 0xd) {
				/* Load relative (x86_64) */
				return lzms_try_x86_translation(ubuf, i, 3,
								closest_target_usage_p,
								last_target_usages,
								LZMS_X86_MAX_TRANSLATION_OFFSET);
			}
		} else if (ubuf[i + 1] == 0x8d) {
			if ((ubuf[i + 2] & 0x7) == 0x5) {
				/* Load effective address relative (x86_64) */
				return lzms_try_x86_translation(ubuf, i, 3,
								closest_target_usage_p,
								last_target_usages,
								LZMS_X86_MAX_TRANSLATION_OFFSET);
			}
		}
		break;

	case 0x4c:
		if (ubuf[i + 1] == 0x8d) {
			if ((ubuf[i + 2] & 0x7) == 0x5) {
				/* Load effective address relative (x86_64) */
				return lzms_try_x86_translation(ubuf, i, 3,
								closest_target_usage_p,
								last_target_usages,
								LZMS_X86_MAX_TRANSLATION_OFFSET);
			}
		}
		break;

	case 0xe8:
		/* Call relative.  Note: uses a halved translation window
		 * compared to the other cases. */
		return lzms_try_x86_translation(ubuf, i, 1, closest_target_usage_p,
						last_target_usages,
						LZMS_X86_MAX_TRANSLATION_OFFSET / 2);

	case 0xe9:
		/* Jump relative: skipped without translation (1 opcode byte
		 * plus a 4-byte displacement). */
		return i + 5;

	case 0xf0:
		if (ubuf[i + 1] == 0x83 && ubuf[i + 2] == 0x05) {
			/* Lock add relative */
			return lzms_try_x86_translation(ubuf, i, 3,
							closest_target_usage_p,
							last_target_usages,
							LZMS_X86_MAX_TRANSLATION_OFFSET);
		}
		break;

	case 0xff:
		if (ubuf[i + 1] == 0x15) {
			/* Call indirect */
			return lzms_try_x86_translation(ubuf, i, 2,
							closest_target_usage_p,
							last_target_usages,
							LZMS_X86_MAX_TRANSLATION_OFFSET);
		}
		break;
	}
	/* No translatable instruction recognized here; advance one byte. */
	return i + 1;
}
+
+/* Postprocess the uncompressed data by undoing the translation of relative
+ * addresses embedded in x86 instructions into absolute addresses.
+ *
+ * There does not appear to be any way to check to see if this postprocessing
+ * actually needs to be done (or to plug in alternate filters, like in LZMA),
+ * and the corresponding preprocessing seems to be done unconditionally. */
+static void
+lzms_postprocess_data(u8 *ubuf, s32 ulen)
+{
+ /* Offset (from beginning of buffer) of the most recent reference to a
+ * seemingly valid target address. */
+ s32 closest_target_usage = -LZMS_X86_MAX_TRANSLATION_OFFSET - 1;
+
+ /* Offset (from beginning of buffer) of the most recently used target
+ * address beginning with two bytes equal to the array index.
+ *
+ * XXX: This array could be allocated on the heap. */
+ s32 last_target_usages[65536];
+ for (s32 i = 0; i < 65536; i++)
+ last_target_usages[i] = -LZMS_X86_MAX_GOOD_TARGET_OFFSET - 1;
+
+ /* Check each byte in the buffer for an x86 opcode for which a
+ * translation may be possible. No translations are done on any
+ * instructions starting in the last 11 bytes of the buffer. */
+ for (s32 i = 0; i < ulen - 11; )
+ i = lzms_process_x86_translation(ubuf, i, &closest_target_usage,
+ last_target_usages);
+}
+
+/* API function documented in wimlib.h */
+WIMLIBAPI int
+wimlib_lzms_decompress(const void *cdata, unsigned clen,
+ void *ubuf, unsigned ulen)
+{
+ /* The range decoder requires that a minimum of 4 bytes of compressed
+ * data be initially available. */
+ if (clen < 4) {
+ LZMS_DEBUG("Compressed length too small (got %u, expected >= 4)",
+ clen);
+ return -1;
+ }
+
+ /* A LZMS-compressed data block should be evenly divisible into 16-bit
+ * integers. */
+ if (clen % 2 != 0) {
+ LZMS_DEBUG("Compressed length not divisible by 2 (got %u)", clen);
+ return -1;
+ }
+
+ /* Handle the trivial case where nothing needs to be decompressed.
+ * (Necessary because a window of size 0 does not have a valid position
+ * slot.) */
+ if (ulen == 0)
+ return 0;
+
+ /* The x86 post-processor requires that the uncompressed length fit into
+ * a signed 32-bit integer. Also, the position slot table cannot be
+ * searched for a position of INT32_MAX or greater. */
+ if (ulen >= INT32_MAX) {
+ LZMS_DEBUG("Uncompressed length too large "
+ "(got %u, expected < INT32_MAX)", ulen);
+ return -1;
+ }
+
+ /* Decode the literals and matches. */
+ if (lzms_decode_items(cdata, clen, ubuf, ulen))
+ return -1;
+
+ /* Postprocess the data. */
+ lzms_postprocess_data(ubuf, ulen);
+
+ LZMS_DEBUG("Decompression successful.");
+ return 0;