X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flzms_decompress.c;h=4791054a7f468202d21e97d60fe830cc0667ee03;hp=47cd656194b7dc3b3e1a853d9f112b12b9a485f0;hb=908381d2809a48acd9490ec080e51087ae1529fd;hpb=ff5bc819b64a59560c883bc3354fe4e4b2d7798c diff --git a/src/lzms_decompress.c b/src/lzms_decompress.c index 47cd6561..4791054a 100644 --- a/src/lzms_decompress.c +++ b/src/lzms_decompress.c @@ -5,7 +5,7 @@ */ /* - * Copyright (C) 2013, 2014, 2015 Eric Biggers + * Copyright (C) 2013-2016 Eric Biggers * * This file is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -257,10 +257,10 @@ /* The TABLEBITS values can be changed; they only affect decoding speed. */ #define LZMS_LITERAL_TABLEBITS 10 -#define LZMS_LENGTH_TABLEBITS 10 -#define LZMS_LZ_OFFSET_TABLEBITS 10 -#define LZMS_DELTA_OFFSET_TABLEBITS 10 -#define LZMS_DELTA_POWER_TABLEBITS 8 +#define LZMS_LENGTH_TABLEBITS 9 +#define LZMS_LZ_OFFSET_TABLEBITS 11 +#define LZMS_DELTA_OFFSET_TABLEBITS 11 +#define LZMS_DELTA_POWER_TABLEBITS 7 struct lzms_range_decoder { @@ -276,10 +276,10 @@ struct lzms_range_decoder { /* Pointer to the next little-endian 16-bit integer in the compressed * input data (reading forwards). */ - const le16 *next; + const u8 *next; /* Pointer to the end of the compressed input data. */ - const le16 *end; + const u8 *end; }; typedef u64 bitbuf_t; @@ -295,10 +295,10 @@ struct lzms_input_bitstream { /* Pointer to the one past the next little-endian 16-bit integer in the * compressed input data (reading backwards). */ - const le16 *next; + const u8 *next; /* Pointer to the beginning of the compressed input data. */ - const le16 *begin; + const u8 *begin; }; #define BITBUF_NBITS (8 * sizeof(bitbuf_t)) @@ -321,64 +321,30 @@ struct lzms_decompressor { union { struct { - struct lzms_range_decoder rd; - struct lzms_input_bitstream is; - - /* LRU queues for match sources */ - u32 recent_lz_offsets[LZMS_NUM_LZ_REPS + 1]; - u64 recent_delta_pairs[LZMS_NUM_DELTA_REPS + 1]; - u32 pending_lz_offset; - u64 pending_delta_pair; - const u8 *lz_offset_still_pending; - const u8 *delta_pair_still_pending; - - /* States and probability entries for item type disambiguation */ + struct lzms_probabilites probs; - u32 main_state; - u32 match_state; - u32 lz_state; - u32 delta_state; - u32 lz_rep_states[LZMS_NUM_LZ_REP_DECISIONS]; - u32 delta_rep_states[LZMS_NUM_DELTA_REP_DECISIONS]; - - struct lzms_probability_entry main_probs[LZMS_NUM_MAIN_PROBS]; - struct lzms_probability_entry match_probs[LZMS_NUM_MATCH_PROBS]; - struct lzms_probability_entry lz_probs[LZMS_NUM_LZ_PROBS]; - struct lzms_probability_entry delta_probs[LZMS_NUM_DELTA_PROBS]; - struct lzms_probability_entry lz_rep_probs[LZMS_NUM_LZ_REP_DECISIONS] - [LZMS_NUM_LZ_REP_PROBS]; - struct lzms_probability_entry delta_rep_probs[LZMS_NUM_DELTA_REP_DECISIONS] - [LZMS_NUM_DELTA_REP_PROBS]; - - /* Huffman decoding */ - - u16 literal_decode_table[(1 << LZMS_LITERAL_TABLEBITS) + - (2 * LZMS_NUM_LITERAL_SYMS)] - _aligned_attribute(DECODE_TABLE_ALIGNMENT); + DECODE_TABLE(literal_decode_table, LZMS_NUM_LITERAL_SYMS, + LZMS_LITERAL_TABLEBITS, LZMS_MAX_CODEWORD_LENGTH); u32 literal_freqs[LZMS_NUM_LITERAL_SYMS]; struct lzms_huffman_rebuild_info literal_rebuild_info; - u16 lz_offset_decode_table[(1 << LZMS_LZ_OFFSET_TABLEBITS) + - ( 2 * LZMS_MAX_NUM_OFFSET_SYMS)] - _aligned_attribute(DECODE_TABLE_ALIGNMENT); + DECODE_TABLE(lz_offset_decode_table, LZMS_MAX_NUM_OFFSET_SYMS, + LZMS_LZ_OFFSET_TABLEBITS, LZMS_MAX_CODEWORD_LENGTH); u32 lz_offset_freqs[LZMS_MAX_NUM_OFFSET_SYMS]; struct lzms_huffman_rebuild_info lz_offset_rebuild_info; - u16 length_decode_table[(1 << LZMS_LENGTH_TABLEBITS) + - (2 * LZMS_NUM_LENGTH_SYMS)] - _aligned_attribute(DECODE_TABLE_ALIGNMENT); + DECODE_TABLE(length_decode_table, LZMS_NUM_LENGTH_SYMS, + LZMS_LENGTH_TABLEBITS, LZMS_MAX_CODEWORD_LENGTH); u32 length_freqs[LZMS_NUM_LENGTH_SYMS]; struct lzms_huffman_rebuild_info length_rebuild_info; - u16 delta_offset_decode_table[(1 << LZMS_DELTA_OFFSET_TABLEBITS) + - (2 * LZMS_MAX_NUM_OFFSET_SYMS)] - _aligned_attribute(DECODE_TABLE_ALIGNMENT); + DECODE_TABLE(delta_offset_decode_table, LZMS_MAX_NUM_OFFSET_SYMS, + LZMS_DELTA_OFFSET_TABLEBITS, LZMS_MAX_CODEWORD_LENGTH); u32 delta_offset_freqs[LZMS_MAX_NUM_OFFSET_SYMS]; struct lzms_huffman_rebuild_info delta_offset_rebuild_info; - u16 delta_power_decode_table[(1 << LZMS_DELTA_POWER_TABLEBITS) + - (2 * LZMS_NUM_DELTA_POWER_SYMS)] - _aligned_attribute(DECODE_TABLE_ALIGNMENT); + DECODE_TABLE(delta_power_decode_table, LZMS_NUM_DELTA_POWER_SYMS, + LZMS_DELTA_POWER_TABLEBITS, LZMS_MAX_CODEWORD_LENGTH); u32 delta_power_freqs[LZMS_NUM_DELTA_POWER_SYMS]; struct lzms_huffman_rebuild_info delta_power_rebuild_info; @@ -392,10 +358,10 @@ struct lzms_decompressor { }; /* Initialize the input bitstream @is to read backwards from the compressed data - * buffer @in that is @count 16-bit integers long. */ + * buffer @in that is @count bytes long. */ static void lzms_input_bitstream_init(struct lzms_input_bitstream *is, - const le16 *in, size_t count) + const u8 *in, size_t count) { is->bitbuf = 0; is->bitsleft = 0; @@ -416,18 +382,22 @@ lzms_ensure_bits(struct lzms_input_bitstream *is, unsigned num_bits) avail = BITBUF_NBITS - is->bitsleft; if (UNALIGNED_ACCESS_IS_FAST && CPU_IS_LITTLE_ENDIAN && - WORDSIZE == 8 && likely((u8 *)is->next - (u8 *)is->begin >= 8)) + WORDBYTES == 8 && likely(is->next - is->begin >= 8)) { - is->next -= avail >> 4; + is->next -= (avail & ~15) >> 3; is->bitbuf |= load_u64_unaligned(is->next) << (avail & 15); is->bitsleft += avail & ~15; } else { - if (likely(is->next != is->begin)) - is->bitbuf |= (bitbuf_t)le16_to_cpu(*--is->next) + if (likely(is->next != is->begin)) { + is->next -= sizeof(le16); + is->bitbuf |= (bitbuf_t)get_unaligned_le16(is->next) << (avail - 16); - if (likely(is->next != is->begin)) - is->bitbuf |=(bitbuf_t)le16_to_cpu(*--is->next) + } + if (likely(is->next != is->begin)) { + is->next -= sizeof(le16); + is->bitbuf |= (bitbuf_t)get_unaligned_le16(is->next) << (avail - 32); + } is->bitsleft += 32; } } @@ -465,14 +435,15 @@ lzms_read_bits(struct lzms_input_bitstream *is, unsigned num_bits) } /* Initialize the range decoder @rd to read forwards from the compressed data - * buffer @in that is @count 16-bit integers long. */ + * buffer @in that is @count bytes long. */ static void lzms_range_decoder_init(struct lzms_range_decoder *rd, - const le16 *in, size_t count) + const u8 *in, size_t count) { rd->range = 0xffffffff; - rd->code = ((u32)le16_to_cpu(in[0]) << 16) | le16_to_cpu(in[1]); - rd->next = in + 2; + rd->code = ((u32)get_unaligned_le16(in) << 16) | + get_unaligned_le16(in + 2); + rd->next = in + 4; rd->end = in + count; } @@ -492,16 +463,22 @@ lzms_decode_bit(struct lzms_range_decoder *rd, u32 *state_p, u32 num_states, /* Load the probability entry corresponding to the current state. */ prob_entry = &probs[*state_p]; + /* Update the state early. We'll still need to OR the state with 1 + * later if the decoded bit is a 1. */ + *state_p = (*state_p << 1) & (num_states - 1); + /* Get the probability (out of LZMS_PROBABILITY_DENOMINATOR) that the * next bit is 0. */ prob = lzms_get_probability(prob_entry); /* Normalize if needed. */ - if (rd->range <= 0xffff) { + if (!(rd->range & 0xFFFF0000)) { rd->range <<= 16; rd->code <<= 16; - if (likely(rd->next != rd->end)) - rd->code |= le16_to_cpu(*rd->next++); + if (likely(rd->next != rd->end)) { + rd->code |= get_unaligned_le16(rd->next); + rd->next += sizeof(le16); + } } /* Based on the probability, calculate the bound between the 0-bit @@ -513,7 +490,6 @@ lzms_decode_bit(struct lzms_range_decoder *rd, u32 *state_p, u32 num_states, rd->range = bound; /* Update the state and probability entry based on the decoded bit. */ - *state_p = ((*state_p << 1) | 0) & (num_states - 1); lzms_update_probability_entry(prob_entry, 0); return 0; } else { @@ -522,54 +498,12 @@ lzms_decode_bit(struct lzms_range_decoder *rd, u32 *state_p, u32 num_states, rd->code -= bound; /* Update the state and probability entry based on the decoded bit. */ - *state_p = ((*state_p << 1) | 1) & (num_states - 1); lzms_update_probability_entry(prob_entry, 1); + *state_p |= 1; return 1; } } -static inline int -lzms_decode_main_bit(struct lzms_decompressor *d) -{ - return lzms_decode_bit(&d->rd, &d->main_state, - LZMS_NUM_MAIN_PROBS, d->main_probs); -} - -static inline int -lzms_decode_match_bit(struct lzms_decompressor *d) -{ - return lzms_decode_bit(&d->rd, &d->match_state, - LZMS_NUM_MATCH_PROBS, d->match_probs); -} - -static inline int -lzms_decode_lz_bit(struct lzms_decompressor *d) -{ - return lzms_decode_bit(&d->rd, &d->lz_state, - LZMS_NUM_LZ_PROBS, d->lz_probs); -} - -static inline int -lzms_decode_delta_bit(struct lzms_decompressor *d) -{ - return lzms_decode_bit(&d->rd, &d->delta_state, - LZMS_NUM_DELTA_PROBS, d->delta_probs); -} - -static inline int -lzms_decode_lz_rep_bit(struct lzms_decompressor *d, int idx) -{ - return lzms_decode_bit(&d->rd, &d->lz_rep_states[idx], - LZMS_NUM_LZ_REP_PROBS, d->lz_rep_probs[idx]); -} - -static inline int -lzms_decode_delta_rep_bit(struct lzms_decompressor *d, int idx) -{ - return lzms_decode_bit(&d->rd, &d->delta_rep_states[idx], - LZMS_NUM_DELTA_REP_PROBS, d->delta_rep_probs[idx]); -} - static void lzms_build_huffman_code(struct lzms_huffman_rebuild_info *rebuild_info) { @@ -604,6 +538,50 @@ lzms_init_huffman_code(struct lzms_huffman_rebuild_info *rebuild_info, lzms_build_huffman_code(rebuild_info); } +static void +lzms_init_huffman_codes(struct lzms_decompressor *d, unsigned num_offset_slots) +{ + lzms_init_huffman_code(&d->literal_rebuild_info, + LZMS_NUM_LITERAL_SYMS, + LZMS_LITERAL_CODE_REBUILD_FREQ, + d->codewords, + d->literal_freqs, + d->literal_decode_table, + LZMS_LITERAL_TABLEBITS); + + lzms_init_huffman_code(&d->lz_offset_rebuild_info, + num_offset_slots, + LZMS_LZ_OFFSET_CODE_REBUILD_FREQ, + d->codewords, + d->lz_offset_freqs, + d->lz_offset_decode_table, + LZMS_LZ_OFFSET_TABLEBITS); + + lzms_init_huffman_code(&d->length_rebuild_info, + LZMS_NUM_LENGTH_SYMS, + LZMS_LENGTH_CODE_REBUILD_FREQ, + d->codewords, + d->length_freqs, + d->length_decode_table, + LZMS_LENGTH_TABLEBITS); + + lzms_init_huffman_code(&d->delta_offset_rebuild_info, + num_offset_slots, + LZMS_DELTA_OFFSET_CODE_REBUILD_FREQ, + d->codewords, + d->delta_offset_freqs, + d->delta_offset_decode_table, + LZMS_DELTA_OFFSET_TABLEBITS); + + lzms_init_huffman_code(&d->delta_power_rebuild_info, + LZMS_NUM_DELTA_POWER_SYMS, + LZMS_DELTA_POWER_CODE_REBUILD_FREQ, + d->codewords, + d->delta_power_freqs, + d->delta_power_decode_table, + LZMS_DELTA_POWER_TABLEBITS); +} + static noinline void lzms_rebuild_huffman_code(struct lzms_huffman_rebuild_info *rebuild_info) { @@ -611,49 +589,43 @@ lzms_rebuild_huffman_code(struct lzms_huffman_rebuild_info *rebuild_info) lzms_dilute_symbol_frequencies(rebuild_info->freqs, rebuild_info->num_syms); } +/* XXX: mostly copied from read_huffsym() in decompress_common.h because LZMS + * needs its own bitstream */ static inline unsigned lzms_decode_huffman_symbol(struct lzms_input_bitstream *is, u16 decode_table[], unsigned table_bits, u32 freqs[], struct lzms_huffman_rebuild_info *rebuild_info) { - unsigned key_bits; unsigned entry; - unsigned sym; + unsigned symbol; + unsigned length; lzms_ensure_bits(is, LZMS_MAX_CODEWORD_LENGTH); - /* Index the decode table by the next table_bits bits of the input. */ - key_bits = lzms_peek_bits(is, table_bits); - entry = decode_table[key_bits]; - if (likely(entry < 0xC000)) { - /* Fast case: The decode table directly provided the symbol and - * codeword length. The low 11 bits are the symbol, and the - * high 5 bits are the codeword length. */ - lzms_remove_bits(is, entry >> 11); - sym = entry & 0x7FF; - } else { - /* Slow case: The codeword for the symbol is longer than - * table_bits, so the symbol does not have an entry directly in - * the first (1 << table_bits) entries of the decode table. - * Traverse the appropriate binary tree bit-by-bit in order to - * decode the symbol. */ + entry = decode_table[lzms_peek_bits(is, table_bits)]; + symbol = entry >> DECODE_TABLE_SYMBOL_SHIFT; + length = entry & DECODE_TABLE_LENGTH_MASK; + + if (entry >= (1U << (table_bits + DECODE_TABLE_SYMBOL_SHIFT))) { lzms_remove_bits(is, table_bits); - do { - key_bits = (entry & 0x3FFF) + lzms_pop_bits(is, 1); - } while ((entry = decode_table[key_bits]) >= 0xC000); - sym = entry; + entry = decode_table[symbol + lzms_peek_bits(is, length)]; + symbol = entry >> DECODE_TABLE_SYMBOL_SHIFT; + length = entry & DECODE_TABLE_LENGTH_MASK; } - freqs[sym]++; + lzms_remove_bits(is, length); + + freqs[symbol]++; if (--rebuild_info->num_syms_until_rebuild == 0) lzms_rebuild_huffman_code(rebuild_info); - return sym; + return symbol; } static inline unsigned -lzms_decode_literal(struct lzms_decompressor *d) +lzms_decode_literal(struct lzms_decompressor *d, + struct lzms_input_bitstream *is) { - return lzms_decode_huffman_symbol(&d->is, + return lzms_decode_huffman_symbol(is, d->literal_decode_table, LZMS_LITERAL_TABLEBITS, d->literal_freqs, @@ -661,21 +633,23 @@ lzms_decode_literal(struct lzms_decompressor *d) } static inline u32 -lzms_decode_lz_offset(struct lzms_decompressor *d) +lzms_decode_lz_offset(struct lzms_decompressor *d, + struct lzms_input_bitstream *is) { - unsigned slot = lzms_decode_huffman_symbol(&d->is, + unsigned slot = lzms_decode_huffman_symbol(is, d->lz_offset_decode_table, LZMS_LZ_OFFSET_TABLEBITS, d->lz_offset_freqs, &d->lz_offset_rebuild_info); return lzms_offset_slot_base[slot] + - lzms_read_bits(&d->is, lzms_extra_offset_bits[slot]); + lzms_read_bits(is, lzms_extra_offset_bits[slot]); } static inline u32 -lzms_decode_length(struct lzms_decompressor *d) +lzms_decode_length(struct lzms_decompressor *d, + struct lzms_input_bitstream *is) { - unsigned slot = lzms_decode_huffman_symbol(&d->is, + unsigned slot = lzms_decode_huffman_symbol(is, d->length_decode_table, LZMS_LENGTH_TABLEBITS, d->length_freqs, @@ -684,108 +658,177 @@ lzms_decode_length(struct lzms_decompressor *d) unsigned num_extra_bits = lzms_extra_length_bits[slot]; /* Usually most lengths are short and have no extra bits. */ if (num_extra_bits) - length += lzms_read_bits(&d->is, num_extra_bits); + length += lzms_read_bits(is, num_extra_bits); return length; } static inline u32 -lzms_decode_delta_offset(struct lzms_decompressor *d) +lzms_decode_delta_offset(struct lzms_decompressor *d, + struct lzms_input_bitstream *is) { - unsigned slot = lzms_decode_huffman_symbol(&d->is, + unsigned slot = lzms_decode_huffman_symbol(is, d->delta_offset_decode_table, LZMS_DELTA_OFFSET_TABLEBITS, d->delta_offset_freqs, &d->delta_offset_rebuild_info); return lzms_offset_slot_base[slot] + - lzms_read_bits(&d->is, lzms_extra_offset_bits[slot]); + lzms_read_bits(is, lzms_extra_offset_bits[slot]); } static inline unsigned -lzms_decode_delta_power(struct lzms_decompressor *d) +lzms_decode_delta_power(struct lzms_decompressor *d, + struct lzms_input_bitstream *is) { - return lzms_decode_huffman_symbol(&d->is, + return lzms_decode_huffman_symbol(is, d->delta_power_decode_table, LZMS_DELTA_POWER_TABLEBITS, d->delta_power_freqs, &d->delta_power_rebuild_info); } -/* Decode the series of literals and matches from the LZMS-compressed data. - * Return 0 if successful or -1 if the compressed data is invalid. */ static int -lzms_decode_items(struct lzms_decompressor * const restrict d, - u8 * const restrict out, const size_t out_nbytes) +lzms_create_decompressor(size_t max_bufsize, void **d_ret) +{ + struct lzms_decompressor *d; + + if (max_bufsize > LZMS_MAX_BUFFER_SIZE) + return WIMLIB_ERR_INVALID_PARAM; + + d = ALIGNED_MALLOC(sizeof(struct lzms_decompressor), + DECODE_TABLE_ALIGNMENT); + if (!d) + return WIMLIB_ERR_NOMEM; + + *d_ret = d; + return 0; +} + +/* + * Decompress @in_nbytes bytes of LZMS-compressed data at @in and write the + * uncompressed data, which had original size @out_nbytes, to @out. Return 0 if + * successful or -1 if the compressed data is invalid. + */ +static int +lzms_decompress(const void * const restrict in, const size_t in_nbytes, + void * const restrict out, const size_t out_nbytes, + void * const restrict _d) { + struct lzms_decompressor *d = _d; u8 *out_next = out; u8 * const out_end = out + out_nbytes; + struct lzms_range_decoder rd; + struct lzms_input_bitstream is; - while (out_next != out_end) { + /* LRU queues for match sources */ + u32 recent_lz_offsets[LZMS_NUM_LZ_REPS + 1]; + u64 recent_delta_pairs[LZMS_NUM_DELTA_REPS + 1]; - if (!lzms_decode_main_bit(d)) { + /* Previous item type: 0 = literal, 1 = LZ match, 2 = delta match. + * This is used to handle delayed updates of the LRU queues. Instead of + * actually delaying the updates, we can check when decoding each rep + * match whether a delayed update needs to be taken into account, and if + * so get the match source from slot 'rep_idx + 1' instead of from slot + * 'rep_idx'. */ + unsigned prev_item_type = 0; - /* Literal */ - *out_next++ = lzms_decode_literal(d); + /* States and probability entries for item type disambiguation */ + u32 main_state = 0; + u32 match_state = 0; + u32 lz_state = 0; + u32 delta_state = 0; + u32 lz_rep_states[LZMS_NUM_LZ_REP_DECISIONS] = {}; + u32 delta_rep_states[LZMS_NUM_DELTA_REP_DECISIONS] = {}; + + /* + * Requirements on the compressed data: + * + * 1. LZMS-compressed data is a series of 16-bit integers, so the + * compressed data buffer cannot take up an odd number of bytes. + * 2. There must be at least 4 bytes of compressed data, since otherwise + * we cannot even initialize the range decoder. + */ + if ((in_nbytes & 1) || (in_nbytes < 4)) + return -1; + + lzms_range_decoder_init(&rd, in, in_nbytes); + + lzms_input_bitstream_init(&is, in, in_nbytes); - } else if (!lzms_decode_match_bit(d)) { + lzms_init_probabilities(&d->probs); + lzms_init_huffman_codes(d, lzms_get_num_offset_slots(out_nbytes)); + + for (int i = 0; i < LZMS_NUM_LZ_REPS + 1; i++) + recent_lz_offsets[i] = i + 1; + + for (int i = 0; i < LZMS_NUM_DELTA_REPS + 1; i++) + recent_delta_pairs[i] = i + 1; + + /* Main decode loop */ + while (out_next != out_end) { + + if (!lzms_decode_bit(&rd, &main_state, + LZMS_NUM_MAIN_PROBS, d->probs.main)) + { + /* Literal */ + *out_next++ = lzms_decode_literal(d, &is); + prev_item_type = 0; + + } else if (!lzms_decode_bit(&rd, &match_state, + LZMS_NUM_MATCH_PROBS, + d->probs.match)) + { /* LZ match */ u32 offset; u32 length; - if (!lzms_decode_lz_bit(d)) { + STATIC_ASSERT(LZMS_NUM_LZ_REPS == 3); + + if (!lzms_decode_bit(&rd, &lz_state, + LZMS_NUM_LZ_PROBS, d->probs.lz)) + { /* Explicit offset */ - offset = lzms_decode_lz_offset(d); + offset = lzms_decode_lz_offset(d, &is); + + recent_lz_offsets[3] = recent_lz_offsets[2]; + recent_lz_offsets[2] = recent_lz_offsets[1]; + recent_lz_offsets[1] = recent_lz_offsets[0]; } else { /* Repeat offset */ - if (d->pending_lz_offset != 0 && - out_next != d->lz_offset_still_pending) + if (!lzms_decode_bit(&rd, &lz_rep_states[0], + LZMS_NUM_LZ_REP_PROBS, + d->probs.lz_rep[0])) { - BUILD_BUG_ON(LZMS_NUM_LZ_REPS != 3); - d->recent_lz_offsets[3] = d->recent_lz_offsets[2]; - d->recent_lz_offsets[2] = d->recent_lz_offsets[1]; - d->recent_lz_offsets[1] = d->recent_lz_offsets[0]; - d->recent_lz_offsets[0] = d->pending_lz_offset; - d->pending_lz_offset = 0; - } - - BUILD_BUG_ON(LZMS_NUM_LZ_REPS != 3); - if (!lzms_decode_lz_rep_bit(d, 0)) { - offset = d->recent_lz_offsets[0]; - d->recent_lz_offsets[0] = d->recent_lz_offsets[1]; - d->recent_lz_offsets[1] = d->recent_lz_offsets[2]; - d->recent_lz_offsets[2] = d->recent_lz_offsets[3]; - } else if (!lzms_decode_lz_rep_bit(d, 1)) { - offset = d->recent_lz_offsets[1]; - d->recent_lz_offsets[1] = d->recent_lz_offsets[2]; - d->recent_lz_offsets[2] = d->recent_lz_offsets[3]; + offset = recent_lz_offsets[0 + (prev_item_type & 1)]; + recent_lz_offsets[0 + (prev_item_type & 1)] = recent_lz_offsets[0]; + } else if (!lzms_decode_bit(&rd, &lz_rep_states[1], + LZMS_NUM_LZ_REP_PROBS, + d->probs.lz_rep[1])) + { + offset = recent_lz_offsets[1 + (prev_item_type & 1)]; + recent_lz_offsets[1 + (prev_item_type & 1)] = recent_lz_offsets[1]; + recent_lz_offsets[1] = recent_lz_offsets[0]; } else { - offset = d->recent_lz_offsets[2]; - d->recent_lz_offsets[2] = d->recent_lz_offsets[3]; + offset = recent_lz_offsets[2 + (prev_item_type & 1)]; + recent_lz_offsets[2 + (prev_item_type & 1)] = recent_lz_offsets[2]; + recent_lz_offsets[2] = recent_lz_offsets[1]; + recent_lz_offsets[1] = recent_lz_offsets[0]; } } + recent_lz_offsets[0] = offset; + prev_item_type = 1; - if (d->pending_lz_offset != 0) { - BUILD_BUG_ON(LZMS_NUM_LZ_REPS != 3); - d->recent_lz_offsets[3] = d->recent_lz_offsets[2]; - d->recent_lz_offsets[2] = d->recent_lz_offsets[1]; - d->recent_lz_offsets[1] = d->recent_lz_offsets[0]; - d->recent_lz_offsets[0] = d->pending_lz_offset; - } - d->pending_lz_offset = offset; - - length = lzms_decode_length(d); + length = lzms_decode_length(d, &is); if (unlikely(length > out_end - out_next)) return -1; - if (unlikely(offset > out_next - out)) + if (unlikely(offset > out_next - (u8 *)out)) return -1; lz_copy(out_next, length, offset, out_end, LZMS_MIN_MATCH_LENGTH); out_next += length; - - d->lz_offset_still_pending = out_next; } else { /* Delta match */ @@ -797,54 +840,50 @@ lzms_decode_items(struct lzms_decompressor * const restrict d, u32 offset; const u8 *matchptr; u32 length; + u64 pair; + + STATIC_ASSERT(LZMS_NUM_DELTA_REPS == 3); - if (!lzms_decode_delta_bit(d)) { + if (!lzms_decode_bit(&rd, &delta_state, + LZMS_NUM_DELTA_PROBS, + d->probs.delta)) + { /* Explicit offset */ - power = lzms_decode_delta_power(d); - raw_offset = lzms_decode_delta_offset(d); - } else { - /* Repeat offset */ - u64 val; + power = lzms_decode_delta_power(d, &is); + raw_offset = lzms_decode_delta_offset(d, &is); - if (d->pending_delta_pair != 0 && - out_next != d->delta_pair_still_pending) + pair = ((u64)power << 32) | raw_offset; + recent_delta_pairs[3] = recent_delta_pairs[2]; + recent_delta_pairs[2] = recent_delta_pairs[1]; + recent_delta_pairs[1] = recent_delta_pairs[0]; + } else { + if (!lzms_decode_bit(&rd, &delta_rep_states[0], + LZMS_NUM_DELTA_REP_PROBS, + d->probs.delta_rep[0])) { - BUILD_BUG_ON(LZMS_NUM_DELTA_REPS != 3); - d->recent_delta_pairs[3] = d->recent_delta_pairs[2]; - d->recent_delta_pairs[2] = d->recent_delta_pairs[1]; - d->recent_delta_pairs[1] = d->recent_delta_pairs[0]; - d->recent_delta_pairs[0] = d->pending_delta_pair; - d->pending_delta_pair = 0; - } - - BUILD_BUG_ON(LZMS_NUM_DELTA_REPS != 3); - if (!lzms_decode_delta_rep_bit(d, 0)) { - val = d->recent_delta_pairs[0]; - d->recent_delta_pairs[0] = d->recent_delta_pairs[1]; - d->recent_delta_pairs[1] = d->recent_delta_pairs[2]; - d->recent_delta_pairs[2] = d->recent_delta_pairs[3]; - } else if (!lzms_decode_delta_rep_bit(d, 1)) { - val = d->recent_delta_pairs[1]; - d->recent_delta_pairs[1] = d->recent_delta_pairs[2]; - d->recent_delta_pairs[2] = d->recent_delta_pairs[3]; + pair = recent_delta_pairs[0 + (prev_item_type >> 1)]; + recent_delta_pairs[0 + (prev_item_type >> 1)] = recent_delta_pairs[0]; + } else if (!lzms_decode_bit(&rd, &delta_rep_states[1], + LZMS_NUM_DELTA_REP_PROBS, + d->probs.delta_rep[1])) + { + pair = recent_delta_pairs[1 + (prev_item_type >> 1)]; + recent_delta_pairs[1 + (prev_item_type >> 1)] = recent_delta_pairs[1]; + recent_delta_pairs[1] = recent_delta_pairs[0]; } else { - val = d->recent_delta_pairs[2]; - d->recent_delta_pairs[2] = d->recent_delta_pairs[3]; + pair = recent_delta_pairs[2 + (prev_item_type >> 1)]; + recent_delta_pairs[2 + (prev_item_type >> 1)] = recent_delta_pairs[2]; + recent_delta_pairs[2] = recent_delta_pairs[1]; + recent_delta_pairs[1] = recent_delta_pairs[0]; } - power = val >> 32; - raw_offset = (u32)val; - } - if (d->pending_delta_pair != 0) { - BUILD_BUG_ON(LZMS_NUM_DELTA_REPS != 3); - d->recent_delta_pairs[3] = d->recent_delta_pairs[2]; - d->recent_delta_pairs[2] = d->recent_delta_pairs[1]; - d->recent_delta_pairs[1] = d->recent_delta_pairs[0]; - d->recent_delta_pairs[0] = d->pending_delta_pair; + power = pair >> 32; + raw_offset = (u32)pair; } - d->pending_delta_pair = raw_offset | ((u64)power << 32); + recent_delta_pairs[0] = pair; + prev_item_type = 2; - length = lzms_decode_length(d); + length = lzms_decode_length(d, &is); span = (u32)1 << power; offset = raw_offset << power; @@ -858,7 +897,7 @@ lzms_decode_items(struct lzms_decompressor * const restrict d, return -1; /* buffer underrun? */ - if (unlikely(offset + span > out_next - out)) + if (unlikely(offset + span > out_next - (u8 *)out)) return -1; /* buffer overrun? */ @@ -872,144 +911,8 @@ lzms_decode_items(struct lzms_decompressor * const restrict d, out_next++; matchptr++; } while (--length); - - d->delta_pair_still_pending = out_next; } } - return 0; -} - -static void -lzms_init_decompressor(struct lzms_decompressor *d, const void *in, - size_t in_nbytes, unsigned num_offset_slots) -{ - /* Match offset LRU queues */ - for (int i = 0; i < LZMS_NUM_LZ_REPS + 1; i++) - d->recent_lz_offsets[i] = i + 1; - for (int i = 0; i < LZMS_NUM_DELTA_REPS + 1; i++) - d->recent_delta_pairs[i] = i + 1; - d->pending_lz_offset = 0; - d->pending_delta_pair = 0; - - /* Range decoding */ - - lzms_range_decoder_init(&d->rd, in, in_nbytes / sizeof(le16)); - - d->main_state = 0; - lzms_init_probability_entries(d->main_probs, LZMS_NUM_MAIN_PROBS); - - d->match_state = 0; - lzms_init_probability_entries(d->match_probs, LZMS_NUM_MATCH_PROBS); - - d->lz_state = 0; - lzms_init_probability_entries(d->lz_probs, LZMS_NUM_LZ_PROBS); - - for (int i = 0; i < LZMS_NUM_LZ_REP_DECISIONS; i++) { - d->lz_rep_states[i] = 0; - lzms_init_probability_entries(d->lz_rep_probs[i], - LZMS_NUM_LZ_REP_PROBS); - } - - d->delta_state = 0; - lzms_init_probability_entries(d->delta_probs, LZMS_NUM_DELTA_PROBS); - - for (int i = 0; i < LZMS_NUM_DELTA_REP_DECISIONS; i++) { - d->delta_rep_states[i] = 0; - lzms_init_probability_entries(d->delta_rep_probs[i], - LZMS_NUM_DELTA_REP_PROBS); - } - - /* Huffman decoding */ - - lzms_input_bitstream_init(&d->is, in, in_nbytes / sizeof(le16)); - - lzms_init_huffman_code(&d->literal_rebuild_info, - LZMS_NUM_LITERAL_SYMS, - LZMS_LITERAL_CODE_REBUILD_FREQ, - d->codewords, - d->literal_freqs, - d->literal_decode_table, - LZMS_LITERAL_TABLEBITS); - - lzms_init_huffman_code(&d->lz_offset_rebuild_info, - num_offset_slots, - LZMS_LZ_OFFSET_CODE_REBUILD_FREQ, - d->codewords, - d->lz_offset_freqs, - d->lz_offset_decode_table, - LZMS_LZ_OFFSET_TABLEBITS); - - lzms_init_huffman_code(&d->length_rebuild_info, - LZMS_NUM_LENGTH_SYMS, - LZMS_LENGTH_CODE_REBUILD_FREQ, - d->codewords, - d->length_freqs, - d->length_decode_table, - LZMS_LENGTH_TABLEBITS); - - lzms_init_huffman_code(&d->delta_offset_rebuild_info, - num_offset_slots, - LZMS_DELTA_OFFSET_CODE_REBUILD_FREQ, - d->codewords, - d->delta_offset_freqs, - d->delta_offset_decode_table, - LZMS_DELTA_OFFSET_TABLEBITS); - - lzms_init_huffman_code(&d->delta_power_rebuild_info, - LZMS_NUM_DELTA_POWER_SYMS, - LZMS_DELTA_POWER_CODE_REBUILD_FREQ, - d->codewords, - d->delta_power_freqs, - d->delta_power_decode_table, - LZMS_DELTA_POWER_TABLEBITS); -} - -static int -lzms_create_decompressor(size_t max_bufsize, void **d_ret) -{ - struct lzms_decompressor *d; - - if (max_bufsize > LZMS_MAX_BUFFER_SIZE) - return WIMLIB_ERR_INVALID_PARAM; - - d = ALIGNED_MALLOC(sizeof(struct lzms_decompressor), - DECODE_TABLE_ALIGNMENT); - if (!d) - return WIMLIB_ERR_NOMEM; - - *d_ret = d; - return 0; -} - -/* - * Decompress @in_nbytes bytes of LZMS-compressed data at @in and write the - * uncompressed data, which had original size @out_nbytes, to @out. Return 0 if - * successful or -1 if the compressed data is invalid. - */ -static int -lzms_decompress(const void *in, size_t in_nbytes, void *out, size_t out_nbytes, - void *_d) -{ - struct lzms_decompressor *d = _d; - - /* - * Requirements on the compressed data: - * - * 1. LZMS-compressed data is a series of 16-bit integers, so the - * compressed data buffer cannot take up an odd number of bytes. - * 2. To prevent poor performance on some architectures, we require that - * the compressed data buffer is 2-byte aligned. - * 3. There must be at least 4 bytes of compressed data, since otherwise - * we cannot even initialize the range decoder. - */ - if ((in_nbytes & 1) || ((uintptr_t)in & 1) || (in_nbytes < 4)) - return -1; - - lzms_init_decompressor(d, in, in_nbytes, - lzms_get_num_offset_slots(out_nbytes)); - - if (lzms_decode_items(d, out, out_nbytes)) - return -1; lzms_x86_filter(out, out_nbytes, d->last_target_usages, true); return 0;