X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fxpress_decompress.c;h=d6e606cb962df887311488d554e8e0e0b1bdd1ad;hp=8a9f491c19be8b4fe644d5f4d9e59616759c020b;hb=HEAD;hpb=4458b62fdbcf9fcb94f19623907086fc995e721f diff --git a/src/xpress_decompress.c b/src/xpress_decompress.c index 8a9f491c..035bcfd0 100644 --- a/src/xpress_decompress.c +++ b/src/xpress_decompress.c @@ -6,7 +6,7 @@ /* * - * Copyright (C) 2012, 2013 Eric Biggers + * Copyright (C) 2012-2016 Eric Biggers * * This file is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -19,7 +19,7 @@ * details. * * You should have received a copy of the GNU Lesser General Public License - * along with this file; if not, see http://www.gnu.org/licenses/. + * along with this file; if not, see https://www.gnu.org/licenses/. */ @@ -58,9 +58,9 @@ * fail during decompression if the Huffman symbol 256 is not found after * the actual data." * - * This is the case for the implementation in WIMGAPI. However, wimlib's - * decompressor in this file currently does not care if this extra symbol is - * there or not. + * This is the case with Microsoft's implementation in WIMGAPI, for example. So + * although our implementation doesn't currently check for this extra symbol, + * compressors would be wise to add it. */ #ifdef HAVE_CONFIG_H @@ -70,110 +70,117 @@ #include "wimlib/decompressor_ops.h" #include "wimlib/decompress_common.h" #include "wimlib/error.h" +#include "wimlib/util.h" #include "wimlib/xpress_constants.h" /* This value is chosen for fast decompression. */ -#define XPRESS_TABLEBITS 12 +#define XPRESS_TABLEBITS 11 + +struct xpress_decompressor { + union { + DECODE_TABLE(decode_table, XPRESS_NUM_SYMBOLS, + XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN); + u8 lens[XPRESS_NUM_SYMBOLS]; + }; + DECODE_TABLE_WORKING_SPACE(working_space, XPRESS_NUM_SYMBOLS, + XPRESS_MAX_CODEWORD_LEN); +} __attribute__((aligned(DECODE_TABLE_ALIGNMENT))); -/* Decode the matches and literal bytes in a region of XPRESS-encoded data. */ static int -xpress_decode_window(struct input_bitstream *istream, const u16 *decode_table, - u8 *window, unsigned window_size) +xpress_decompress(const void *restrict compressed_data, size_t compressed_size, + void *restrict uncompressed_data, size_t uncompressed_size, + void *restrict _d) { - u8 *window_ptr = window; - u8 *window_end = &window[window_size]; - unsigned sym; - unsigned match_len; - unsigned offset_high_bit; - unsigned match_offset; - - while (window_ptr != window_end) { + struct xpress_decompressor *d = _d; + const u8 * const in_begin = compressed_data; + u8 * const out_begin = uncompressed_data; + u8 *out_next = out_begin; + u8 * const out_end = out_begin + uncompressed_size; + struct input_bitstream is; + + /* Read the Huffman codeword lengths. */ + if (compressed_size < XPRESS_NUM_SYMBOLS / 2) + return -1; + for (int i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) { + d->lens[2 * i + 0] = in_begin[i] & 0xf; + d->lens[2 * i + 1] = in_begin[i] >> 4; + } - sym = read_huffsym(istream, decode_table, - XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN); - if (sym < XPRESS_NUM_CHARS) { - /* Literal */ - *window_ptr++ = sym; - continue; - } + /* Build a decoding table for the Huffman code. */ + if (make_huffman_decode_table(d->decode_table, XPRESS_NUM_SYMBOLS, + XPRESS_TABLEBITS, d->lens, + XPRESS_MAX_CODEWORD_LEN, + d->working_space)) + return -1; - /* Match */ - match_len = sym & 0xf; - offset_high_bit = (sym >> 4) & 0xf; + /* Decode the matches and literals. */ - bitstream_ensure_bits(istream, 16); + init_input_bitstream(&is, in_begin + XPRESS_NUM_SYMBOLS / 2, + compressed_size - XPRESS_NUM_SYMBOLS / 2); - match_offset = (1 << offset_high_bit) | - bitstream_pop_bits(istream, offset_high_bit); + while (out_next != out_end) { + unsigned sym; + unsigned log2_offset; + u32 length; + u32 offset; - if (match_len == 0xf) { - match_len += bitstream_read_byte(istream); - if (match_len == 0xf + 0xff) - match_len = bitstream_read_u16(istream); + sym = read_huffsym(&is, d->decode_table, + XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN); + if (sym < XPRESS_NUM_CHARS) { + /* Literal */ + *out_next++ = sym; + } else { + /* Match */ + length = sym & 0xf; + log2_offset = (sym >> 4) & 0xf; + + bitstream_ensure_bits(&is, 16); + + offset = ((u32)1 << log2_offset) | + bitstream_pop_bits(&is, log2_offset); + + if (length == 0xf) { + length += bitstream_read_byte(&is); + if (length == 0xf + 0xff) + length = bitstream_read_u16(&is); + } + length += XPRESS_MIN_MATCH_LEN; + + if (unlikely(lz_copy(length, offset, + out_begin, out_next, out_end, + XPRESS_MIN_MATCH_LEN))) + return -1; + + out_next += length; } - match_len += XPRESS_MIN_MATCH_LEN; - - if (unlikely(match_offset > window_ptr - window)) - return -1; - - if (unlikely(match_len > window_end - window_ptr)) - return -1; - - lz_copy(window_ptr, match_len, match_offset, window_end, - XPRESS_MIN_MATCH_LEN); - - window_ptr += match_len; } return 0; } static int -xpress_decompress(const void *compressed_data, size_t compressed_size, - void *uncompressed_data, size_t uncompressed_size, void *_ctx) +xpress_create_decompressor(size_t max_block_size, void **d_ret) { - const u8 *cdata = compressed_data; - u8 *lens_p; - union { - u16 decode_table[(1 << XPRESS_TABLEBITS) + 2 * XPRESS_NUM_SYMBOLS] - _aligned_attribute(DECODE_TABLE_ALIGNMENT); - u8 lens[XPRESS_NUM_SYMBOLS]; - } u; - struct input_bitstream istream; + struct xpress_decompressor *d; - /* XPRESS uses only one Huffman code. It contains 512 symbols, and the - * code lengths of these symbols are given literally as 4-bit integers - * in the first 256 bytes of the compressed data. */ - if (compressed_size < XPRESS_NUM_SYMBOLS / 2) - return -1; - - lens_p = u.lens; - for (unsigned i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) { - *lens_p++ = cdata[i] & 0xf; - *lens_p++ = cdata[i] >> 4; - } - - if (make_huffman_decode_table(u.decode_table, XPRESS_NUM_SYMBOLS, - XPRESS_TABLEBITS, u.lens, - XPRESS_MAX_CODEWORD_LEN)) - return -1; + if (max_block_size > XPRESS_MAX_OFFSET + 1) + return WIMLIB_ERR_INVALID_PARAM; - init_input_bitstream(&istream, cdata + XPRESS_NUM_SYMBOLS / 2, - compressed_size - XPRESS_NUM_SYMBOLS / 2); + d = ALIGNED_MALLOC(sizeof(*d), DECODE_TABLE_ALIGNMENT); + if (!d) + return WIMLIB_ERR_NOMEM; - return xpress_decode_window(&istream, u.decode_table, - uncompressed_data, uncompressed_size); + *d_ret = d; + return 0; } -static int -xpress_create_decompressor(size_t max_block_size, void **dec_ret) +static void +xpress_free_decompressor(void *_d) { - if (max_block_size > XPRESS_MAX_OFFSET + 1) - return WIMLIB_ERR_INVALID_PARAM; - - return 0; + ALIGNED_FREE(_d); } const struct decompressor_ops xpress_decompressor_ops = { .create_decompressor = xpress_create_decompressor, .decompress = xpress_decompress, + .free_decompressor = xpress_free_decompressor, };