X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fxpress-compress.c;h=46f683f1895c0517ad953c340b918b79b6c1b1a3;hp=518a0e3ba70b23fdd24a85df93a95c5da33f4f52;hb=386f6bd95d4579f4d54afdf40c602b85a6f19245;hpb=49a63aa13cdeb4c1348697ccd92207a1a65ec7b0 diff --git a/src/xpress-compress.c b/src/xpress-compress.c index 518a0e3b..46f683f1 100644 --- a/src/xpress-compress.c +++ b/src/xpress-compress.c @@ -7,7 +7,7 @@ */ /* - * Copyright (C) 2012, 2013 Eric Biggers + * Copyright (C) 2012, 2013, 2014 Eric Biggers * * This file is part of wimlib, a library for working with WIM files. * @@ -31,19 +31,32 @@ #include "wimlib.h" #include "wimlib/assert.h" -#include "wimlib/compress.h" +#include "wimlib/compressor_ops.h" +#include "wimlib/compress_common.h" #include "wimlib/error.h" +#include "wimlib/lz_hash.h" #include "wimlib/util.h" #include "wimlib/xpress.h" -#ifdef HAVE_ALLOCA_H -# include -#endif - #include +struct xpress_record_ctx { + u32 freqs[XPRESS_NUM_SYMBOLS]; + struct xpress_item *chosen_items; +}; + +struct xpress_compressor { + u8 *window; + u32 max_window_size; + struct xpress_item *chosen_items; + u32 *prev_tab; + u32 codewords[XPRESS_NUM_SYMBOLS]; + u8 lens[XPRESS_NUM_SYMBOLS]; + struct xpress_record_ctx record_ctx; +}; + /* Intermediate XPRESS match/literal representation. */ -struct xpress_match { +struct xpress_item { u16 adjusted_len; /* Match length minus XPRESS_MIN_MATCH_LEN */ u16 offset; /* Match offset */ /* For literals, offset == 0 and adjusted_len is the literal. */ @@ -56,9 +69,9 @@ struct xpress_match { * @codewords and @lens provide the Huffman code that is being used. */ static void -xpress_write_match(struct xpress_match match, +xpress_write_match(struct xpress_item match, struct output_bitstream *restrict ostream, - const u16 codewords[restrict], + const u32 codewords[restrict], const u8 lens[restrict]) { u8 len_hdr = min(match.adjusted_len, 0xf); @@ -79,36 +92,32 @@ xpress_write_match(struct xpress_match match, } static void -xpress_write_matches_and_literals(struct output_bitstream *ostream, - const struct xpress_match matches[restrict], - input_idx_t num_matches, - const u16 codewords[restrict], - const u8 lens[restrict]) +xpress_write_items(struct output_bitstream *ostream, + const struct xpress_item items[restrict], + u32 num_items, + const u32 codewords[restrict], + const u8 lens[restrict]) { - for (input_idx_t i = 0; i < num_matches; i++) { - if (matches[i].offset) { - /* Real match */ - xpress_write_match(matches[i], ostream, codewords, lens); + for (u32 i = 0; i < num_items; i++) { + if (items[i].offset) { + /* Match */ + xpress_write_match(items[i], ostream, codewords, lens); } else { - /* Literal byte */ - u8 lit = matches[i].adjusted_len; + /* Literal */ + u8 lit = items[i].adjusted_len; bitstream_put_bits(ostream, codewords[lit], lens[lit]); } } bitstream_put_bits(ostream, codewords[XPRESS_END_OF_DATA], lens[XPRESS_END_OF_DATA]); } -struct xpress_record_ctx { - freq_t freqs[XPRESS_NUM_SYMBOLS]; - struct xpress_match *matches; -}; - static void xpress_record_literal(u8 lit, void *_ctx) { struct xpress_record_ctx *ctx = _ctx; ctx->freqs[lit]++; - *(ctx->matches++) = (struct xpress_match) { .offset = 0, .adjusted_len = lit }; + *(ctx->chosen_items++) = + (struct xpress_item) { .offset = 0, .adjusted_len = lit }; } static void @@ -129,8 +138,9 @@ xpress_record_match(unsigned len, unsigned offset, void *_ctx) XPRESS_ASSERT(sym < XPRESS_NUM_SYMBOLS); ctx->freqs[sym]++; - *(ctx->matches++) = (struct xpress_match) { .offset = offset, - .adjusted_len = adjusted_len }; + *(ctx->chosen_items++) = + (struct xpress_item) { .offset = offset, + .adjusted_len = adjusted_len }; } static const struct lz_params xpress_lz_params = { @@ -144,27 +154,16 @@ static const struct lz_params xpress_lz_params = { .too_far = 4096, }; -/* API function documented in wimlib.h */ -WIMLIBAPI unsigned -wimlib_xpress_compress(const void * restrict uncompressed_data, - unsigned uncompressed_len, - void * restrict compressed_data) +static size_t +xpress_compress(const void *uncompressed_data, size_t uncompressed_size, + void *compressed_data, size_t compressed_size_avail, void *_c) { + struct xpress_compressor *c = _c; u8 *cptr = compressed_data; struct output_bitstream ostream; - - struct xpress_record_ctx record_ctx; - - struct xpress_match *matches; - input_idx_t *prev_tab; - u8 *udata; - - u16 codewords[XPRESS_NUM_SYMBOLS]; - u8 lens[XPRESS_NUM_SYMBOLS]; - input_idx_t num_matches; - input_idx_t compressed_len; - input_idx_t i; - const size_t stack_max = 65536; + u32 num_chosen_items; + u32 i; + size_t compressed_size; /* XPRESS requires 256 bytes of overhead for the Huffman code, so it's * impossible to compress 256 bytes or less of data to less than the @@ -175,94 +174,161 @@ wimlib_xpress_compress(const void * restrict uncompressed_data, * * +4 to take into account that init_output_bitstream() requires at * least 4 bytes of data. */ - if (uncompressed_len < XPRESS_NUM_SYMBOLS / 2 + 1 + 4) + if (compressed_size_avail < XPRESS_NUM_SYMBOLS / 2 + 1 + 4) return 0; - if (uncompressed_len <= stack_max) { - matches = alloca(uncompressed_len * sizeof(matches[0])); - udata = alloca(uncompressed_len + 8); - prev_tab = alloca(uncompressed_len * sizeof(prev_tab[0])); - } else { - matches = MALLOC(uncompressed_len * sizeof(matches[0])); - udata = MALLOC(uncompressed_len + 8); - prev_tab = MALLOC(uncompressed_len * sizeof(prev_tab[0])); - if (matches == NULL || udata == NULL || prev_tab == NULL) { - WARNING("Failed to allocate memory for compression..."); - compressed_len = 0; - goto out_free; - } - } - /* Copy the data to a temporary buffer, but only to avoid * inconsequential accesses of uninitialized memory in * lz_analyze_block(). */ - memcpy(udata, uncompressed_data, uncompressed_len); - memset(udata + uncompressed_len, 0, 8); + memcpy(c->window, uncompressed_data, uncompressed_size); + memset(c->window + uncompressed_size, 0, 8); /* Determine match/literal sequence to divide the data into. */ - ZERO_ARRAY(record_ctx.freqs); - record_ctx.matches = matches; - lz_analyze_block(udata, - uncompressed_len, + memset(c->record_ctx.freqs, 0, sizeof(c->record_ctx.freqs)); + c->record_ctx.chosen_items = c->chosen_items; + lz_analyze_block(c->window, + uncompressed_size, xpress_record_match, xpress_record_literal, - &record_ctx, + &c->record_ctx, &xpress_lz_params, - prev_tab); + c->prev_tab); - num_matches = (record_ctx.matches - matches); + num_chosen_items = (c->record_ctx.chosen_items - c->chosen_items); /* Account for end of data symbol. */ - record_ctx.freqs[XPRESS_END_OF_DATA]++; + c->record_ctx.freqs[XPRESS_END_OF_DATA]++; /* Build the Huffman code. */ make_canonical_huffman_code(XPRESS_NUM_SYMBOLS, XPRESS_MAX_CODEWORD_LEN, - record_ctx.freqs, lens, codewords); + c->record_ctx.freqs, c->lens, c->codewords); /* Output the Huffman code as a series of 512 4-bit lengths. */ for (i = 0; i < XPRESS_NUM_SYMBOLS; i += 2) - *cptr++ = (lens[i] & 0xf) | (lens[i + 1] << 4); + *cptr++ = (c->lens[i] & 0xf) | (c->lens[i + 1] << 4); /* Output the encoded matches/literals. */ init_output_bitstream(&ostream, cptr, - uncompressed_len - XPRESS_NUM_SYMBOLS / 2 - 1); - xpress_write_matches_and_literals(&ostream, matches, - num_matches, codewords, lens); + compressed_size_avail - XPRESS_NUM_SYMBOLS / 2 - 1); + + xpress_write_items(&ostream, c->chosen_items, num_chosen_items, + c->codewords, c->lens); /* Flush any pending data and get the length of the compressed data. */ - compressed_len = flush_output_bitstream(&ostream); - if (compressed_len == ~(input_idx_t)0) { - compressed_len = 0; - goto out_free; - } - compressed_len += XPRESS_NUM_SYMBOLS / 2; + compressed_size = flush_output_bitstream(&ostream); + if (compressed_size == (u32)~0UL) + return 0; + + compressed_size += XPRESS_NUM_SYMBOLS / 2; -#if defined(ENABLE_XPRESS_DEBUG) || defined(ENABLE_VERIFY_COMPRESSION) || 1 +#if defined(ENABLE_XPRESS_DEBUG) || defined(ENABLE_VERIFY_COMPRESSION) /* Verify that we really get the same thing back when decompressing. */ - if (wimlib_xpress_decompress(compressed_data, compressed_len, - udata, uncompressed_len)) { - ERROR("Failed to decompress data we " - "compressed using XPRESS algorithm"); - wimlib_assert(0); - compressed_len = 0; - goto out_free; - } - - if (memcmp(uncompressed_data, udata, uncompressed_len)) { - ERROR("Data we compressed using XPRESS algorithm " - "didn't decompress to original"); - wimlib_assert(0); - compressed_len = 0; - goto out_free; + struct wimlib_decompressor *decompressor; + + if (0 == wimlib_create_decompressor(WIMLIB_COMPRESSION_TYPE_XPRESS, + c->max_window_size, + NULL, + &decompressor)) + { + int ret; + ret = wimlib_decompress(compressed_data, + compressed_size, + c->window, + uncompressed_size, + decompressor); + wimlib_free_decompressor(decompressor); + + if (ret) { + ERROR("Failed to decompress data we " + "compressed using XPRESS algorithm"); + wimlib_assert(0); + return 0; + } + if (memcmp(uncompressed_data, c->window, + uncompressed_size)) + { + ERROR("Data we compressed using XPRESS algorithm " + "didn't decompress to original"); + wimlib_assert(0); + return 0; + } + } else { + WARNING("Failed to create decompressor for " + "data verification!"); + } } #endif -out_free: - if (uncompressed_len > stack_max) { - FREE(matches); - FREE(udata); - FREE(prev_tab); + return compressed_size; +} + +static void +xpress_free_compressor(void *_c) +{ + struct xpress_compressor *c = _c; + + if (c) { + FREE(c->window); + FREE(c->chosen_items); + FREE(c->prev_tab); + FREE(c); } - return compressed_len; } + +static int +xpress_create_compressor(size_t max_window_size, + const struct wimlib_compressor_params_header *params, + void **c_ret) +{ + struct xpress_compressor *c; + + if (max_window_size == 0 || max_window_size > (1U << 26)) + return WIMLIB_ERR_INVALID_PARAM; + + c = CALLOC(1, sizeof(struct xpress_compressor)); + if (c == NULL) + goto oom; + + c->window = MALLOC(max_window_size + 8); + if (c->window == NULL) + goto oom; + + c->max_window_size = max_window_size; + + c->chosen_items = MALLOC(max_window_size * sizeof(c->chosen_items[0])); + if (c->chosen_items == NULL) + goto oom; + + c->prev_tab = MALLOC(max_window_size * sizeof(c->prev_tab[0])); + if (c->prev_tab == NULL) + goto oom; + + *c_ret = c; + return 0; + +oom: + xpress_free_compressor(c); + return WIMLIB_ERR_NOMEM; +} + +static u64 +xpress_get_needed_memory(size_t max_window_size, + const struct wimlib_compressor_params_header *params) +{ + u64 size = 0; + + size += sizeof(struct xpress_compressor); + size += max_window_size + 8; + size += max_window_size * sizeof(((struct xpress_compressor*)0)->chosen_items[0]); + size += max_window_size * sizeof(((struct xpress_compressor*)0)->prev_tab[0]); + + return size; +} + +const struct compressor_ops xpress_compressor_ops = { + .get_needed_memory = xpress_get_needed_memory, + .create_compressor = xpress_create_compressor, + .compress = xpress_compress, + .free_compressor = xpress_free_compressor, +};