]> wimlib.net Git - wimlib/blobdiff - src/xpress-compress.c
xpress-compress.c: Rename xpress_match => xpress_item
[wimlib] / src / xpress-compress.c
index 4b2469498871b682b9357112249363a5df06fd04..46f683f1895c0517ad953c340b918b79b6c1b1a3 100644 (file)
@@ -7,7 +7,7 @@
  */
 
 /*
- * Copyright (C) 2012, 2013 Eric Biggers
+ * Copyright (C) 2012, 2013, 2014 Eric Biggers
  *
  * This file is part of wimlib, a library for working with WIM files.
  *
 
 #include "wimlib.h"
 #include "wimlib/assert.h"
-#include "wimlib/compress.h"
+#include "wimlib/compressor_ops.h"
+#include "wimlib/compress_common.h"
 #include "wimlib/error.h"
+#include "wimlib/lz_hash.h"
 #include "wimlib/util.h"
 #include "wimlib/xpress.h"
 
-#ifdef HAVE_ALLOCA_H
-#  include <alloca.h>
-#endif
-
 #include <string.h>
 
+struct xpress_record_ctx {
+       u32 freqs[XPRESS_NUM_SYMBOLS];
+       struct xpress_item *chosen_items;
+};
+
+struct xpress_compressor {
+       u8 *window;
+       u32 max_window_size;
+       struct xpress_item *chosen_items;
+       u32 *prev_tab;
+       u32 codewords[XPRESS_NUM_SYMBOLS];
+       u8 lens[XPRESS_NUM_SYMBOLS];
+       struct xpress_record_ctx record_ctx;
+};
+
 /* Intermediate XPRESS match/literal representation.  */
-struct xpress_match {
+struct xpress_item {
        u16 adjusted_len;  /* Match length minus XPRESS_MIN_MATCH_LEN */
        u16 offset;        /* Match offset */
        /* For literals, offset == 0 and adjusted_len is the literal.  */
@@ -56,9 +69,9 @@ struct xpress_match {
  * @codewords and @lens provide the Huffman code that is being used.
  */
 static void
-xpress_write_match(struct xpress_match match,
+xpress_write_match(struct xpress_item match,
                   struct output_bitstream *restrict ostream,
-                  const u16 codewords[restrict],
+                  const u32 codewords[restrict],
                   const u8 lens[restrict])
 {
        u8 len_hdr = min(match.adjusted_len, 0xf);
@@ -79,36 +92,32 @@ xpress_write_match(struct xpress_match match,
 }
 
 static void
-xpress_write_matches_and_literals(struct output_bitstream *ostream,
-                                 const struct xpress_match matches[restrict],
-                                 input_idx_t num_matches,
-                                 const u16 codewords[restrict],
-                                 const u8 lens[restrict])
+xpress_write_items(struct output_bitstream *ostream,
+                  const struct xpress_item items[restrict],
+                  u32 num_items,
+                  const u32 codewords[restrict],
+                  const u8 lens[restrict])
 {
-       for (input_idx_t i = 0; i < num_matches; i++) {
-               if (matches[i].offset) {
-                       /* Real match  */
-                       xpress_write_match(matches[i], ostream, codewords, lens);
+       for (u32 i = 0; i < num_items; i++) {
+               if (items[i].offset) {
+                       /* Match  */
+                       xpress_write_match(items[i], ostream, codewords, lens);
                } else {
-                       /* Literal byte  */
-                       u8 lit = matches[i].adjusted_len;
+                       /* Literal  */
+                       u8 lit = items[i].adjusted_len;
                        bitstream_put_bits(ostream, codewords[lit], lens[lit]);
                }
        }
        bitstream_put_bits(ostream, codewords[XPRESS_END_OF_DATA], lens[XPRESS_END_OF_DATA]);
 }
 
-struct xpress_record_ctx {
-       freq_t freqs[XPRESS_NUM_SYMBOLS];
-       struct xpress_match *matches;
-};
-
 static void
 xpress_record_literal(u8 lit, void *_ctx)
 {
        struct xpress_record_ctx *ctx = _ctx;
        ctx->freqs[lit]++;
-       *(ctx->matches++) = (struct xpress_match) { .offset = 0, .adjusted_len = lit };
+       *(ctx->chosen_items++) =
+               (struct xpress_item) { .offset = 0, .adjusted_len = lit };
 }
 
 static void
@@ -129,8 +138,9 @@ xpress_record_match(unsigned len, unsigned offset, void *_ctx)
        XPRESS_ASSERT(sym < XPRESS_NUM_SYMBOLS);
 
        ctx->freqs[sym]++;
-       *(ctx->matches++) = (struct xpress_match) { .offset = offset,
-                                                   .adjusted_len = adjusted_len };
+       *(ctx->chosen_items++) =
+               (struct xpress_item) { .offset = offset,
+                                      .adjusted_len = adjusted_len };
 }
 
 static const struct lz_params xpress_lz_params = {
@@ -144,27 +154,16 @@ static const struct lz_params xpress_lz_params = {
        .too_far        = 4096,
 };
 
-/* API function documented in wimlib.h  */
-WIMLIBAPI unsigned
-wimlib_xpress_compress(const void * restrict uncompressed_data,
-                      unsigned uncompressed_len,
-                      void * restrict compressed_data)
+static size_t
+xpress_compress(const void *uncompressed_data, size_t uncompressed_size,
+               void *compressed_data, size_t compressed_size_avail, void *_c)
 {
+       struct xpress_compressor *c = _c;
        u8 *cptr = compressed_data;
        struct output_bitstream ostream;
-
-       struct xpress_record_ctx record_ctx;
-
-       struct xpress_match *matches;
-       input_idx_t *prev_tab;
-       u8 *udata;
-
-       u16 codewords[XPRESS_NUM_SYMBOLS];
-       u8 lens[XPRESS_NUM_SYMBOLS];
-       input_idx_t num_matches;
-       input_idx_t compressed_len;
-       input_idx_t i;
-       const size_t stack_max = 65536;
+       u32 num_chosen_items;
+       u32 i;
+       size_t compressed_size;
 
        /* XPRESS requires 256 bytes of overhead for the Huffman code, so it's
         * impossible to compress 256 bytes or less of data to less than the
@@ -175,94 +174,161 @@ wimlib_xpress_compress(const void * restrict uncompressed_data,
         *
         * +4 to take into account that init_output_bitstream() requires at
         * least 4 bytes of data.  */
-       if (uncompressed_len < XPRESS_NUM_SYMBOLS / 2 + 1 + 4)
+       if (compressed_size_avail < XPRESS_NUM_SYMBOLS / 2 + 1 + 4)
                return 0;
 
-       if (uncompressed_len <= stack_max) {
-               matches = alloca(uncompressed_len * sizeof(matches[0]));
-               udata = alloca(uncompressed_len + 8);
-               prev_tab = alloca(uncompressed_len * sizeof(prev_tab[0]));
-       } else {
-               matches = MALLOC(uncompressed_len * sizeof(matches[0]));
-               udata = MALLOC(uncompressed_len + 8);
-               prev_tab = MALLOC(uncompressed_len * sizeof(prev_tab[0]));
-               if (matches == NULL || udata == NULL || prev_tab == NULL) {
-                       WARNING("Failed to allocate memory for compression...");
-                       compressed_len = 0;
-                       goto out_free;
-               }
-       }
-
        /* Copy the data to a temporary buffer, but only to avoid
         * inconsequential accesses of uninitialized memory in
         * lz_analyze_block().  */
-       memcpy(udata, uncompressed_data, uncompressed_len);
-       memset(udata + uncompressed_len, 0, 8);
+       memcpy(c->window, uncompressed_data, uncompressed_size);
+       memset(c->window + uncompressed_size, 0, 8);
 
        /* Determine match/literal sequence to divide the data into.  */
-       ZERO_ARRAY(record_ctx.freqs);
-       record_ctx.matches = matches;
-       lz_analyze_block(udata,
-                        uncompressed_len,
+       memset(c->record_ctx.freqs, 0, sizeof(c->record_ctx.freqs));
+       c->record_ctx.chosen_items = c->chosen_items;
+       lz_analyze_block(c->window,
+                        uncompressed_size,
                         xpress_record_match,
                         xpress_record_literal,
-                        &record_ctx,
+                        &c->record_ctx,
                         &xpress_lz_params,
-                        prev_tab);
+                        c->prev_tab);
 
-       num_matches = (record_ctx.matches - matches);
+       num_chosen_items = (c->record_ctx.chosen_items - c->chosen_items);
 
        /* Account for end of data symbol.  */
-       record_ctx.freqs[XPRESS_END_OF_DATA]++;
+       c->record_ctx.freqs[XPRESS_END_OF_DATA]++;
 
        /* Build the Huffman code.  */
        make_canonical_huffman_code(XPRESS_NUM_SYMBOLS, XPRESS_MAX_CODEWORD_LEN,
-                                   record_ctx.freqs, lens, codewords);
+                                   c->record_ctx.freqs, c->lens, c->codewords);
 
        /* Output the Huffman code as a series of 512 4-bit lengths.  */
        for (i = 0; i < XPRESS_NUM_SYMBOLS; i += 2)
-               *cptr++ = (lens[i] & 0xf) | (lens[i + 1] << 4);
+               *cptr++ = (c->lens[i] & 0xf) | (c->lens[i + 1] << 4);
 
        /* Output the encoded matches/literals.  */
        init_output_bitstream(&ostream, cptr,
-                             uncompressed_len - XPRESS_NUM_SYMBOLS / 2 - 1);
-       xpress_write_matches_and_literals(&ostream, matches,
-                                         num_matches, codewords, lens);
+                             compressed_size_avail - XPRESS_NUM_SYMBOLS / 2 - 1);
+
+       xpress_write_items(&ostream, c->chosen_items, num_chosen_items,
+                          c->codewords, c->lens);
 
        /* Flush any pending data and get the length of the compressed data.  */
-       compressed_len = flush_output_bitstream(&ostream);
-       if (compressed_len == ~(input_idx_t)0) {
-               compressed_len = 0;
-               goto out_free;
-       }
-       compressed_len += XPRESS_NUM_SYMBOLS / 2;
+       compressed_size = flush_output_bitstream(&ostream);
+       if (compressed_size == (u32)~0UL)
+               return 0;
+
+       compressed_size += XPRESS_NUM_SYMBOLS / 2;
 
 #if defined(ENABLE_XPRESS_DEBUG) || defined(ENABLE_VERIFY_COMPRESSION)
        /* Verify that we really get the same thing back when decompressing.  */
-       if (wimlib_xpress_decompress(compressed_data, compressed_len,
-                                    udata, uncompressed_len))
        {
-               ERROR("Failed to decompress data we "
-                     "compressed using XPRESS algorithm");
-               wimlib_assert(0);
-               compressed_len = 0;
-               goto out_free;
-       }
-
-       if (memcmp(uncompressed_data, udata, uncompressed_len)) {
-               ERROR("Data we compressed using XPRESS algorithm "
-                     "didn't decompress to original");
-               wimlib_assert(0);
-               compressed_len = 0;
-               goto out_free;
+               struct wimlib_decompressor *decompressor;
+
+               if (0 == wimlib_create_decompressor(WIMLIB_COMPRESSION_TYPE_XPRESS,
+                                                   c->max_window_size,
+                                                   NULL,
+                                                   &decompressor))
+               {
+                       int ret;
+                       ret = wimlib_decompress(compressed_data,
+                                               compressed_size,
+                                               c->window,
+                                               uncompressed_size,
+                                               decompressor);
+                       wimlib_free_decompressor(decompressor);
+
+                       if (ret) {
+                               ERROR("Failed to decompress data we "
+                                     "compressed using XPRESS algorithm");
+                               wimlib_assert(0);
+                               return 0;
+                       }
+                       if (memcmp(uncompressed_data, c->window,
+                                  uncompressed_size))
+                       {
+                               ERROR("Data we compressed using XPRESS algorithm "
+                                     "didn't decompress to original");
+                               wimlib_assert(0);
+                               return 0;
+                       }
+               } else {
+                       WARNING("Failed to create decompressor for "
+                               "data verification!");
+               }
        }
 #endif
 
-out_free:
-       if (uncompressed_len > stack_max) {
-               FREE(matches);
-               FREE(udata);
-               FREE(prev_tab);
+       return compressed_size;
+}
+
+static void
+xpress_free_compressor(void *_c)
+{
+       struct xpress_compressor *c = _c;
+
+       if (c) {
+               FREE(c->window);
+               FREE(c->chosen_items);
+               FREE(c->prev_tab);
+               FREE(c);
        }
-       return compressed_len;
 }
+
+static int
+xpress_create_compressor(size_t max_window_size,
+                        const struct wimlib_compressor_params_header *params,
+                        void **c_ret)
+{
+       struct xpress_compressor *c;
+
+       if (max_window_size == 0 || max_window_size > (1U << 26))
+               return WIMLIB_ERR_INVALID_PARAM;
+
+       c = CALLOC(1, sizeof(struct xpress_compressor));
+       if (c == NULL)
+               goto oom;
+
+       c->window = MALLOC(max_window_size + 8);
+       if (c->window == NULL)
+               goto oom;
+
+       c->max_window_size = max_window_size;
+
+       c->chosen_items = MALLOC(max_window_size * sizeof(c->chosen_items[0]));
+       if (c->chosen_items == NULL)
+               goto oom;
+
+       c->prev_tab = MALLOC(max_window_size * sizeof(c->prev_tab[0]));
+       if (c->prev_tab == NULL)
+               goto oom;
+
+       *c_ret = c;
+       return 0;
+
+oom:
+       xpress_free_compressor(c);
+       return WIMLIB_ERR_NOMEM;
+}
+
+static u64
+xpress_get_needed_memory(size_t max_window_size,
+                        const struct wimlib_compressor_params_header *params)
+{
+       u64 size = 0;
+
+       size += sizeof(struct xpress_compressor);
+       size += max_window_size + 8;
+       size += max_window_size * sizeof(((struct xpress_compressor*)0)->chosen_items[0]);
+       size += max_window_size * sizeof(((struct xpress_compressor*)0)->prev_tab[0]);
+
+       return size;
+}
+
+const struct compressor_ops xpress_compressor_ops = {
+       .get_needed_memory  = xpress_get_needed_memory,
+       .create_compressor  = xpress_create_compressor,
+       .compress           = xpress_compress,
+       .free_compressor    = xpress_free_compressor,
+};