X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flzx_compress.c;h=28a2656fb45852006f529d3df2f686c67fd6bbd2;hp=4434cde34a02bd13a8b1685765207c2d8b15bee1;hb=de567a8c5dcd0910a8c762d75bf11b9c9683396c;hpb=e7a3df0a6bf2af6500611f6c464dc36cab3332d8 diff --git a/src/lzx_compress.c b/src/lzx_compress.c index 4434cde3..28a2656f 100644 --- a/src/lzx_compress.c +++ b/src/lzx_compress.c @@ -82,7 +82,7 @@ * cache. However, fallback behavior (immediately terminating the block) on * cache overflow is still required. */ -#define LZX_CACHE_PER_POS 6 +#define LZX_CACHE_PER_POS 7 /* * LZX_CACHE_LENGTH is the number of lz_match structures in the match cache, @@ -105,7 +105,7 @@ /* * LZX_BIT_COST is a scaling factor that represents the cost to output one bit. - * THis makes it possible to consider fractional bit costs. + * This makes it possible to consider fractional bit costs. * * Note: this is only useful as a statistical trick for when the true costs are * unknown. In reality, each token in LZX requires a whole number of bits to @@ -120,14 +120,15 @@ #define LZX_CONSIDER_ALIGNED_COSTS 0 /* - * The maximum compression level at which we use the faster algorithm. + * LZX_MAX_FAST_LEVEL is the maximum compression level at which we use the + * faster algorithm. */ #define LZX_MAX_FAST_LEVEL 34 /* * LZX_HASH2_ORDER is the log base 2 of the number of entries in the hash table * for finding length 2 matches. This can be as high as 16 (in which case the - * hash function is trivial), but using a smaller hash table actually speeds up + * hash function is trivial), but using a smaller hash table speeds up * compression due to reduced cache pressure. */ #define LZX_HASH2_ORDER 12 @@ -145,7 +146,6 @@ #include "wimlib/bt_matchfinder.h" #include "wimlib/compress_common.h" #include "wimlib/compressor_ops.h" -#include "wimlib/endianness.h" #include "wimlib/error.h" #include "wimlib/hc_matchfinder.h" #include "wimlib/lz_extend.h" @@ -368,6 +368,10 @@ struct lzx_compressor { /* Pointer to the compress() implementation chosen at allocation time */ void (*impl)(struct lzx_compressor *, struct lzx_output_bitstream *); + /* If true, the compressor need not preserve the input buffer if it + * compresses the data successfully. */ + bool destructive; + /* The Huffman symbol frequency counters for the current block. */ struct lzx_freqs freqs; @@ -442,7 +446,7 @@ struct lzx_compressor { * contains the number of matches that were found at * that position; this is followed by the matches * themselves, if any, sorted by strictly increasing - * length and strictly increasing offset. + * length. * * Note: in rare cases, there will be a very high number * of matches in the block and this array will overflow. @@ -486,14 +490,15 @@ struct lzx_output_bitstream { u32 bitcount; /* Pointer to the start of the output buffer. */ - le16 *start; + u8 *start; /* Pointer to the position in the output buffer at which the next coding * unit should be written. */ - le16 *next; + u8 *next; - /* Pointer past the end of the output buffer. */ - le16 *end; + /* Pointer just past the end of the output buffer, rounded down to a + * 2-byte boundary. */ + u8 *end; }; /* @@ -513,7 +518,7 @@ lzx_init_output(struct lzx_output_bitstream *os, void *buffer, size_t size) os->bitcount = 0; os->start = buffer; os->next = os->start; - os->end = os->start + size / sizeof(le16); + os->end = os->start + (size & ~1); } /* @@ -523,9 +528,9 @@ lzx_init_output(struct lzx_output_bitstream *os, void *buffer, size_t size) * bits in @bits cannot be set. At most 17 bits can be written at once. * * @max_num_bits is a compile-time constant that specifies the maximum number of - * bits that can ever be written at the call site. Currently, it is used to - * optimize away the conditional code for writing a second 16-bit coding unit - * when writing fewer than 17 bits. + * bits that can ever be written at the call site. It is used to optimize away + * the conditional code for writing a second 16-bit coding unit when writing + * fewer than 17 bits. * * If the output buffer space is exhausted, then the bits will be ignored, and * lzx_flush_output() will return 0 when it gets called. @@ -553,16 +558,20 @@ lzx_write_varbits(struct lzx_output_bitstream *os, os->bitcount -= 16; /* Write a coding unit, unless it would overflow the buffer. */ - if (os->next != os->end) - put_unaligned_u16_le(os->bitbuf >> os->bitcount, os->next++); + if (os->next != os->end) { + put_unaligned_u16_le(os->bitbuf >> os->bitcount, os->next); + os->next += 2; + } /* If writing 17 bits, a second coding unit might need to be * written. But because 'max_num_bits' is a compile-time * constant, the compiler will optimize away this code at most * call sites. */ if (max_num_bits == 17 && os->bitcount == 16) { - if (os->next != os->end) - put_unaligned_u16_le(os->bitbuf, os->next++); + if (os->next != os->end) { + put_unaligned_u16_le(os->bitbuf, os->next); + os->next += 2; + } os->bitcount = 0; } } @@ -571,8 +580,7 @@ lzx_write_varbits(struct lzx_output_bitstream *os, /* Use when @num_bits is a compile-time constant. Otherwise use * lzx_write_varbits(). */ static inline void -lzx_write_bits(struct lzx_output_bitstream *os, - const u32 bits, const unsigned num_bits) +lzx_write_bits(struct lzx_output_bitstream *os, u32 bits, unsigned num_bits) { lzx_write_varbits(os, bits, num_bits, num_bits); } @@ -587,10 +595,12 @@ lzx_flush_output(struct lzx_output_bitstream *os) if (os->next == os->end) return 0; - if (os->bitcount != 0) - put_unaligned_u16_le(os->bitbuf << (16 - os->bitcount), os->next++); + if (os->bitcount != 0) { + put_unaligned_u16_le(os->bitbuf << (16 - os->bitcount), os->next); + os->next += 2; + } - return (const u8 *)os->next - (const u8 *)os->start; + return os->next - os->start; } /* Build the main, length, and aligned offset Huffman codes used in LZX. @@ -1204,7 +1214,7 @@ lzx_tally_item_list(struct lzx_compressor *c, struct lzx_optimum_node *cur_node) * Also, note that because of the presence of the recent offsets queue (which is * a type of adaptive state), the algorithm cannot work backwards and compute * "cost to end" instead of "cost to beginning". Furthermore, the way the - * algorithm handles this adaptive state in the "minimum-cost" parse is actually + * algorithm handles this adaptive state in the "minimum cost" parse is actually * only an approximation. It's possible for the globally optimal, minimum cost * path to contain a prefix, ending at a position, where that path prefix is * *not* the minimum cost path to that position. This can happen if such a path @@ -1425,7 +1435,7 @@ lzx_find_min_cost_path(struct lzx_compressor * const restrict c, } } while (cur_node != end_node); - /* Return the match offset queue at the end of the minimum-cost path. */ + /* Return the match offset queue at the end of the minimum cost path. */ return QUEUE(block_end); } @@ -1614,13 +1624,11 @@ lzx_compress_near_optimal(struct lzx_compressor *c, max_len = in_end - in_next; nice_len = min(max_len, nice_len); - /* This extra check is needed to ensure that - * reading the next 3 bytes when looking for a - * length 2 match is valid. In addition, we - * cannot allow ourselves to find a length 2 - * match of the very last two bytes with the - * very first two bytes, since such a match has - * an offset too large to be represented. */ + /* This extra check is needed to ensure that we + * never output a length 2 match of the very + * last two bytes with the very first two bytes, + * since such a match has an offset too large to + * be represented. */ if (unlikely(max_len < 3)) { in_next++; cache_ptr->length = 0; @@ -1638,8 +1646,7 @@ lzx_compress_near_optimal(struct lzx_compressor *c, if (matchfinder_node_valid(cur_match) && (LZX_HASH2_ORDER == 16 || load_u16_unaligned(&in_begin[cur_match]) == - load_u16_unaligned(in_next)) && - in_begin[cur_match + 2] != in_next[2]) + load_u16_unaligned(in_next))) { lz_matchptr->length = 2; lz_matchptr->offset = in_next - &in_begin[cur_match]; @@ -2005,7 +2012,8 @@ lzx_get_compressor_size(size_t max_bufsize, unsigned compression_level) } static u64 -lzx_get_needed_memory(size_t max_bufsize, unsigned compression_level) +lzx_get_needed_memory(size_t max_bufsize, unsigned compression_level, + bool destructive) { u64 size = 0; @@ -2013,13 +2021,14 @@ lzx_get_needed_memory(size_t max_bufsize, unsigned compression_level) return 0; size += lzx_get_compressor_size(max_bufsize, compression_level); - size += max_bufsize; /* in_buffer */ + if (!destructive) + size += max_bufsize; /* in_buffer */ return size; } static int lzx_create_compressor(size_t max_bufsize, unsigned compression_level, - void **c_ret) + bool destructive, void **c_ret) { unsigned window_order; struct lzx_compressor *c; @@ -2034,12 +2043,16 @@ lzx_create_compressor(size_t max_bufsize, unsigned compression_level, if (!c) goto oom0; + c->destructive = destructive; + c->num_main_syms = lzx_get_num_main_syms(window_order); c->window_order = window_order; - c->in_buffer = MALLOC(max_bufsize); - if (!c->in_buffer) - goto oom1; + if (!c->destructive) { + c->in_buffer = MALLOC(max_bufsize); + if (!c->in_buffer) + goto oom1; + } if (compression_level <= LZX_MAX_FAST_LEVEL) { @@ -2112,13 +2125,17 @@ lzx_compress(const void *in, size_t in_nbytes, { struct lzx_compressor *c = _c; struct lzx_output_bitstream os; + size_t result; /* Don't bother trying to compress very small inputs. */ if (in_nbytes < 100) return 0; /* Copy the input data into the internal buffer and preprocess it. */ - memcpy(c->in_buffer, in, in_nbytes); + if (c->destructive) + c->in_buffer = (void *)in; + else + memcpy(c->in_buffer, in, in_nbytes); c->in_nbytes = in_nbytes; lzx_do_e8_preprocessing(c->in_buffer, in_nbytes); @@ -2133,7 +2150,10 @@ lzx_compress(const void *in, size_t in_nbytes, (*c->impl)(c, &os); /* Flush the output bitstream and return the compressed size or 0. */ - return lzx_flush_output(&os); + result = lzx_flush_output(&os); + if (!result && c->destructive) + lzx_undo_e8_preprocessing(c->in_buffer, c->in_nbytes); + return result; } static void @@ -2141,7 +2161,8 @@ lzx_free_compressor(void *_c) { struct lzx_compressor *c = _c; - FREE(c->in_buffer); + if (!c->destructive) + FREE(c->in_buffer); ALIGNED_FREE(c); }