From: Eric Biggers Date: Mon, 30 Dec 2013 02:59:08 +0000 (-0600) Subject: Update LZMS LRU queue handling X-Git-Tag: v1.6.0~35 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=027518fa61bd9232ff7fa7ecd270546d920c912f;hp=dbfee435692344cccd48bb4c7deb3af23ac80176 Update LZMS LRU queue handling --- diff --git a/include/wimlib/lzms.h b/include/wimlib/lzms.h index 177ad548..e5170734 100644 --- a/include/wimlib/lzms.h +++ b/include/wimlib/lzms.h @@ -68,6 +68,39 @@ struct lzms_probability_entry { u64 recent_bits; }; +/* LRU queues for LZ matches. */ +struct lzms_lz_lru_queues { + + /* Recent LZ match offsets */ + u32 recent_offsets[LZMS_NUM_RECENT_OFFSETS + 1]; + + /* These variables are used to delay updates to the LRU queues by one + * decoded item. */ + u32 prev_offset; + u32 upcoming_offset; +}; + +/* LRU queues for delta matches. */ +struct lzms_delta_lru_queues { + + /* Recent delta match powers and offsets */ + u32 recent_powers[LZMS_NUM_RECENT_OFFSETS + 1]; + u32 recent_offsets[LZMS_NUM_RECENT_OFFSETS + 1]; + + /* These variables are used to delay updates to the LRU queues by one + * decoded item. */ + u32 prev_power; + u32 prev_offset; + u32 upcoming_power; + u32 upcoming_offset; +}; + +/* LRU (least-recently-used) queues for match information. */ +struct lzms_lru_queues { + struct lzms_lz_lru_queues lz; + struct lzms_delta_lru_queues delta; +}; + extern u32 lzms_position_slot_base[LZMS_MAX_NUM_OFFSET_SYMS + 1]; extern u32 lzms_length_slot_base[LZMS_NUM_LEN_SYMS + 1]; @@ -92,4 +125,10 @@ lzms_get_length_slot(u32 value) LZMS_NUM_LEN_SYMS); } +extern void +lzms_init_lru_queues(struct lzms_lru_queues *lru); + +extern void +lzms_update_lru_queues(struct lzms_lru_queues *lru); + #endif /* _WIMLIB_LZMS_H */ diff --git a/src/lzms-common.c b/src/lzms-common.c index a6fc8014..dc118575 100644 --- a/src/lzms-common.c +++ b/src/lzms-common.c @@ -256,3 +256,70 @@ lzms_x86_filter(u8 data[restrict], } } } + +static void +lzms_init_lz_lru_queues(struct lzms_lz_lru_queues *lz) +{ + /* Recent offsets for LZ matches */ + for (u32 i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++) + lz->recent_offsets[i] = i + 1; + + lz->prev_offset = 0; + lz->upcoming_offset = 0; +} + +static void +lzms_init_delta_lru_queues(struct lzms_delta_lru_queues *delta) +{ + /* Recent offsets and powers for LZ matches */ + for (u32 i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++) { + delta->recent_offsets[i] = i + 1; + delta->recent_powers[i] = 0; + } + delta->prev_offset = 0; + delta->prev_power = 0; + delta->upcoming_offset = 0; + delta->upcoming_power = 0; +} + + +void +lzms_init_lru_queues(struct lzms_lru_queues *lru) +{ + lzms_init_lz_lru_queues(&lru->lz); + lzms_init_delta_lru_queues(&lru->delta); +} + +static void +lzms_update_lz_lru_queues(struct lzms_lz_lru_queues *lz) +{ + if (lz->prev_offset != 0) { + for (int i = LZMS_NUM_RECENT_OFFSETS - 1; i >= 0; i--) + lz->recent_offsets[i + 1] = lz->recent_offsets[i]; + lz->recent_offsets[0] = lz->prev_offset; + } + lz->prev_offset = lz->upcoming_offset; +} + +static void +lzms_update_delta_lru_queues(struct lzms_delta_lru_queues *delta) +{ + if (delta->prev_offset != 0) { + for (int i = LZMS_NUM_RECENT_OFFSETS - 1; i >= 0; i--) { + delta->recent_offsets[i + 1] = delta->recent_offsets[i]; + delta->recent_powers[i + 1] = delta->recent_powers[i]; + } + delta->recent_offsets[0] = delta->prev_offset; + delta->recent_powers[0] = delta->prev_power; + } + + delta->prev_offset = delta->upcoming_offset; + delta->prev_power = delta->upcoming_power; +} + +void +lzms_update_lru_queues(struct lzms_lru_queues *lru) +{ + lzms_update_lz_lru_queues(&lru->lz); + lzms_update_delta_lru_queues(&lru->delta); +} diff --git a/src/lzms-compress.c b/src/lzms-compress.c index f7ef633e..5a2e3ed5 100644 --- a/src/lzms-compress.c +++ b/src/lzms-compress.c @@ -40,6 +40,7 @@ #include "wimlib/endianness.h" #include "wimlib/error.h" #include "wimlib/lz_hash.h" +#include "wimlib/lz_sarray.h" #include "wimlib/lzms.h" #include "wimlib/util.h" @@ -174,6 +175,9 @@ struct lzms_compressor { * as the window. */ u32 *prev_tab; + /* Suffix array match-finder. */ + struct lz_sarray lz_sarray; + /* Maximum block size this compressor instantiation allows. This is the * allocated size of @window. */ u32 max_block_size; @@ -201,33 +205,13 @@ struct lzms_compressor { struct lzms_huffman_encoder delta_power_encoder; struct lzms_huffman_encoder delta_offset_encoder; - /* LRU (least-recently-used) queue of LZ match offsets. */ - u64 recent_lz_offsets[LZMS_NUM_RECENT_OFFSETS + 1]; - - /* LRU (least-recently-used) queue of delta match powers. */ - u32 recent_delta_powers[LZMS_NUM_RECENT_OFFSETS + 1]; - - /* LRU (least-recently-used) queue of delta match offsets. */ - u32 recent_delta_offsets[LZMS_NUM_RECENT_OFFSETS + 1]; - - /* These variables are used to delay updates to the LRU queues by one - * decoded item. */ - u32 prev_lz_offset; - u32 prev_delta_power; - u32 prev_delta_offset; - u32 upcoming_lz_offset; - u32 upcoming_delta_power; - u32 upcoming_delta_offset; + /* LRU (least-recently-used) queues for match information. */ + struct lzms_lru_queues lru; /* Used for preprocessing. */ s32 last_target_usages[65536]; }; -struct lzms_match { - u32 length; - u32 offset; -}; - /* Initialize the output bitstream @os to write forwards to the specified * compressed data buffer @out that is @out_limit 16-bit integers long. */ static void @@ -485,9 +469,9 @@ lzms_encode_value(struct lzms_huffman_encoder *enc, u32 value) static void lzms_begin_encode_item(struct lzms_compressor *ctx) { - ctx->upcoming_delta_offset = 0; - ctx->upcoming_lz_offset = 0; - ctx->upcoming_delta_power = 0; + ctx->lru.lz.upcoming_offset = 0; + ctx->lru.delta.upcoming_offset = 0; + ctx->lru.delta.upcoming_power = 0; } static void @@ -495,26 +479,7 @@ lzms_end_encode_item(struct lzms_compressor *ctx, u32 length) { LZMS_ASSERT(ctx->window_size - ctx->cur_window_pos >= length); ctx->cur_window_pos += length; - - /* Update LRU queues */ - if (ctx->prev_lz_offset != 0) { - for (int i = LZMS_NUM_RECENT_OFFSETS - 1; i >= 0; i--) - ctx->recent_lz_offsets[i + 1] = ctx->recent_lz_offsets[i]; - ctx->recent_lz_offsets[0] = ctx->prev_lz_offset; - } - - if (ctx->prev_delta_offset != 0) { - for (int i = LZMS_NUM_RECENT_OFFSETS - 1; i >= 0; i--) { - ctx->recent_delta_powers[i + 1] = ctx->recent_delta_powers[i]; - ctx->recent_delta_offsets[i + 1] = ctx->recent_delta_offsets[i]; - } - ctx->recent_delta_powers[0] = ctx->prev_delta_power; - ctx->recent_delta_offsets[0] = ctx->prev_delta_offset; - } - - ctx->prev_lz_offset = ctx->upcoming_lz_offset; - ctx->prev_delta_offset = ctx->upcoming_delta_offset; - ctx->prev_delta_power = ctx->upcoming_delta_power; + lzms_update_lru_queues(&ctx->lru); } /* Encode a literal byte. */ @@ -556,7 +521,7 @@ lzms_encode_lz_match(struct lzms_compressor *ctx, u32 length, u32 offset) for (recent_offset_idx = 0; recent_offset_idx < LZMS_NUM_RECENT_OFFSETS; recent_offset_idx++) - if (offset == ctx->recent_lz_offsets[recent_offset_idx]) + if (offset == ctx->lru.lz.recent_offsets[recent_offset_idx]) break; if (recent_offset_idx == LZMS_NUM_RECENT_OFFSETS) { @@ -587,7 +552,7 @@ lzms_encode_lz_match(struct lzms_compressor *ctx, u32 length, u32 offset) /* Initial update of the LZ match offset LRU queue. */ for (; i < LZMS_NUM_RECENT_OFFSETS; i++) - ctx->recent_lz_offsets[i] = ctx->recent_lz_offsets[i + 1]; + ctx->lru.lz.recent_offsets[i] = ctx->lru.lz.recent_offsets[i + 1]; } /* Encode the match length. */ @@ -595,7 +560,7 @@ lzms_encode_lz_match(struct lzms_compressor *ctx, u32 length, u32 offset) /* Save the match offset for later insertion at the front of the LZ * match offset LRU queue. */ - ctx->upcoming_lz_offset = offset; + ctx->lru.lz.upcoming_offset = offset; lzms_end_encode_item(ctx, length); } @@ -640,38 +605,77 @@ lzms_fast_encode(struct lzms_compressor *ctx) } -#if 0 +/* Fast heuristic cost evaluation to use in the inner loop of the match-finder. + * Unlike lzms_match_cost() which does a true cost evaluation, this simply + * prioritize matches based on their offset. */ +static input_idx_t +lzms_match_cost_fast(input_idx_t length, input_idx_t offset, const void *_lru) +{ + const struct lzms_lz_lru_queues *lru = _lru; + -static struct lzms_match -lzms_get_best_match(struct lzms_compressor *ctx) + /* It seems well worth it to take the time to give priority to recently + * used offsets. */ + for (input_idx_t i = 0; i < LZMS_NUM_RECENT_OFFSETS; i++) + if (offset == lru->recent_offsets[i]) + return i; + + return offset; +} + +static void +lzms_lz_skip_bytes(struct lzms_compressor *ctx, u32 n) { - struct lzms_match match; + while (n--) + lz_sarray_skip_position(&ctx->lz_sarray); +} - /* TODO */ +static struct raw_match +lzms_get_near_optimal_match(struct lzms_compressor *ctx) +{ + struct raw_match matches[10]; + u32 num_matches; + + num_matches = lz_sarray_get_matches(&ctx->lz_sarray, + matches, + lzms_match_cost_fast, + &ctx->lru.lz); + if (num_matches == 0) + return (struct raw_match) { .len = 0 }; - match.length = 0; +#if 0 + fprintf(stderr, "Pos %u/%u: %u matches\n", + lz_sarray_get_pos(&ctx->lz_sarray) - 1, + ctx->window_size, num_matches); + for (u32 i = 0; i < num_matches; i++) + fprintf(stderr, "\tLen %u Offset %u\n", matches[i].len, matches[i].offset); +#endif - return match; + lzms_lz_skip_bytes(ctx, matches[0].len - 1); + return matches[0]; } static void lzms_slow_encode(struct lzms_compressor *ctx) { - struct lzms_match match; + struct raw_match match; + + /* Load window into suffix array match-finder. */ + lz_sarray_load_window(&ctx->lz_sarray, ctx->window, ctx->window_size); /* TODO */ while (ctx->cur_window_pos != ctx->window_size) { - match = lzms_get_best_match(ctx); - if (match.length == 0) { + + match = lzms_get_near_optimal_match(ctx); + if (match.len == 0) { /* Literal */ lzms_encode_literal(ctx, ctx->window[ctx->cur_window_pos]); } else { /* LZ match */ - lzms_encode_lz_match(ctx, match.length, match.offset); + lzms_encode_lz_match(ctx, match.len, match.offset); } } } -#endif static void lzms_init_range_encoder(struct lzms_range_encoder *enc, @@ -775,20 +779,8 @@ lzms_init_compressor(struct lzms_compressor *ctx, const u8 *udata, u32 ulen, lzms_init_range_encoder(&ctx->delta_repeat_match_range_encoders[i], &ctx->rc, LZMS_NUM_DELTA_REPEAT_MATCH_STATES); - /* Initialize the LRU queue for recent match offsets. */ - for (size_t i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++) - ctx->recent_lz_offsets[i] = i + 1; - - for (size_t i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++) { - ctx->recent_delta_powers[i] = 0; - ctx->recent_delta_offsets[i] = i + 1; - } - ctx->prev_lz_offset = 0; - ctx->prev_delta_offset = 0; - ctx->prev_delta_power = 0; - ctx->upcoming_lz_offset = 0; - ctx->upcoming_delta_offset = 0; - ctx->upcoming_delta_power = 0; + /* Initialize LRU match information. */ + lzms_init_lru_queues(&ctx->lru); } /* Flush the output streams, prepare the final compressed data, and return its @@ -878,7 +870,10 @@ lzms_compress(const void *uncompressed_data, size_t uncompressed_size, /* Determine and output a literal/match sequence that decompresses to * the preprocessed data. */ - lzms_fast_encode(ctx); + if (1) + lzms_slow_encode(ctx); + else + lzms_fast_encode(ctx); /* Get and return the compressed data size. */ compressed_size = lzms_finalize(ctx, compressed_data, @@ -944,6 +939,7 @@ lzms_free_compressor(void *_ctx) if (ctx) { FREE(ctx->window); FREE(ctx->prev_tab); + lz_sarray_destroy(&ctx->lz_sarray); FREE(ctx); } } @@ -972,6 +968,15 @@ lzms_create_compressor(size_t max_block_size, if (ctx->prev_tab == NULL) goto oom; + if (!lz_sarray_init(&ctx->lz_sarray, + max_block_size, + 2, + max_block_size, + 100, + 10)) + goto oom; + + ctx->max_block_size = max_block_size; *ctx_ret = ctx; diff --git a/src/lzms-decompress.c b/src/lzms-decompress.c index f010c95c..f2fcb60e 100644 --- a/src/lzms-decompress.c +++ b/src/lzms-decompress.c @@ -346,23 +346,8 @@ struct lzms_decompressor { struct lzms_huffman_decoder delta_power_decoder; struct lzms_huffman_decoder delta_offset_decoder; - /* LRU (least-recently-used) queue of LZ match offsets. */ - u64 recent_lz_offsets[LZMS_NUM_RECENT_OFFSETS + 1]; - - /* LRU (least-recently-used) queue of delta match powers. */ - u32 recent_delta_powers[LZMS_NUM_RECENT_OFFSETS + 1]; - - /* LRU (least-recently-used) queue of delta match offsets. */ - u32 recent_delta_offsets[LZMS_NUM_RECENT_OFFSETS + 1]; - - /* These variables are used to delay updates to the LRU queues by one - * decoded item. */ - u32 prev_lz_offset; - u32 prev_delta_power; - u32 prev_delta_offset; - u32 upcoming_lz_offset; - u32 upcoming_delta_power; - u32 upcoming_delta_offset; + /* LRU (least-recently-used) queues for match information. */ + struct lzms_lru_queues lru; /* Used for postprocessing. */ s32 last_target_usages[65536]; @@ -749,17 +734,17 @@ lzms_decode_lz_match(struct lzms_decompressor *ctx) if (!lzms_range_decode_bit(&ctx->lz_repeat_match_range_decoders[i])) break; - offset = ctx->recent_lz_offsets[i]; + offset = ctx->lru.lz.recent_offsets[i]; for (; i < LZMS_NUM_RECENT_OFFSETS; i++) - ctx->recent_lz_offsets[i] = ctx->recent_lz_offsets[i + 1]; + ctx->lru.lz.recent_offsets[i] = ctx->lru.lz.recent_offsets[i + 1]; } /* Decode match length, which is always given explicitly (there is no * LRU queue for repeat lengths). */ length = lzms_decode_value(&ctx->length_decoder); - ctx->upcoming_lz_offset = offset; + ctx->lru.lz.upcoming_offset = offset; LZMS_DEBUG("Decoded %s LZ match: length=%u, offset=%u", (bit ? "repeat" : "explicit"), length, offset); @@ -789,19 +774,19 @@ lzms_decode_delta_match(struct lzms_decompressor *ctx) if (!lzms_range_decode_bit(&ctx->delta_repeat_match_range_decoders[i])) break; - power = ctx->recent_delta_powers[i]; - raw_offset = ctx->recent_delta_offsets[i]; + power = ctx->lru.delta.recent_powers[i]; + raw_offset = ctx->lru.delta.recent_offsets[i]; for (; i < LZMS_NUM_RECENT_OFFSETS; i++) { - ctx->recent_delta_powers[i] = ctx->recent_delta_powers[i + 1]; - ctx->recent_delta_offsets[i] = ctx->recent_delta_offsets[i + 1]; + ctx->lru.delta.recent_powers[i] = ctx->lru.delta.recent_powers[i + 1]; + ctx->lru.delta.recent_offsets[i] = ctx->lru.delta.recent_offsets[i + 1]; } } length = lzms_decode_value(&ctx->length_decoder); - ctx->upcoming_delta_power = power; - ctx->upcoming_delta_offset = raw_offset; + ctx->lru.delta.upcoming_power = power; + ctx->lru.delta.upcoming_offset = raw_offset; LZMS_DEBUG("Decoded %s delta match: length=%u, power=%u, raw_offset=%u", (bit ? "repeat" : "explicit"), length, power, raw_offset); @@ -834,9 +819,9 @@ lzms_decode_item(struct lzms_decompressor *ctx) { int ret; - ctx->upcoming_delta_offset = 0; - ctx->upcoming_lz_offset = 0; - ctx->upcoming_delta_power = 0; + ctx->lru.lz.upcoming_offset = 0; + ctx->lru.delta.upcoming_power = 0; + ctx->lru.delta.upcoming_offset = 0; if (lzms_range_decode_bit(&ctx->main_range_decoder)) ret = lzms_decode_match(ctx); @@ -847,24 +832,7 @@ lzms_decode_item(struct lzms_decompressor *ctx) return ret; /* Update LRU queues */ - if (ctx->prev_lz_offset != 0) { - for (int i = LZMS_NUM_RECENT_OFFSETS - 1; i >= 0; i--) - ctx->recent_lz_offsets[i + 1] = ctx->recent_lz_offsets[i]; - ctx->recent_lz_offsets[0] = ctx->prev_lz_offset; - } - - if (ctx->prev_delta_offset != 0) { - for (int i = LZMS_NUM_RECENT_OFFSETS - 1; i >= 0; i--) { - ctx->recent_delta_powers[i + 1] = ctx->recent_delta_powers[i]; - ctx->recent_delta_offsets[i + 1] = ctx->recent_delta_offsets[i]; - } - ctx->recent_delta_powers[0] = ctx->prev_delta_power; - ctx->recent_delta_offsets[0] = ctx->prev_delta_offset; - } - - ctx->prev_lz_offset = ctx->upcoming_lz_offset; - ctx->prev_delta_offset = ctx->upcoming_delta_offset; - ctx->prev_delta_power = ctx->upcoming_delta_power; + lzms_update_lru_queues(&ctx->lru); return 0; } @@ -972,20 +940,8 @@ lzms_init_decompressor(struct lzms_decompressor *ctx, lzms_init_range_decoder(&ctx->delta_repeat_match_range_decoders[i], &ctx->rd, LZMS_NUM_DELTA_REPEAT_MATCH_STATES); - /* Initialize the LRU queue for recent match offsets. */ - for (size_t i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++) - ctx->recent_lz_offsets[i] = i + 1; - - for (size_t i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++) { - ctx->recent_delta_powers[i] = 0; - ctx->recent_delta_offsets[i] = i + 1; - } - ctx->prev_lz_offset = 0; - ctx->prev_delta_offset = 0; - ctx->prev_delta_power = 0; - ctx->upcoming_lz_offset = 0; - ctx->upcoming_delta_offset = 0; - ctx->upcoming_delta_power = 0; + /* Initialize LRU match information. */ + lzms_init_lru_queues(&ctx->lru); LZMS_DEBUG("Decompressor successfully initialized"); }