/* * lz_hash_chains.c * * Hash chain match-finder for Lempel-Ziv compression. * * Copyright (c) 2014 Eric Biggers. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #include "wimlib/lz_mf.h" #include "wimlib/util.h" #include #include /* Number of hash buckets. This can be changed, but should be a power of 2 so * that the correct hash bucket can be selected using a fast bitwise AND. */ #define LZ_HC_HASH_LEN (1 << 15) /* Number of bytes from which the hash code is computed at each position. This * can be changed, provided that lz_hc_hash() is updated as well. */ #define LZ_HC_HASH_BYTES 3 struct lz_hc { struct lz_mf base; u32 *hash_tab; u32 *prev_tab; u32 next_hash; }; static u32 crc32_table[256]; static pthread_once_t crc32_table_filled = PTHREAD_ONCE_INIT; static void crc32_init(void) { for (u32 b = 0; b < 256; b++) { u32 r = b; for (int i = 0; i < 8; i++) { if (r & 1) r = (r >> 1) ^ 0xEDB88320; else r >>= 1; } crc32_table[b] = r; } } /* This hash function is taken from the LZMA SDK. It seems to work well. * * TODO: Maybe use the SSE4.2 CRC32 instruction when available? */ static inline u32 lz_hc_hash(const u8 *p) { u32 hash = 0; hash ^= crc32_table[p[0]]; hash ^= p[1]; hash ^= (u32)p[2] << 8; return hash % LZ_HC_HASH_LEN; } static void lz_hc_set_default_params(struct lz_mf_params *params) { if (params->min_match_len < LZ_HC_HASH_BYTES) params->min_match_len = LZ_HC_HASH_BYTES; if (params->max_match_len == 0) params->max_match_len = params->max_window_size; if (params->max_search_depth == 0) params->max_search_depth = 50; if (params->nice_match_len == 0) params->nice_match_len = 24; if (params->nice_match_len < params->min_match_len) params->nice_match_len = params->min_match_len; if (params->nice_match_len > params->max_match_len) params->nice_match_len = params->max_match_len; } static bool lz_hc_params_valid(const struct lz_mf_params *_params) { struct lz_mf_params params = *_params; lz_hc_set_default_params(¶ms); /* Avoid edge case where min_match_len = 3, max_match_len = 2 */ return (params.min_match_len <= params.max_match_len); } static u64 lz_hc_get_needed_memory(u32 max_window_size) { u64 len = 0; len += LZ_HC_HASH_LEN; len += max_window_size; return len * sizeof(u32); } static bool lz_hc_init(struct lz_mf *_mf) { struct lz_hc *mf = (struct lz_hc *)_mf; lz_hc_set_default_params(&mf->base.params); /* Allocate space for 'hash_tab' and 'prev_tab'. */ mf->hash_tab = MALLOC(lz_hc_get_needed_memory(mf->base.params.max_window_size)); if (!mf->hash_tab) return false; mf->prev_tab = mf->hash_tab + LZ_HC_HASH_LEN; /* Fill in the CRC32 table if not done already. */ pthread_once(&crc32_table_filled, crc32_init); return true; } static void lz_hc_load_window(struct lz_mf *_mf, const u8 window[], u32 size) { struct lz_hc *mf = (struct lz_hc *)_mf; memset(mf->hash_tab, 0, LZ_HC_HASH_LEN * sizeof(u32)); if (size >= LZ_HC_HASH_BYTES) mf->next_hash = lz_hc_hash(window); } static inline u32 do_search(const u8 * restrict window, const u32 cur_window_pos, u32 prev_tab[restrict], u32 cur_match, const u32 min_len, const u32 max_len, const u32 max_search_depth, struct lz_match matches[restrict]) { const u8 * const strptr = &window[cur_window_pos]; u32 best_len = min_len - 1; u32 depth_remaining = max_search_depth; u32 num_matches = 0; for (; cur_match && depth_remaining--; cur_match = prev_tab[cur_match]) { const u8 * const matchptr = &window[cur_match]; if (matchptr[best_len] == strptr[best_len] && matchptr[best_len - 1] == strptr[best_len - 1] && matchptr[0] == strptr[0]) { u32 len = 0; while (++len != max_len) if (matchptr[len] != strptr[len]) break; if (len > best_len) { matches[num_matches++] = (struct lz_match) { .len = len, .offset = strptr - matchptr, }; best_len = len; if (best_len == max_len) break; } } } return num_matches; } static u32 lz_hc_get_matches(struct lz_mf *_mf, struct lz_match matches[]) { struct lz_hc *mf = (struct lz_hc *)_mf; const u32 bytes_remaining = lz_mf_get_bytes_remaining(&mf->base); u32 hash; u32 cur_match; u32 num_matches = 0; if (bytes_remaining <= LZ_HC_HASH_BYTES) goto out; hash = mf->next_hash; mf->next_hash = lz_hc_hash(lz_mf_get_window_ptr(&mf->base) + 1); prefetch(&mf->hash_tab[mf->next_hash]); cur_match = mf->hash_tab[hash]; mf->hash_tab[hash] = mf->base.cur_window_pos; mf->prev_tab[mf->base.cur_window_pos] = cur_match; num_matches = do_search(mf->base.cur_window, mf->base.cur_window_pos, mf->prev_tab, cur_match, mf->base.params.min_match_len, min(bytes_remaining, mf->base.params.nice_match_len), mf->base.params.max_search_depth, matches); /* If the longest match is @nice_match_len in length, it may have been * truncated. Try extending it up to the maximum match length. */ if (num_matches != 0 && matches[num_matches - 1].len == mf->base.params.nice_match_len) { const u8 * const strptr = lz_mf_get_window_ptr(&mf->base); const u8 * const matchptr = strptr - matches[num_matches - 1].offset; const u32 len_limit = min(bytes_remaining, mf->base.params.max_match_len); u32 len; len = matches[num_matches - 1].len; while (len < len_limit && strptr[len] == matchptr[len]) len++; matches[num_matches - 1].len = len; } out: mf->base.cur_window_pos++; return num_matches; } static void lz_hc_skip_position(struct lz_hc *mf) { const u32 bytes_remaining = lz_mf_get_bytes_remaining(&mf->base); u32 hash; if (bytes_remaining <= LZ_HC_HASH_BYTES) goto out; hash = mf->next_hash; mf->next_hash = lz_hc_hash(lz_mf_get_window_ptr(&mf->base) + 1); prefetch(&mf->hash_tab[mf->next_hash]); mf->prev_tab[mf->base.cur_window_pos] = mf->hash_tab[hash]; mf->hash_tab[hash] = mf->base.cur_window_pos; out: mf->base.cur_window_pos++; } static void lz_hc_skip_positions(struct lz_mf *_mf, u32 n) { struct lz_hc *mf = (struct lz_hc *)_mf; do { lz_hc_skip_position(mf); } while (--n); } static void lz_hc_destroy(struct lz_mf *_mf) { struct lz_hc *mf = (struct lz_hc *)_mf; FREE(mf->hash_tab); } const struct lz_mf_ops lz_hash_chains_ops = { .params_valid = lz_hc_params_valid, .get_needed_memory = lz_hc_get_needed_memory, .init = lz_hc_init, .load_window = lz_hc_load_window, .get_matches = lz_hc_get_matches, .skip_positions = lz_hc_skip_positions, .destroy = lz_hc_destroy, .struct_size = sizeof(struct lz_hc), };