X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Flz_hash_chains.c;h=c116f1251331a10a6dc9e11fd04862343a6072ef;hb=2b14c2bfd6d0a7a17433dd8529cfc8b7d969c4b0;hp=807e455aaf6c46b21cae44943daabcf45cf1aad4;hpb=e3a8f6a32b258ba8ac6929f6c468b1c873a96fc6;p=wimlib

diff --git a/src/lz_hash_chains.c b/src/lz_hash_chains.c
index 807e455a..c116f125 100644
--- a/src/lz_hash_chains.c
+++ b/src/lz_hash_chains.c
@@ -33,67 +33,38 @@
 # include "config.h"
 #endif

+#include "wimlib/lz_extend.h"
+#include "wimlib/lz_hash3.h"
 #include "wimlib/lz_mf.h"
 #include "wimlib/util.h"
-#include <pthread.h>
+
 #include <string.h>

-/* Number of hash buckets. This can be changed, but should be a power of 2 so
- * that the correct hash bucket can be selected using a fast bitwise AND. */
-#define LZ_HC_HASH_LEN (1 << 15)
+/* log2 of the number of buckets in the hash table. This can be changed. */
+#define LZ_HC_HASH_ORDER 15

-/* Number of bytes from which the hash code is computed at each position. This
- * can be changed, provided that lz_hc_hash() is updated as well. */
-#define LZ_HC_HASH_BYTES 3
+#define LZ_HC_HASH_LEN (1 << LZ_HC_HASH_ORDER)

 struct lz_hc {
 	struct lz_mf base;
-	u32 *hash_tab;
-	u32 *prev_tab;
+	u32 *hash_tab; /* followed by 'prev_tab' in memory */
 	u32 next_hash;
 };

-static u32 crc32_table[256];
-static pthread_once_t crc32_table_filled = PTHREAD_ONCE_INIT;
-
-static void
-crc32_init(void)
-{
-	for (u32 b = 0; b < 256; b++) {
-		u32 r = b;
-		for (int i = 0; i < 8; i++) {
-			if (r & 1)
-				r = (r >> 1) ^ 0xEDB88320;
-			else
-				r >>= 1;
-		}
-		crc32_table[b] = r;
-	}
-}
-
-/* This hash function is taken from the LZMA SDK. It seems to work well.
- *
- * TODO: Maybe use the SSE4.2 CRC32 instruction when available? */
 static inline u32
 lz_hc_hash(const u8 *p)
 {
-	u32 hash = 0;
-
-	hash ^= crc32_table[p[0]];
-	hash ^= p[1];
-	hash ^= (u32)p[2] << 8;
-
-	return hash % LZ_HC_HASH_LEN;
+	return lz_hash(p, LZ_HC_HASH_ORDER);
 }

 static void
 lz_hc_set_default_params(struct lz_mf_params *params)
 {
-	if (params->min_match_len < LZ_HC_HASH_BYTES)
-		params->min_match_len = LZ_HC_HASH_BYTES;
+	if (params->min_match_len < LZ_HASH_NBYTES)
+		params->min_match_len = LZ_HASH_NBYTES;

 	if (params->max_match_len == 0)
-		params->max_match_len = params->max_window_size;
+		params->max_match_len = UINT32_MAX;

 	if (params->max_search_depth == 0)
 		params->max_search_depth = 50;
@@ -115,7 +86,6 @@ lz_hc_params_valid(const struct lz_mf_params *_params)

 	lz_hc_set_default_params(&params);

-	/* Avoid edge case where min_match_len = 3, max_match_len = 2 */
 	return (params.min_match_len <= params.max_match_len);
 }

@@ -137,17 +107,10 @@ lz_hc_init(struct lz_mf *_mf)

 	lz_hc_set_default_params(&mf->base.params);

-	/* Allocate space for 'hash_tab' and 'prev_tab'. */
-
 	mf->hash_tab = MALLOC(lz_hc_get_needed_memory(mf->base.params.max_window_size));
 	if (!mf->hash_tab)
 		return false;

-	mf->prev_tab = mf->hash_tab + LZ_HC_HASH_LEN;
-
-	/* Fill in the CRC32 table if not done already.  */
-	pthread_once(&crc32_table_filled, crc32_init);
-
 	return true;
 }

@@ -157,9 +120,6 @@ lz_hc_load_window(struct lz_mf *_mf, const u8 window[], u32 size)
 	struct lz_hc *mf = (struct lz_hc *)_mf;

 	memset(mf->hash_tab, 0, LZ_HC_HASH_LEN * sizeof(u32));
-
-	if (size >= LZ_HC_HASH_BYTES)
-		mf->next_hash = lz_hc_hash(window);
 }

 static u32
@@ -167,19 +127,27 @@ lz_hc_get_matches(struct lz_mf *_mf, struct lz_match matches[])
 {
 	struct lz_hc *mf = (struct lz_hc *)_mf;
 	const u8 * const window = mf->base.cur_window;
-	const u32 cur_pos = mf->base.cur_window_pos;
+	const u32 cur_pos = mf->base.cur_window_pos++;
 	const u8 * const strptr = &window[cur_pos];
 	const u32 bytes_remaining = mf->base.cur_window_size - cur_pos;
-	u32 * const prev_tab = mf->prev_tab;
-	const u32 max_len = min(bytes_remaining, mf->base.params.nice_match_len);
+	u32 * const prev_tab = mf->hash_tab + LZ_HC_HASH_LEN;
+	const u32 max_len = min(bytes_remaining, mf->base.params.max_match_len);
+	const u32 nice_len = min(max_len, mf->base.params.nice_match_len);
 	u32 best_len = mf->base.params.min_match_len - 1;
 	u32 depth_remaining = mf->base.params.max_search_depth;
-	u32 num_matches = 0;
+	struct lz_match *lz_matchptr = matches;
 	u32 hash;
 	u32 cur_match;
+	u32 sequence;

-	if (unlikely(bytes_remaining <= LZ_HC_HASH_BYTES))
-		goto out;
+	if (unlikely(bytes_remaining < LZ_HASH_REQUIRED_NBYTES + 1))
+		return 0;
+
+	/* Insert the current position into the appropriate hash chain and set
+	 * 'cur_match' to the previous head.
+	 *
+	 * For a slight performance improvement, we do each hash calculation one
+	 * position in advance and prefetch the necessary hash table entry. */

 	hash = mf->next_hash;
 	mf->next_hash = lz_hc_hash(strptr + 1);
@@ -188,78 +156,122 @@ lz_hc_get_matches(struct lz_mf *_mf, struct lz_match matches[])
 	mf->hash_tab[hash] = cur_pos;
 	prev_tab[cur_pos] = cur_match;

+	/* Ensure we can find a match of at least the requested length. */
+	if (unlikely(best_len >= max_len))
+		return 0;
+
+	if (UNALIGNED_ACCESS_IS_FAST)
+		sequence = load_u24_unaligned(strptr);
+
+	/* Search the appropriate hash chain for matches. */
 	for (; cur_match && depth_remaining--;
 	     cur_match = prev_tab[cur_match])
 	{
 		const u8 * const matchptr = &window[cur_match];
 		u32 len;

-		if (matchptr[best_len] != strptr[best_len] ||
-		    matchptr[best_len - 1] != strptr[best_len - 1] ||
-		    matchptr[0] != strptr[0])
+		/* Considering the potential match at 'matchptr': is it longer
+		 * than 'best_len'?
+		 *
+		 * The bytes at index 'best_len' are the most likely to differ,
+		 * so check them first. */
+		if (matchptr[best_len] != strptr[best_len])
 			goto next_match;

-		for (len = 1; len < best_len - 1; len++)
-			if (matchptr[len] != strptr[len])
+		if (UNALIGNED_ACCESS_IS_FAST) {
+			if (load_u24_unaligned(matchptr) != sequence)
 				goto next_match;

-		len = best_len;
+			len = lz_extend(strptr, matchptr, 3, max_len);
+
+			if (len > best_len) {
+				best_len = len;

-		do {
-			if (++len == max_len) {
-				const u32 len_limit = min(bytes_remaining,
-							  mf->base.params.max_match_len);
-				while (len < len_limit && strptr[len] == matchptr[len])
-					len++;
-				matches[num_matches++] = (struct lz_match) {
-					.len = len,
+				*lz_matchptr++ = (struct lz_match) {
+					.len = best_len,
 					.offset = strptr - matchptr,
 				};
-				goto out;
+
+				if (best_len >= nice_len)
+					break;
 			}
-		} while (matchptr[len] == strptr[len]);
+		} else {
+
+			/* The bytes at indices 'best_len - 1' and '0' are less
+			 * important to check separately.  But doing so still
+			 * gives a slight performance improvement, at least on
+			 * x86_64, probably because they create separate
+			 * branches for the CPU to predict independently of the
+			 * branches in the main comparison loops.
+			 */
+			if (matchptr[best_len - 1] != strptr[best_len - 1] ||
+			    matchptr[0] != strptr[0])
+				goto next_match;

-		best_len = len;
-		matches[num_matches++] = (struct lz_match) {
-			.len = len,
-			.offset = strptr - matchptr,
-		};
+			for (len = 1; len < best_len - 1; len++)
+				if (matchptr[len] != strptr[len])
+					goto next_match;
+
+			/* The match is the longest found so far --- at least
+			 * 'best_len' + 1 bytes.  Continue extending it. */
+
+			if (++best_len != max_len &&
+			    strptr[best_len] == matchptr[best_len])
+				while (++best_len != max_len)
+					if (strptr[best_len] != matchptr[best_len])
+						break;
+
+			/* Record the match. */
+			*lz_matchptr++ = (struct lz_match) {
+				.len = best_len,
+				.offset = strptr - matchptr,
+			};
+
+			/* Terminate the search if 'nice_len' was reached. */
+			if (best_len >= nice_len)
+				break;
+		}

 	next_match:
+		/* Continue to next match in the chain. */
 		;
 	}

-out:
-	mf->base.cur_window_pos++;
-	return num_matches;
+	return lz_matchptr - matches;
 }

 static void
-lz_hc_skip_position(struct lz_hc *mf)
+lz_hc_skip_positions(struct lz_mf *_mf, u32 n)
 {
-	const u32 bytes_remaining = lz_mf_get_bytes_remaining(&mf->base);
+	struct lz_hc *mf = (struct lz_hc *)_mf;
+	u32 * const hash_tab = mf->hash_tab;
+	u32 * const prev_tab = hash_tab + LZ_HC_HASH_LEN;
+	const u8 * const window = mf->base.cur_window;
+	u32 cur_pos = mf->base.cur_window_pos;
+	u32 end_pos = cur_pos + n;
+	const u32 bytes_remaining = mf->base.cur_window_size - cur_pos;
 	u32 hash;
+	u32 next_hash;

-	if (bytes_remaining <= LZ_HC_HASH_BYTES)
-		goto out;
-
-	hash = mf->next_hash;
-	mf->next_hash = lz_hc_hash(lz_mf_get_window_ptr(&mf->base) + 1);
-	prefetch(&mf->hash_tab[mf->next_hash]);
-	mf->prev_tab[mf->base.cur_window_pos] = mf->hash_tab[hash];
-	mf->hash_tab[hash] = mf->base.cur_window_pos;
+	mf->base.cur_window_pos = end_pos;

-out:
-	mf->base.cur_window_pos++;
-}
+	if (unlikely(bytes_remaining < n + (LZ_HASH_REQUIRED_NBYTES + 1) - 1)) {
+		/* Nearing end of window. */
+		if (unlikely(bytes_remaining < (LZ_HASH_REQUIRED_NBYTES + 1)))
+			return;

-static void
-lz_hc_skip_positions(struct lz_mf *_mf, u32 n)
-{
-	struct lz_hc *mf = (struct lz_hc *)_mf;
+		end_pos = cur_pos + bytes_remaining - (LZ_HASH_REQUIRED_NBYTES + 1) + 1;
+	}

+	next_hash = mf->next_hash;
 	do {
-		lz_hc_skip_position(mf);
-	} while (--n);
+		hash = next_hash;
+		next_hash = lz_hc_hash(&window[cur_pos + 1]);
+		prev_tab[cur_pos] = hash_tab[hash];
+		hash_tab[hash] = cur_pos;
+	} while (++cur_pos != end_pos);
+
+	prefetch(&hash_tab[next_hash]);
+	mf->next_hash = next_hash;
 }

 static void
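
To make the structure of the new matchfinder easier to follow outside the diff, here is a minimal standalone C sketch of a hash-chain matchfinder using the memory layout the patch adopts: one allocation holding the chain heads ('hash_tab') followed immediately by the per-position links ('prev_tab'), with an insert-then-search step at each position. Everything in it is illustrative and assumed rather than wimlib's API: 'struct hc', 'hash3', and the multiplier constant stand in for wimlib's 'struct lz_hc', lz_hash() from lz_hash3.h, and lz_extend(). The sketch also leaves out the matches[] output array and the trick of comparing byte 'best_len' first.

#include <stdint.h>
#include <stdlib.h>

#define HASH_ORDER	15
#define HASH_LEN	(1 << HASH_ORDER)

/* Illustrative 3-byte multiplicative hash; wimlib's lz_hash() plays this
 * role, and the exact multiplier used here is an assumption. */
static inline uint32_t
hash3(const uint8_t *p)
{
	uint32_t seq = p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16);

	return (seq * 0x9E3779B1u) >> (32 - HASH_ORDER);
}

struct hc {
	uint32_t *hash_tab;	/* chain heads, indexed by hash value */
	uint32_t *prev_tab;	/* previous position with the same hash */
};

/* One zeroed allocation holds both tables; 'prev_tab' simply starts
 * HASH_LEN entries in, as in the patched lz_hc_init(). */
static int
hc_init(struct hc *mf, uint32_t max_window_size)
{
	mf->hash_tab = calloc(HASH_LEN + (size_t)max_window_size,
			      sizeof(uint32_t));
	if (!mf->hash_tab)
		return -1;
	mf->prev_tab = mf->hash_tab + HASH_LEN;
	return 0;
}

/* Insert 'pos' at the head of its chain, then walk at most 'max_depth'
 * older positions looking for the longest match.  The caller must ensure
 * pos + max_len <= window_size and that at least 3 bytes remain.  Slot
 * value 0 doubles as the "empty" marker, so a match can never start at
 * position 0; the real matchfinder shares that quirk. */
static uint32_t
hc_find_match(struct hc *mf, const uint8_t *window, uint32_t pos,
	      uint32_t max_len, uint32_t max_depth, uint32_t *offset_ret)
{
	const uint32_t hash = hash3(&window[pos]);
	uint32_t cur = mf->hash_tab[hash];
	uint32_t best_len = 0;

	mf->hash_tab[hash] = pos;	/* link the current position in */
	mf->prev_tab[pos] = cur;

	for (; cur && max_depth--; cur = mf->prev_tab[cur]) {
		uint32_t len = 0;

		while (len < max_len && window[cur + len] == window[pos + len])
			len++;
		if (len > best_len) {
			best_len = len;
			*offset_ret = pos - cur;
		}
	}
	return best_len;	/* 0 means no usable previous occurrence */
}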
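
The patched lz_hc_skip_positions() inserts a batch of positions without searching for matches, clamping its stopping point near the end of the window so the hash never reads out of bounds. Reusing 'struct hc' and 'hash3' from the sketch above (both assumed names), the same idea without wimlib's one-position-ahead hashing and prefetching looks like this; the real code keeps a 'next_hash' pipeline across calls, which is presumably why its bounds checks use LZ_HASH_REQUIRED_NBYTES + 1 rather than plain 3.

/* Insert positions [*window_pos, *window_pos + n) into the hash chains
 * without match-finding.  Assumes n >= 1.  A 3-byte hash at position p
 * reads bytes p..p+2, so the last insertable position is window_size - 3;
 * positions past that are skipped but still counted as consumed. */
static void
hc_skip_positions(struct hc *mf, const uint8_t *window,
		  uint32_t window_size, uint32_t *window_pos, uint32_t n)
{
	uint32_t cur_pos = *window_pos;
	uint32_t end_pos = cur_pos + n;
	const uint32_t bytes_remaining = window_size - cur_pos;

	*window_pos = end_pos;	/* always advance, even when clamping */

	if (bytes_remaining < n + 2) {
		/* Nearing the end of the window. */
		if (bytes_remaining < 3)
			return;
		end_pos = cur_pos + (bytes_remaining - 2);
	}

	do {
		const uint32_t hash = hash3(&window[cur_pos]);

		mf->prev_tab[cur_pos] = mf->hash_tab[hash];
		mf->hash_tab[hash] = cur_pos;
	} while (++cur_pos != end_pos);
}

Batching matters because a compressor calls this after emitting a long match: every position the match covers still has to be linked into the chains so later searches can find it, but none of those positions needs a match search of its own.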