X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Flz_hash_chains.c;h=c116f1251331a10a6dc9e11fd04862343a6072ef;hb=0a97ffa6074b6b69b9982d83b398145c5ac860ab;hp=012d7c462cbdb4f92172d2bb30e5ab158483b149;hpb=6929ca50c6d64ac208e7ccc5f1c2c08dada71061;p=wimlib

diff --git a/src/lz_hash_chains.c b/src/lz_hash_chains.c
index 012d7c46..c116f125 100644
--- a/src/lz_hash_chains.c
+++ b/src/lz_hash_chains.c
@@ -33,67 +33,38 @@
 # include "config.h"
 #endif
 
+#include "wimlib/lz_extend.h"
+#include "wimlib/lz_hash3.h"
 #include "wimlib/lz_mf.h"
 #include "wimlib/util.h"
-#include <pthread.h>
+
 #include <string.h>
 
-/* Number of hash buckets. This can be changed, but should be a power of 2 so
- * that the correct hash bucket can be selected using a fast bitwise AND. */
-#define LZ_HC_HASH_LEN (1 << 15)
+/* log2 of the number of buckets in the hash table. This can be changed. */
+#define LZ_HC_HASH_ORDER 15
 
-/* Number of bytes from which the hash code is computed at each position. This
- * can be changed, provided that lz_hc_hash() is updated as well. */
-#define LZ_HC_HASH_BYTES 3
+#define LZ_HC_HASH_LEN (1 << LZ_HC_HASH_ORDER)
 
 struct lz_hc {
 	struct lz_mf base;
-	u32 *hash_tab;
-	u32 *prev_tab;
+	u32 *hash_tab; /* followed by 'prev_tab' in memory */
 	u32 next_hash;
 };
 
-static u32 crc32_table[256];
-static pthread_once_t crc32_table_filled = PTHREAD_ONCE_INIT;
-
-static void
-crc32_init(void)
-{
-	for (u32 b = 0; b < 256; b++) {
-		u32 r = b;
-		for (int i = 0; i < 8; i++) {
-			if (r & 1)
-				r = (r >> 1) ^ 0xEDB88320;
-			else
-				r >>= 1;
-		}
-		crc32_table[b] = r;
-	}
-}
-
-/* This hash function is taken from the LZMA SDK. It seems to work well.
- *
- * TODO: Maybe use the SSE4.2 CRC32 instruction when available? */
 static inline u32
 lz_hc_hash(const u8 *p)
 {
-	u32 hash = 0;
-
-	hash ^= crc32_table[p[0]];
-	hash ^= p[1];
-	hash ^= (u32)p[2] << 8;
-
-	return hash % LZ_HC_HASH_LEN;
+	return lz_hash(p, LZ_HC_HASH_ORDER);
 }
 
 static void
 lz_hc_set_default_params(struct lz_mf_params *params)
 {
-	if (params->min_match_len < LZ_HC_HASH_BYTES)
-		params->min_match_len = LZ_HC_HASH_BYTES;
+	if (params->min_match_len < LZ_HASH_NBYTES)
+		params->min_match_len = LZ_HASH_NBYTES;
 
 	if (params->max_match_len == 0)
-		params->max_match_len = params->max_window_size;
+		params->max_match_len = UINT32_MAX;
 
 	if (params->max_search_depth == 0)
 		params->max_search_depth = 50;
@@ -115,7 +86,6 @@ lz_hc_params_valid(const struct lz_mf_params *_params)
 
 	lz_hc_set_default_params(&params);
 
-	/* Avoid edge case where min_match_len = 3, max_match_len = 2 */
 	return (params.min_match_len <= params.max_match_len);
 }
 
@@ -137,17 +107,10 @@ lz_hc_init(struct lz_mf *_mf)
 
 	lz_hc_set_default_params(&mf->base.params);
 
-	/* Allocate space for 'hash_tab' and 'prev_tab'. */
-
 	mf->hash_tab = MALLOC(lz_hc_get_needed_memory(mf->base.params.max_window_size));
 	if (!mf->hash_tab)
 		return false;
 
-	mf->prev_tab = mf->hash_tab + LZ_HC_HASH_LEN;
-
-	/* Fill in the CRC32 table if not done already. */
-	pthread_once(&crc32_table_filled, crc32_init);
-
 	return true;
 }
 
@@ -157,135 +120,158 @@ lz_hc_load_window(struct lz_mf *_mf, const u8 window[], u32 size)
 	struct lz_hc *mf = (struct lz_hc *)_mf;
 
 	memset(mf->hash_tab, 0, LZ_HC_HASH_LEN * sizeof(u32));
-
-	if (size >= LZ_HC_HASH_BYTES)
-		mf->next_hash = lz_hc_hash(window);
 }
 
-static inline u32
-do_search(const u8 * restrict window,
-	  const u32 cur_window_pos,
-	  u32 prev_tab[restrict],
-	  u32 cur_match,
-	  const u32 min_len,
-	  const u32 max_len,
-	  const u32 max_search_depth,
-	  struct lz_match matches[restrict])
+static u32
+lz_hc_get_matches(struct lz_mf *_mf, struct lz_match matches[])
 {
-	const u8 * const strptr = &window[cur_window_pos];
-	u32 best_len = min_len - 1;
-	u32 depth_remaining = max_search_depth;
-	u32 num_matches = 0;
+	struct lz_hc *mf = (struct lz_hc *)_mf;
+	const u8 * const window = mf->base.cur_window;
+	const u32 cur_pos = mf->base.cur_window_pos++;
+	const u8 * const strptr = &window[cur_pos];
+	const u32 bytes_remaining = mf->base.cur_window_size - cur_pos;
+	u32 * const prev_tab = mf->hash_tab + LZ_HC_HASH_LEN;
+	const u32 max_len = min(bytes_remaining, mf->base.params.max_match_len);
+	const u32 nice_len = min(max_len, mf->base.params.nice_match_len);
+	u32 best_len = mf->base.params.min_match_len - 1;
+	u32 depth_remaining = mf->base.params.max_search_depth;
+	struct lz_match *lz_matchptr = matches;
+	u32 hash;
+	u32 cur_match;
+	u32 sequence;
+
+	if (unlikely(bytes_remaining < LZ_HASH_REQUIRED_NBYTES + 1))
+		return 0;
+
+	/* Insert the current position into the appropriate hash chain and set
+	 * 'cur_match' to the previous head.
+	 *
+	 * For a slight performance improvement, we do each hash calculation one
+	 * position in advance and prefetch the necessary hash table entry. */
+
+	hash = mf->next_hash;
+	mf->next_hash = lz_hc_hash(strptr + 1);
+	prefetch(&mf->hash_tab[mf->next_hash]);
+	cur_match = mf->hash_tab[hash];
+	mf->hash_tab[hash] = cur_pos;
+	prev_tab[cur_pos] = cur_match;
+
+	/* Ensure we can find a match of at least the requested length. */
+	if (unlikely(best_len >= max_len))
+		return 0;
+
+	if (UNALIGNED_ACCESS_IS_FAST)
+		sequence = load_u24_unaligned(strptr);
+
+	/* Search the appropriate hash chain for matches. */
 	for (; cur_match && depth_remaining--; cur_match = prev_tab[cur_match]) {
 		const u8 * const matchptr = &window[cur_match];
 		u32 len;
 
-		if (matchptr[best_len] != strptr[best_len] ||
-		    matchptr[best_len - 1] != strptr[best_len - 1] ||
-		    matchptr[0] != strptr[0])
+		/* Considering the potential match at 'matchptr': is it longer
+		 * than 'best_len'?
+		 *
+		 * The bytes at index 'best_len' are the most likely to differ,
+		 * so check them first. */
+		if (matchptr[best_len] != strptr[best_len])
 			goto next_match;
 
-		for (len = 1; len < best_len - 1; len++)
-			if (matchptr[len] != strptr[len])
+		if (UNALIGNED_ACCESS_IS_FAST) {
+			if (load_u24_unaligned(matchptr) != sequence)
 				goto next_match;
 
-		len = best_len;
+			len = lz_extend(strptr, matchptr, 3, max_len);
+
+			if (len > best_len) {
+				best_len = len;
+
+				*lz_matchptr++ = (struct lz_match) {
+					.len = best_len,
+					.offset = strptr - matchptr,
+				};
+
+				if (best_len >= nice_len)
+					break;
+			}
+		} else {
 
-		while (++len != max_len)
-			if (matchptr[len] != strptr[len])
-				break;
+			/* The bytes at indices 'best_len - 1' and '0' are less
			 * important to check separately. But doing so still
			 * gives a slight performance improvement, at least on
			 * x86_64, probably because they create separate
			 * branches for the CPU to predict independently of the
			 * branches in the main comparison loops. */
+			if (matchptr[best_len - 1] != strptr[best_len - 1] ||
+			    matchptr[0] != strptr[0])
+				goto next_match;
 
-		matches[num_matches++] = (struct lz_match) {
-			.len = len,
-			.offset = strptr - matchptr,
-		};
-		best_len = len;
-		if (best_len == max_len)
-			break;
-	next_match:
-		;
-	}
-	return num_matches;
-}
+			for (len = 1; len < best_len - 1; len++)
+				if (matchptr[len] != strptr[len])
+					goto next_match;
 
-static u32
-lz_hc_get_matches(struct lz_mf *_mf, struct lz_match matches[])
-{
-	struct lz_hc *mf = (struct lz_hc *)_mf;
-	const u32 bytes_remaining = lz_mf_get_bytes_remaining(&mf->base);
-	u32 hash;
-	u32 cur_match;
-	u32 num_matches = 0;
+			/* The match is the longest found so far --- at least
			 * 'best_len' + 1 bytes. Continue extending it. */
 
-	if (bytes_remaining <= LZ_HC_HASH_BYTES)
-		goto out;
+			if (++best_len != max_len &&
+			    strptr[best_len] == matchptr[best_len])
+				while (++best_len != max_len)
+					if (strptr[best_len] != matchptr[best_len])
+						break;
 
-	hash = mf->next_hash;
-	mf->next_hash = lz_hc_hash(lz_mf_get_window_ptr(&mf->base) + 1);
-	prefetch(&mf->hash_tab[mf->next_hash]);
-	cur_match = mf->hash_tab[hash];
-	mf->hash_tab[hash] = mf->base.cur_window_pos;
-	mf->prev_tab[mf->base.cur_window_pos] = cur_match;
-
-	num_matches = do_search(mf->base.cur_window,
-				mf->base.cur_window_pos,
-				mf->prev_tab,
-				cur_match,
-				mf->base.params.min_match_len,
-				min(bytes_remaining, mf->base.params.nice_match_len),
-				mf->base.params.max_search_depth,
-				matches);
-
-	/* If the longest match is @nice_match_len in length, it may have been
-	 * truncated. Try extending it up to the maximum match length. */
-	if (num_matches != 0 &&
-	    matches[num_matches - 1].len == mf->base.params.nice_match_len)
-	{
-		const u8 * const strptr = lz_mf_get_window_ptr(&mf->base);
-		const u8 * const matchptr = strptr - matches[num_matches - 1].offset;
-		const u32 len_limit = min(bytes_remaining, mf->base.params.max_match_len);
-		u32 len;
+			/* Record the match. */
+			*lz_matchptr++ = (struct lz_match) {
+				.len = best_len,
+				.offset = strptr - matchptr,
+			};
 
-		len = matches[num_matches - 1].len;
-		while (len < len_limit && strptr[len] == matchptr[len])
-			len++;
-		matches[num_matches - 1].len = len;
+			/* Terminate the search if 'nice_len' was reached. */
+			if (best_len >= nice_len)
+				break;
+		}
 
-out:
-	mf->base.cur_window_pos++;
-	return num_matches;
+	next_match:
+		/* Continue to next match in the chain. */
+		;
 	}
 
+	return lz_matchptr - matches;
 }
 
 static void
-lz_hc_skip_position(struct lz_hc *mf)
+lz_hc_skip_positions(struct lz_mf *_mf, u32 n)
 {
-	const u32 bytes_remaining = lz_mf_get_bytes_remaining(&mf->base);
+	struct lz_hc *mf = (struct lz_hc *)_mf;
+	u32 * const hash_tab = mf->hash_tab;
+	u32 * const prev_tab = hash_tab + LZ_HC_HASH_LEN;
+	const u8 * const window = mf->base.cur_window;
+	u32 cur_pos = mf->base.cur_window_pos;
+	u32 end_pos = cur_pos + n;
+	const u32 bytes_remaining = mf->base.cur_window_size - cur_pos;
 	u32 hash;
+	u32 next_hash;
 
-	if (bytes_remaining <= LZ_HC_HASH_BYTES)
-		goto out;
-
-	hash = mf->next_hash;
-	mf->next_hash = lz_hc_hash(lz_mf_get_window_ptr(&mf->base) + 1);
-	prefetch(&mf->hash_tab[mf->next_hash]);
-	mf->prev_tab[mf->base.cur_window_pos] = mf->hash_tab[hash];
-	mf->hash_tab[hash] = mf->base.cur_window_pos;
+	mf->base.cur_window_pos = end_pos;
 
-out:
-	mf->base.cur_window_pos++;
-}
+	if (unlikely(bytes_remaining < n + (LZ_HASH_REQUIRED_NBYTES + 1) - 1)) {
+		/* Nearing end of window. */
+		if (unlikely(bytes_remaining < (LZ_HASH_REQUIRED_NBYTES + 1)))
+			return;
 
-static void
-lz_hc_skip_positions(struct lz_mf *_mf, u32 n)
-{
-	struct lz_hc *mf = (struct lz_hc *)_mf;
+		end_pos = cur_pos + bytes_remaining - (LZ_HASH_REQUIRED_NBYTES + 1) + 1;
	}
 
+	next_hash = mf->next_hash;
 	do {
-		lz_hc_skip_position(mf);
-	} while (--n);
+		hash = next_hash;
+		next_hash = lz_hc_hash(&window[cur_pos + 1]);
+		prev_tab[cur_pos] = hash_tab[hash];
+		hash_tab[hash] = cur_pos;
+	} while (++cur_pos != end_pos);
+
+	prefetch(&hash_tab[next_hash]);
+	mf->next_hash = next_hash;
 }
 
 static void