From: Eric Biggers
Date: Wed, 14 Jan 2015 03:48:01 +0000 (-0600)
Subject: LZ hashing cleanup
X-Git-Tag: v1.8.0~61
X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=e7a3df0a6bf2af6500611f6c464dc36cab3332d8

LZ hashing cleanup
---

diff --git a/include/wimlib/bt_matchfinder.h b/include/wimlib/bt_matchfinder.h
index 78ecdd75..536ead6a 100644
--- a/include/wimlib/bt_matchfinder.h
+++ b/include/wimlib/bt_matchfinder.h
@@ -169,7 +169,7 @@ bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf,
 	unsigned len;
 	unsigned best_len = min_len - 1;
 
-	if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES + 1)) {
+	if (unlikely(max_len < LZ_HASH3_REQUIRED_NBYTES + 1)) {
 		*best_len_ret = best_len;
 		return lz_matchptr;
 	}
@@ -278,7 +278,7 @@ bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf,
 	unsigned best_lt_len, best_gt_len;
 	unsigned len;
 
-	if (unlikely(in_end - in_next < LZ_HASH_REQUIRED_NBYTES + 1))
+	if (unlikely(in_end - in_next < LZ_HASH3_REQUIRED_NBYTES + 1))
 		return;
 
 	hash = *next_hash;
diff --git a/include/wimlib/hc_matchfinder.h b/include/wimlib/hc_matchfinder.h
index a5cca95e..dd1cfab9 100644
--- a/include/wimlib/hc_matchfinder.h
+++ b/include/wimlib/hc_matchfinder.h
@@ -173,7 +173,7 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
 	pos_t cur_node;
 
 	/* Insert the current sequence into the appropriate linked list.  */
-	if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES))
+	if (unlikely(max_len < LOAD_U24_REQUIRED_NBYTES))
 		goto out;
 	first_3_bytes = load_u24_unaligned(in_next);
 	hash = lz_hash(first_3_bytes, HC_MATCHFINDER_HASH_ORDER);
@@ -279,7 +279,7 @@ hc_matchfinder_skip_positions(struct hc_matchfinder * restrict mf,
 {
 	u32 hash;
 
-	if (unlikely(in_next + count >= in_end - LZ_HASH_REQUIRED_NBYTES))
+	if (unlikely(in_next + count >= in_end - LZ_HASH3_REQUIRED_NBYTES))
 		return;
 
 	do {
diff --git a/include/wimlib/lz_hash.h b/include/wimlib/lz_hash.h
index dbab5894..5aa4eff5 100644
--- a/include/wimlib/lz_hash.h
+++ b/include/wimlib/lz_hash.h
@@ -10,52 +10,51 @@
  * You can do whatever you want with this file.
  */
 
-#ifndef _WIMLIB_LZ_HASH_H
-#define _WIMLIB_LZ_HASH_H
+#ifndef _LZ_HASH_H
+#define _LZ_HASH_H
 
 #include "wimlib/unaligned.h"
 
-/* Constant for the multiplicative hash function.  */
-#define LZ_HASH_MULTIPLIER 0x1E35A7BD
-
-static inline u32
-loaded_u32_to_u24(u32 v)
-{
-	if (CPU_IS_LITTLE_ENDIAN)
-		return v & 0xFFFFFF;
-	else
-		return v >> 8;
-}
-
+/*
+ * The hash function: given a sequence prefix held in the low-order bits of a
+ * 32-bit value, multiply by a carefully-chosen large constant.  Discard any
+ * bits of the product that don't fit in a 32-bit value, but take the
+ * next-highest @num_bits bits of the product as the hash value, as those have
+ * the most randomness.
+ */
 static inline u32
-load_u24_unaligned(const u8 *p)
+lz_hash(u32 seq, unsigned num_bits)
 {
-	if (UNALIGNED_ACCESS_IS_FAST)
-		return loaded_u32_to_u24(load_u32_unaligned(p));
-	else
-		return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
+	return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
 }
 
+/*
+ * Hash the 2-byte sequence beginning at @p, producing a hash of length
+ * @num_bits bits.  At least 2 bytes of data must be available at @p.
+ */
 static inline u32
-lz_hash(u32 str, unsigned num_bits)
+lz_hash_2_bytes(const u8 *p, unsigned num_bits)
 {
-	return (u32)(str * LZ_HASH_MULTIPLIER) >> (32 - num_bits);
+	u32 seq = load_u16_unaligned(p);
+	if (num_bits >= 16)
+		return seq;
+	return lz_hash(seq, num_bits);
 }
 
 /*
- * Hash the next 3-byte sequence in the window, producing a hash of length
- * 'num_bits' bits.  At least LZ_HASH_REQUIRED_NBYTES must be available at 'p';
- * this might be 4 bytes rather than 3 because an unaligned load is faster on
- * some architectures.
+ * Hash the 3-byte sequence beginning at @p, producing a hash of length
+ * @num_bits bits.  At least LZ_HASH3_REQUIRED_NBYTES bytes of data must be
+ * available at @p; note that this may be more than 3.
  */
 static inline u32
 lz_hash_3_bytes(const u8 *p, unsigned num_bits)
 {
-	return lz_hash(load_u24_unaligned(p), num_bits);
+	u32 seq = load_u24_unaligned(p);
+	if (num_bits >= 24)
+		return seq;
+	return lz_hash(seq, num_bits);
 }
 
-/* Number of bytes the hash function actually requires be available, due to the
- * possibility of an unaligned load.  */
-#define LZ_HASH_REQUIRED_NBYTES (UNALIGNED_ACCESS_IS_FAST ? 4 : 3)
+#define LZ_HASH3_REQUIRED_NBYTES	LOAD_U24_REQUIRED_NBYTES
 
-#endif /* _WIMLIB_LZ_HASH_H */
+#endif /* _LZ_HASH_H */
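For readers following along, here is a minimal standalone sketch (not part of
the patch; the sample bytes and hash order are arbitrary) of what the new
lz_hash() computes: the packed byte sequence is multiplied by 0x1E35A7BD, the
product wraps modulo 2^32, and the top num_bits bits are kept as the table
index.

#include <stdint.h>
#include <stdio.h>

/* Mirror of the patched lz_hash(): multiplicative hashing that keeps the
 * high-order num_bits bits of the wrapped 32-bit product.  */
static uint32_t
demo_lz_hash(uint32_t seq, unsigned num_bits)
{
	return (uint32_t)(seq * 0x1E35A7BD) >> (32 - num_bits);
}

int
main(void)
{
	/* The 3-byte sequence "abc" packed into the low 24 bits the way
	 * load_u24_unaligned() packs it on a little-endian build.  */
	uint32_t seq = (uint32_t)'c' << 16 | (uint32_t)'b' << 8 | (uint32_t)'a';

	/* 15 hash bits -> an index into a 2^15-entry match-finder table.  */
	printf("hash = %u\n", demo_lz_hash(seq, 15));
	return 0;
}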
diff --git a/include/wimlib/unaligned.h b/include/wimlib/unaligned.h
index 9bd2a55f..34240bd0 100644
--- a/include/wimlib/unaligned.h
+++ b/include/wimlib/unaligned.h
@@ -109,4 +109,38 @@ put_unaligned_u32_le(u32 v, void *p)
 	}
 }
 
+/*
+ * Given a 32-bit value that was loaded with the platform's native endianness,
+ * return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24
+ * bits contain the first 3 bytes of the memory location from which the input
+ * 32-bit value was loaded, arranged in a platform-dependent octet order.
+ */
+static inline u32
+loaded_u32_to_u24(u32 v)
+{
+	if (CPU_IS_LITTLE_ENDIAN)
+		return v & 0xFFFFFF;
+	else
+		return v >> 8;
+}
+
+/*
+ * Load the next 3 bytes from the memory location @p into the 24 low-order bits
+ * of a 32-bit value.  The order in which the 3 bytes will be arranged as octets
+ * in the 24 bits is platform-dependent.  At least LOAD_U24_REQUIRED_NBYTES
+ * bytes must be available at @p; note that this may be more than 3.
+ */
+static inline u32
+load_u24_unaligned(const u8 *p)
+{
+#if UNALIGNED_ACCESS_IS_FAST
+#  define LOAD_U24_REQUIRED_NBYTES 4
+	return loaded_u32_to_u24(load_u32_unaligned(p));
+#else
+#  define LOAD_U24_REQUIRED_NBYTES 3
+	return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
+#endif
+}
+
+
 #endif /* _WIMLIB_UNALIGNED_H */
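To make the octet-order caveat concrete, here is a small self-contained check
(again not part of the patch; the byte values are arbitrary) comparing what
the two load_u24_unaligned() paths produce for the same input bytes.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	const uint8_t p[4] = { 0x61, 0x62, 0x63, 0x64 };	/* "abcd" */

	/* Byte-by-byte path: always 0x636261, whatever the host order.  */
	uint32_t slow = (uint32_t)p[0] | (uint32_t)p[1] << 8 |
			(uint32_t)p[2] << 16;

	/* Fast path on a little-endian host: the 32-bit load of p yields
	 * 0x64636261, and masking with 0xFFFFFF keeps 0x636261 -- the same
	 * packing as the slow path.  On a big-endian host the load yields
	 * 0x61626364, and >> 8 keeps 0x616263, a different octet order.
	 * Either is fine for hashing, since the order only needs to be
	 * consistent within one build.  */
	uint32_t le_fast = UINT32_C(0x64636261) & 0xFFFFFF;
	uint32_t be_fast = UINT32_C(0x61626364) >> 8;

	printf("slow=0x%06"PRIx32" le_fast=0x%06"PRIx32" be_fast=0x%06"PRIx32"\n",
	       slow, le_fast, be_fast);
	return 0;
}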
diff --git a/src/lzx_compress.c b/src/lzx_compress.c
index 0db8f108..4434cde3 100644
--- a/src/lzx_compress.c
+++ b/src/lzx_compress.c
@@ -471,17 +471,6 @@ struct lzx_compressor {
 	};
 };
 
-/* Compute a hash value for the next 2 bytes of uncompressed data.  */
-static inline u32
-lz_hash_2_bytes(const u8 *in_next)
-{
-	u16 next_2_bytes = load_u16_unaligned(in_next);
-	if (LZX_HASH2_ORDER == 16)
-		return next_2_bytes;
-	else
-		return lz_hash(next_2_bytes, LZX_HASH2_ORDER);
-}
-
 /*
  * Structure to keep track of the current state of sending bits to the
  * compressed output buffer.
@@ -1632,9 +1621,7 @@ lzx_compress_near_optimal(struct lzx_compressor *c,
 				 * match of the very last two bytes with the
 				 * very first two bytes, since such a match has
 				 * an offset too large to be represented.
 				 */
-				if (unlikely(max_len <
-					     max(LZ_HASH_REQUIRED_NBYTES, 3)))
-				{
+				if (unlikely(max_len < 3)) {
 					in_next++;
 					cache_ptr->length = 0;
 					cache_ptr++;
@@ -1645,7 +1632,7 @@ lzx_compress_near_optimal(struct lzx_compressor *c,
 			lz_matchptr = cache_ptr + 1;
 
 			/* Check for a length 2 match.  */
-			hash2 = lz_hash_2_bytes(in_next);
+			hash2 = lz_hash_2_bytes(in_next, LZX_HASH2_ORDER);
 			cur_match = c->hash2_tab[hash2];
 			c->hash2_tab[hash2] = in_next - in_begin;
 			if (matchfinder_node_valid(cur_match) &&
@@ -1692,16 +1679,14 @@ lzx_compress_near_optimal(struct lzx_compressor *c,
 			if (unlikely(max_len > in_end - in_next)) {
 				max_len = in_end - in_next;
 				nice_len = min(max_len, nice_len);
-				if (unlikely(max_len <
-					     max(LZ_HASH_REQUIRED_NBYTES, 3)))
-				{
+				if (unlikely(max_len < 3)) {
 					in_next++;
 					cache_ptr->length = 0;
 					cache_ptr++;
 					continue;
 				}
 			}
-			c->hash2_tab[lz_hash_2_bytes(in_next)] =
+			c->hash2_tab[lz_hash_2_bytes(in_next, LZX_HASH2_ORDER)] =
 				in_next - in_begin;
 			bt_matchfinder_skip_position(&c->bt_mf,
 						     in_begin,
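The lzx_compress.c change folds the compressor's private 2-byte hash helper
into the shared lz_hash_2_bytes(), whose num_bits >= 16 early return preserves
the old LZX_HASH2_ORDER == 16 special case: with 16 hash bits, the 2 bytes
themselves already form a perfect hash and the multiply is skipped.  A
self-contained sketch of the length-2 match lookup pattern (the table order
and input here are illustrative, not wimlib's actual values):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define HASH2_ORDER	12	/* illustrative; stands in for LZX_HASH2_ORDER */

static uint32_t
lz_hash(uint32_t seq, unsigned num_bits)
{
	return (uint32_t)(seq * 0x1E35A7BD) >> (32 - num_bits);
}

/* Mirror of the shared lz_hash_2_bytes(): with 16 or more hash bits the
 * 2-byte value indexes the table directly (a perfect hash), so the multiply
 * is skipped.  */
static uint32_t
lz_hash_2_bytes(const uint8_t *p, unsigned num_bits)
{
	uint16_t v;

	memcpy(&v, p, 2);	/* stand-in for load_u16_unaligned() */
	if (num_bits >= 16)
		return v;
	return lz_hash(v, num_bits);
}

int
main(void)
{
	static uint32_t hash2_tab[1U << HASH2_ORDER];
	const uint8_t in[] = "zabzab";

	/* "ab" occurs at positions 1 and 4.  Record position 1, then look up
	 * position 4: both hash to the same slot, so position 1 comes back
	 * as a length-2 match candidate -- the same hash2_tab pattern that
	 * lzx_compress_near_optimal() uses (which also validates the
	 * candidate before trusting it).  */
	hash2_tab[lz_hash_2_bytes(&in[1], HASH2_ORDER)] = 1;
	printf("candidate position: %"PRIu32"\n",
	       hash2_tab[lz_hash_2_bytes(&in[4], HASH2_ORDER)]);
	return 0;
}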