X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=include%2Fwimlib%2Flz_hash.h;h=5aa4eff5d34319677f1a42e4bca6bf6ff132fd6d;hb=acb5c41d2c9be655eca863a0315a22d1f54889e2;hp=9d097ae668e0ec1b0b1baffae76c976125f3d17e;hpb=e940fda88a92ff9e931ec88fb4c0e1ebd6fa2dfb;p=wimlib diff --git a/include/wimlib/lz_hash.h b/include/wimlib/lz_hash.h index 9d097ae6..5aa4eff5 100644 --- a/include/wimlib/lz_hash.h +++ b/include/wimlib/lz_hash.h @@ -1,30 +1,60 @@ -#ifndef _WIMLIB_LZ_HASH_H -#define _WIMLIB_LZ_HASH_H +/* + * lz_hash.h + * + * Hashing for Lempel-Ziv matchfinding. + * + * Author: Eric Biggers + * Year: 2014, 2015 + * + * The author dedicates this file to the public domain. + * You can do whatever you want with this file. + */ -#include "wimlib/compress_common.h" +#ifndef _LZ_HASH_H +#define _LZ_HASH_H -struct lz_params { - unsigned min_match; - unsigned max_match; - unsigned max_offset; - unsigned nice_match; - unsigned good_match; - unsigned max_chain_len; - unsigned max_lazy_match; - unsigned too_far; -}; +#include "wimlib/unaligned.h" -typedef void (*lz_record_match_t)(unsigned len, unsigned offset, void *ctx); -typedef void (*lz_record_literal_t)(u8 lit, void *ctx); +/* + * The hash function: given a sequence prefix held in the low-order bits of a + * 32-bit value, multiply by a carefully-chosen large constant. Discard any + * bits of the product that don't fit in a 32-bit value, but take the + * next-highest @num_bits bits of the product as the hash value, as those have + * the most randomness. + */ +static inline u32 +lz_hash(u32 seq, unsigned num_bits) +{ + return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits); +} -extern void -lz_analyze_block(const u8 window[restrict], - input_idx_t window_size, - lz_record_match_t record_match, - lz_record_literal_t record_literal, - void *record_ctx, - const struct lz_params *params, - input_idx_t prev_tab[restrict]); +/* + * Hash the 2-byte sequence beginning at @p, producing a hash of length + * @num_bits bits. At least 2 bytes of data must be available at @p. + */ +static inline u32 +lz_hash_2_bytes(const u8 *p, unsigned num_bits) +{ + u32 seq = load_u16_unaligned(p); + if (num_bits >= 16) + return seq; + return lz_hash(seq, num_bits); +} +/* + * Hash the 3-byte sequence beginning at @p, producing a hash of length + * @num_bits bits. At least LZ_HASH3_REQUIRED_NBYTES bytes of data must be + * available at @p; note that this may be more than 3. + */ +static inline u32 +lz_hash_3_bytes(const u8 *p, unsigned num_bits) +{ + u32 seq = load_u24_unaligned(p); + if (num_bits >= 24) + return seq; + return lz_hash(seq, num_bits); +} -#endif /* _WIMLIB_LZ_HASH_H */ +#define LZ_HASH3_REQUIRED_NBYTES LOAD_U24_REQUIRED_NBYTES + +#endif /* _LZ_HASH_H */