X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=include%2Fwimlib%2Fbt_matchfinder.h;h=8a69bca4acfed21bdbde39013e53711e7d4f0ff7;hp=1d30e36216a05876e8074956b3b7dfe4de2e012c;hb=e9a04c1cb384cf3cf23d70107e85f79c4ac0a555;hpb=3e8aa757aaa63297f0d54007adf46411778fb6a8 diff --git a/include/wimlib/bt_matchfinder.h b/include/wimlib/bt_matchfinder.h index 1d30e362..8a69bca4 100644 --- a/include/wimlib/bt_matchfinder.h +++ b/include/wimlib/bt_matchfinder.h @@ -11,8 +11,8 @@ * * This is a Binary Trees (bt) based matchfinder. * - * The data structure is a hash table where each hash bucket contains a binary - * tree of sequences whose first 3 bytes share the same hash code. Each + * The main data structure is a hash table where each hash bucket contains a + * binary tree of sequences whose first 3 bytes share the same hash code. Each * sequence is identified by its starting position in the input buffer. Each * binary tree is always sorted such that each left child represents a sequence * lexicographically lesser than its parent and each right child represents a @@ -45,166 +45,165 @@ * ---------------------------------------------------------------------------- */ -#ifndef _BT_MATCHFINDER_H -#define _BT_MATCHFINDER_H + +#include #include "wimlib/lz_extend.h" #include "wimlib/lz_hash.h" -#include "wimlib/matchfinder_common.h" - -#if MATCHFINDER_MAX_WINDOW_ORDER < 13 -# define BT_MATCHFINDER_HASH_ORDER 14 -#elif MATCHFINDER_MAX_WINDOW_ORDER < 15 -# define BT_MATCHFINDER_HASH_ORDER 15 -#else -# define BT_MATCHFINDER_HASH_ORDER 16 + +#define BT_MATCHFINDER_HASH3_ORDER 16 + +/* TEMPLATED functions and structures have MF_SUFFIX appended to their name. */ +#undef TEMPLATED +#define TEMPLATED(name) CONCAT(name, MF_SUFFIX) + +#ifndef _WIMLIB_BT_MATCHFINDER_H +#define _WIMLIB_BT_MATCHFINDER_H + +/* Non-templated definitions */ + +/* Representation of a match found by the bt_matchfinder */ +struct lz_match { + + /* The number of bytes matched. */ + u32 length; + + /* The offset back from the current position that was matched. */ + u32 offset; +}; + +#endif /* _WIMLIB_BT_MATCHFINDER_H */ + +struct TEMPLATED(bt_matchfinder) { + + /* The hash table for finding length 2 matches, if enabled */ +#ifdef BT_MATCHFINDER_HASH2_ORDER + mf_pos_t hash2_tab[1UL << BT_MATCHFINDER_HASH2_ORDER]; #endif -#define BT_MATCHFINDER_HASH_LENGTH (1UL << BT_MATCHFINDER_HASH_ORDER) + /* The hash table which contains the roots of the binary trees for + * finding length 3 matches */ + mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER]; -struct bt_matchfinder { - pos_t hash_tab[BT_MATCHFINDER_HASH_LENGTH]; - pos_t child_tab[]; -} _aligned_attribute(MATCHFINDER_ALIGNMENT); + /* The child node references for the binary trees. The left and right + * children of the node for the sequence with position 'pos' are + * 'child_tab[pos * 2]' and 'child_tab[pos * 2 + 1]', respectively. */ + mf_pos_t child_tab[]; +}; /* Return the number of bytes that must be allocated for a 'bt_matchfinder' that * can work with buffers up to the specified size. */ static inline size_t -bt_matchfinder_size(size_t max_bufsize) +TEMPLATED(bt_matchfinder_size)(size_t max_bufsize) { - return sizeof(pos_t) * (BT_MATCHFINDER_HASH_LENGTH + (2 * max_bufsize)); + return sizeof(struct TEMPLATED(bt_matchfinder)) + + (2 * max_bufsize * sizeof(mf_pos_t)); } /* Prepare the matchfinder for a new input buffer. */ static inline void -bt_matchfinder_init(struct bt_matchfinder *mf) +TEMPLATED(bt_matchfinder_init)(struct TEMPLATED(bt_matchfinder) *mf) { - matchfinder_init(mf->hash_tab, BT_MATCHFINDER_HASH_LENGTH); + memset(mf, 0, sizeof(*mf)); } -static inline u32 -bt_matchfinder_hash_3_bytes(const u8 *in_next) +static inline mf_pos_t * +TEMPLATED(bt_left_child)(struct TEMPLATED(bt_matchfinder) *mf, u32 node) { - return lz_hash_3_bytes(in_next, BT_MATCHFINDER_HASH_ORDER); -} - -static inline pos_t * -bt_child(struct bt_matchfinder *mf, pos_t node, int offset) -{ - if (MATCHFINDER_MAX_WINDOW_ORDER < sizeof(pos_t) * 8) { - /* no cast needed */ - return &mf->child_tab[(node << 1) + offset]; - } else { - return &mf->child_tab[((size_t)node << 1) + offset]; - } + return &mf->child_tab[(node << 1) + 0]; } -static inline pos_t * -bt_left_child(struct bt_matchfinder *mf, pos_t node) +static inline mf_pos_t * +TEMPLATED(bt_right_child)(struct TEMPLATED(bt_matchfinder) *mf, u32 node) { - return bt_child(mf, node, 0); + return &mf->child_tab[(node << 1) + 1]; } -static inline pos_t * -bt_right_child(struct bt_matchfinder *mf, pos_t node) -{ - return bt_child(mf, node, 1); -} - -/* - * Retrieve a list of matches with the current position. - * - * @mf - * The matchfinder structure. - * @in_begin - * Pointer to the beginning of the input buffer. - * @in_next - * Pointer to the next byte in the input buffer to process. This is the - * pointer to the sequence being matched against. - * @min_len - * Only record matches that are at least this long. - * @max_len - * The maximum permissible match length at this position. - * @nice_len - * Stop searching if a match of at least this length is found. - * @max_search_depth - * Limit on the number of potential matches to consider. - * @next_hash - * Pointer to the hash code for the current sequence, which was computed - * one position in advance so that the binary tree root could be - * prefetched. This is an input/output parameter. - * @best_len_ret - * The length of the longest match found is written here. (This is - * actually redundant with the 'struct lz_match' array, but this is easier - * for the compiler to optimize when inlined and the caller immediately - * does a check against 'best_len'.) - * @lz_matchptr - * An array in which this function will record the matches. The recorded - * matches will be sorted by strictly increasing length and strictly - * increasing offset. The maximum number of matches that may be found is - * 'min(nice_len, max_len) - 3 + 1'. - * - * The return value is a pointer to the next available slot in the @lz_matchptr - * array. (If no matches were found, this will be the same as @lz_matchptr.) - */ +/* Advance the binary tree matchfinder by one byte, optionally recording + * matches. @record_matches should be a compile-time constant. */ static inline struct lz_match * -bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf, - const u8 * const in_begin, - const u8 * const in_next, - const unsigned min_len, - const unsigned max_len, - const unsigned nice_len, - const unsigned max_search_depth, - u32 * restrict next_hash, - unsigned * restrict best_len_ret, - struct lz_match * restrict lz_matchptr) +TEMPLATED(bt_matchfinder_advance_one_byte)(struct TEMPLATED(bt_matchfinder) * const restrict mf, + const u8 * const restrict in_begin, + const ptrdiff_t cur_pos, + const u32 max_len, + const u32 nice_len, + const u32 max_search_depth, + u32 * const restrict next_hash, + u32 * const restrict best_len_ret, + struct lz_match * restrict lz_matchptr, + const bool record_matches) { - unsigned depth_remaining = max_search_depth; - u32 hash; - pos_t cur_node; + const u8 *in_next = in_begin + cur_pos; + u32 depth_remaining = max_search_depth; +#ifdef BT_MATCHFINDER_HASH2_ORDER + u16 seq2; + u32 hash2; +#endif + u32 hash3; + u32 cur_node; const u8 *matchptr; - pos_t *pending_lt_ptr, *pending_gt_ptr; - unsigned best_lt_len, best_gt_len; - unsigned len; - unsigned best_len = min_len - 1; + mf_pos_t *pending_lt_ptr, *pending_gt_ptr; + u32 best_lt_len, best_gt_len; + u32 len; + u32 best_len = 2; - if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES + 1)) { + if (unlikely(max_len < LZ_HASH3_REQUIRED_NBYTES + 1)) { *best_len_ret = best_len; return lz_matchptr; } - hash = *next_hash; - *next_hash = bt_matchfinder_hash_3_bytes(in_next + 1); - cur_node = mf->hash_tab[hash]; - mf->hash_tab[hash] = in_next - in_begin; - prefetch(&mf->hash_tab[*next_hash]); + hash3 = *next_hash; + *next_hash = lz_hash(load_u24_unaligned(in_next + 1), BT_MATCHFINDER_HASH3_ORDER); + prefetchw(&mf->hash3_tab[*next_hash]); - pending_lt_ptr = bt_left_child(mf, in_next - in_begin); - pending_gt_ptr = bt_right_child(mf, in_next - in_begin); - best_lt_len = 0; - best_gt_len = 0; - len = 0; +#ifdef BT_MATCHFINDER_HASH2_ORDER + seq2 = load_u16_unaligned(in_next); + hash2 = lz_hash(seq2, BT_MATCHFINDER_HASH2_ORDER); + cur_node = mf->hash2_tab[hash2]; + mf->hash2_tab[hash2] = cur_pos; + if (record_matches && + seq2 == load_u16_unaligned(&in_begin[cur_node]) && + likely(in_next != in_begin)) + { + lz_matchptr->length = 2; + lz_matchptr->offset = in_next - &in_begin[cur_node]; + lz_matchptr++; + } +#endif - if (!matchfinder_node_valid(cur_node)) { - *pending_lt_ptr = MATCHFINDER_NULL; - *pending_gt_ptr = MATCHFINDER_NULL; + cur_node = mf->hash3_tab[hash3]; + mf->hash3_tab[hash3] = cur_pos; + + pending_lt_ptr = TEMPLATED(bt_left_child)(mf, cur_pos); + pending_gt_ptr = TEMPLATED(bt_right_child)(mf, cur_pos); + + if (!cur_node) { + *pending_lt_ptr = 0; + *pending_gt_ptr = 0; *best_len_ret = best_len; return lz_matchptr; } + best_lt_len = 0; + best_gt_len = 0; + len = 0; + for (;;) { matchptr = &in_begin[cur_node]; if (matchptr[len] == in_next[len]) { - len = lz_extend(in_next, matchptr, len + 1, max_len); - if (len > best_len) { - best_len = len; - lz_matchptr->length = len; - lz_matchptr->offset = in_next - matchptr; - lz_matchptr++; + len = lz_extend(in_next, matchptr, len + 1, + (record_matches ? max_len : nice_len)); + if (!record_matches || len > best_len) { + if (record_matches) { + best_len = len; + lz_matchptr->length = len; + lz_matchptr->offset = in_next - matchptr; + lz_matchptr++; + } if (len >= nice_len) { - *pending_lt_ptr = *bt_left_child(mf, cur_node); - *pending_gt_ptr = *bt_right_child(mf, cur_node); + *pending_lt_ptr = *TEMPLATED(bt_left_child)(mf, cur_node); + *pending_gt_ptr = *TEMPLATED(bt_right_child)(mf, cur_node); *best_len_ret = best_len; return lz_matchptr; } @@ -213,29 +212,89 @@ bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf, if (matchptr[len] < in_next[len]) { *pending_lt_ptr = cur_node; - pending_lt_ptr = bt_right_child(mf, cur_node); + pending_lt_ptr = TEMPLATED(bt_right_child)(mf, cur_node); cur_node = *pending_lt_ptr; best_lt_len = len; if (best_gt_len < len) len = best_gt_len; } else { *pending_gt_ptr = cur_node; - pending_gt_ptr = bt_left_child(mf, cur_node); + pending_gt_ptr = TEMPLATED(bt_left_child)(mf, cur_node); cur_node = *pending_gt_ptr; best_gt_len = len; if (best_lt_len < len) len = best_lt_len; } - if (!matchfinder_node_valid(cur_node) || !--depth_remaining) { - *pending_lt_ptr = MATCHFINDER_NULL; - *pending_gt_ptr = MATCHFINDER_NULL; + if (!cur_node || !--depth_remaining) { + *pending_lt_ptr = 0; + *pending_gt_ptr = 0; *best_len_ret = best_len; return lz_matchptr; } } } +/* + * Retrieve a list of matches with the current position. + * + * @mf + * The matchfinder structure. + * @in_begin + * Pointer to the beginning of the input buffer. + * @cur_pos + * The current position in the input buffer (the position of the sequence + * being matched against). + * @max_len + * The maximum permissible match length at this position. + * @nice_len + * Stop searching if a match of at least this length is found. + * Must be <= @max_len. + * @max_search_depth + * Limit on the number of potential matches to consider. Must be >= 1. + * @next_hash + * Pointer to the hash code for the current sequence, which was computed + * one position in advance so that the binary tree root could be + * prefetched. This is an input/output parameter. + * @best_len_ret + * If a match of length >= 3 was found, then the length of the longest such + * match is written here; otherwise 2 is written here. (Note: this is + * redundant with the 'struct lz_match' array, but this is easier for the + * compiler to optimize when inlined and the caller immediately does a + * check against 'best_len'.) + * @lz_matchptr + * An array in which this function will record the matches. The recorded + * matches will be sorted by strictly increasing length and increasing + * offset. The maximum number of matches that may be found is + * 'min(nice_len, max_len) - 2 + 1', or one less if length 2 matches are + * disabled. + * + * The return value is a pointer to the next available slot in the @lz_matchptr + * array. (If no matches were found, this will be the same as @lz_matchptr.) + */ +static inline struct lz_match * +TEMPLATED(bt_matchfinder_get_matches)(struct TEMPLATED(bt_matchfinder) *mf, + const u8 *in_begin, + ptrdiff_t cur_pos, + u32 max_len, + u32 nice_len, + u32 max_search_depth, + u32 *next_hash, + u32 *best_len_ret, + struct lz_match *lz_matchptr) +{ + return TEMPLATED(bt_matchfinder_advance_one_byte)(mf, + in_begin, + cur_pos, + max_len, + nice_len, + max_search_depth, + next_hash, + best_len_ret, + lz_matchptr, + true); +} + /* * Advance the matchfinder, but don't record any matches. * @@ -243,10 +302,10 @@ bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf, * The matchfinder structure. * @in_begin * Pointer to the beginning of the input buffer. - * @in_next - * Pointer to the next byte in the input buffer to process. - * @in_end - * Pointer to the end of the input buffer. + * @cur_pos + * The current position in the input buffer. + * @max_len + * The maximum permissible match length at this position. * @nice_len * Stop searching if a match of at least this length is found. * @max_search_depth @@ -261,78 +320,23 @@ bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf, * actually record any matches. */ static inline void -bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf, - const u8 * const in_begin, - const u8 * const in_next, - const u8 * const in_end, - const unsigned nice_len, - const unsigned max_search_depth, - u32 * restrict next_hash) +TEMPLATED(bt_matchfinder_skip_position)(struct TEMPLATED(bt_matchfinder) *mf, + const u8 *in_begin, + ptrdiff_t cur_pos, + u32 max_len, + u32 nice_len, + u32 max_search_depth, + u32 *next_hash) { - unsigned depth_remaining = max_search_depth; - u32 hash; - pos_t cur_node; - const u8 *matchptr; - pos_t *pending_lt_ptr, *pending_gt_ptr; - unsigned best_lt_len, best_gt_len; - unsigned len; - - if (unlikely(in_end - in_next < LZ_HASH_REQUIRED_NBYTES + 1)) - return; - - hash = *next_hash; - *next_hash = bt_matchfinder_hash_3_bytes(in_next + 1); - cur_node = mf->hash_tab[hash]; - mf->hash_tab[hash] = in_next - in_begin; - prefetch(&mf->hash_tab[*next_hash]); - - depth_remaining = max_search_depth; - pending_lt_ptr = bt_left_child(mf, in_next - in_begin); - pending_gt_ptr = bt_right_child(mf, in_next - in_begin); - best_lt_len = 0; - best_gt_len = 0; - len = 0; - - if (!matchfinder_node_valid(cur_node)) { - *pending_lt_ptr = MATCHFINDER_NULL; - *pending_gt_ptr = MATCHFINDER_NULL; - return; - } - - for (;;) { - matchptr = &in_begin[cur_node]; - - if (matchptr[len] == in_next[len]) { - len = lz_extend(in_next, matchptr, len + 1, nice_len); - if (len == nice_len) { - *pending_lt_ptr = *bt_left_child(mf, cur_node); - *pending_gt_ptr = *bt_right_child(mf, cur_node); - return; - } - } - - if (matchptr[len] < in_next[len]) { - *pending_lt_ptr = cur_node; - pending_lt_ptr = bt_right_child(mf, cur_node); - cur_node = *pending_lt_ptr; - best_lt_len = len; - if (best_gt_len < len) - len = best_gt_len; - } else { - *pending_gt_ptr = cur_node; - pending_gt_ptr = bt_left_child(mf, cur_node); - cur_node = *pending_gt_ptr; - best_gt_len = len; - if (best_lt_len < len) - len = best_lt_len; - } - - if (!matchfinder_node_valid(cur_node) || !--depth_remaining) { - *pending_lt_ptr = MATCHFINDER_NULL; - *pending_gt_ptr = MATCHFINDER_NULL; - return; - } - } + u32 best_len; + TEMPLATED(bt_matchfinder_advance_one_byte)(mf, + in_begin, + cur_pos, + max_len, + nice_len, + max_search_depth, + next_hash, + &best_len, + NULL, + false); } - -#endif /* _BT_MATCHFINDER_H */