X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=include%2Fwimlib%2Fbt_matchfinder.h;h=8a69bca4acfed21bdbde39013e53711e7d4f0ff7;hp=459fedd368be9bb44704f6dd40e952da7500cc1d;hb=e9a04c1cb384cf3cf23d70107e85f79c4ac0a555;hpb=7e71e0ea5e7c2958cef2cc3522367a7da82e4728 diff --git a/include/wimlib/bt_matchfinder.h b/include/wimlib/bt_matchfinder.h index 459fedd3..8a69bca4 100644 --- a/include/wimlib/bt_matchfinder.h +++ b/include/wimlib/bt_matchfinder.h @@ -11,8 +11,8 @@ * * This is a Binary Trees (bt) based matchfinder. * - * The data structure is a hash table where each hash bucket contains a binary - * tree of sequences whose first 3 bytes share the same hash code. Each + * The main data structure is a hash table where each hash bucket contains a + * binary tree of sequences whose first 3 bytes share the same hash code. Each * sequence is identified by its starting position in the input buffer. Each * binary tree is always sorted such that each left child represents a sequence * lexicographically lesser than its parent and each right child represents a @@ -51,7 +51,7 @@ #include "wimlib/lz_extend.h" #include "wimlib/lz_hash.h" -#define BT_MATCHFINDER_HASH_ORDER 16 +#define BT_MATCHFINDER_HASH3_ORDER 16 /* TEMPLATED functions and structures have MF_SUFFIX appended to their name. */ #undef TEMPLATED @@ -72,17 +72,23 @@ struct lz_match { u32 offset; }; -static inline u32 -bt_matchfinder_hash_3_bytes(const u8 *in_next) -{ - return lz_hash_3_bytes(in_next, BT_MATCHFINDER_HASH_ORDER); -} - #endif /* _WIMLIB_BT_MATCHFINDER_H */ struct TEMPLATED(bt_matchfinder) { - pos_t hash_tab[1UL << BT_MATCHFINDER_HASH_ORDER]; - pos_t child_tab[]; + + /* The hash table for finding length 2 matches, if enabled */ +#ifdef BT_MATCHFINDER_HASH2_ORDER + mf_pos_t hash2_tab[1UL << BT_MATCHFINDER_HASH2_ORDER]; +#endif + + /* The hash table which contains the roots of the binary trees for + * finding length 3 matches */ + mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER]; + + /* The child node references for the binary trees. The left and right + * children of the node for the sequence with position 'pos' are + * 'child_tab[pos * 2]' and 'child_tab[pos * 2 + 1]', respectively. */ + mf_pos_t child_tab[]; }; /* Return the number of bytes that must be allocated for a 'bt_matchfinder' that @@ -91,7 +97,7 @@ static inline size_t TEMPLATED(bt_matchfinder_size)(size_t max_bufsize) { return sizeof(struct TEMPLATED(bt_matchfinder)) + - (2 * max_bufsize * sizeof(pos_t)); + (2 * max_bufsize * sizeof(mf_pos_t)); } /* Prepare the matchfinder for a new input buffer. */ @@ -101,98 +107,75 @@ TEMPLATED(bt_matchfinder_init)(struct TEMPLATED(bt_matchfinder) *mf) memset(mf, 0, sizeof(*mf)); } -static inline pos_t * -TEMPLATED(bt_child)(struct TEMPLATED(bt_matchfinder) *mf, pos_t node, int offset) +static inline mf_pos_t * +TEMPLATED(bt_left_child)(struct TEMPLATED(bt_matchfinder) *mf, u32 node) { - return &mf->child_tab[(node << 1) + offset]; + return &mf->child_tab[(node << 1) + 0]; } -static inline pos_t * -TEMPLATED(bt_left_child)(struct TEMPLATED(bt_matchfinder) *mf, pos_t node) +static inline mf_pos_t * +TEMPLATED(bt_right_child)(struct TEMPLATED(bt_matchfinder) *mf, u32 node) { - return TEMPLATED(bt_child)(mf, node, 0); + return &mf->child_tab[(node << 1) + 1]; } -static inline pos_t * -TEMPLATED(bt_right_child)(struct TEMPLATED(bt_matchfinder) *mf, pos_t node) -{ - return TEMPLATED(bt_child)(mf, node, 1); -} - -/* - * Retrieve a list of matches with the current position. - * - * @mf - * The matchfinder structure. - * @in_begin - * Pointer to the beginning of the input buffer. - * @in_next - * Pointer to the next byte in the input buffer to process. This is the - * pointer to the sequence being matched against. - * @min_len - * Only record matches that are at least this long. - * @max_len - * The maximum permissible match length at this position. - * @nice_len - * Stop searching if a match of at least this length is found. - * Must be <= @max_len. - * @max_search_depth - * Limit on the number of potential matches to consider. Must be >= 1. - * @next_hash - * Pointer to the hash code for the current sequence, which was computed - * one position in advance so that the binary tree root could be - * prefetched. This is an input/output parameter. - * @best_len_ret - * The length of the longest match found is written here. (This is - * actually redundant with the 'struct lz_match' array, but this is easier - * for the compiler to optimize when inlined and the caller immediately - * does a check against 'best_len'.) - * @lz_matchptr - * An array in which this function will record the matches. The recorded - * matches will be sorted by strictly increasing length and strictly - * increasing offset. The maximum number of matches that may be found is - * 'min(nice_len, max_len) - 3 + 1'. - * - * The return value is a pointer to the next available slot in the @lz_matchptr - * array. (If no matches were found, this will be the same as @lz_matchptr.) - */ +/* Advance the binary tree matchfinder by one byte, optionally recording + * matches. @record_matches should be a compile-time constant. */ static inline struct lz_match * -TEMPLATED(bt_matchfinder_get_matches)(struct TEMPLATED(bt_matchfinder) * const restrict mf, - const u8 * const in_begin, - const u8 * const in_next, - const unsigned min_len, - const unsigned max_len, - const unsigned nice_len, - const unsigned max_search_depth, - u32 * restrict next_hash, - unsigned * restrict best_len_ret, - struct lz_match * restrict lz_matchptr) +TEMPLATED(bt_matchfinder_advance_one_byte)(struct TEMPLATED(bt_matchfinder) * const restrict mf, + const u8 * const restrict in_begin, + const ptrdiff_t cur_pos, + const u32 max_len, + const u32 nice_len, + const u32 max_search_depth, + u32 * const restrict next_hash, + u32 * const restrict best_len_ret, + struct lz_match * restrict lz_matchptr, + const bool record_matches) { - unsigned depth_remaining = max_search_depth; - u32 hash; - pos_t cur_node; + const u8 *in_next = in_begin + cur_pos; + u32 depth_remaining = max_search_depth; +#ifdef BT_MATCHFINDER_HASH2_ORDER + u16 seq2; + u32 hash2; +#endif + u32 hash3; + u32 cur_node; const u8 *matchptr; - pos_t *pending_lt_ptr, *pending_gt_ptr; - unsigned best_lt_len, best_gt_len; - unsigned len; - unsigned best_len = min_len - 1; + mf_pos_t *pending_lt_ptr, *pending_gt_ptr; + u32 best_lt_len, best_gt_len; + u32 len; + u32 best_len = 2; if (unlikely(max_len < LZ_HASH3_REQUIRED_NBYTES + 1)) { *best_len_ret = best_len; return lz_matchptr; } - hash = *next_hash; - *next_hash = bt_matchfinder_hash_3_bytes(in_next + 1); - cur_node = mf->hash_tab[hash]; - mf->hash_tab[hash] = in_next - in_begin; - prefetchw(&mf->hash_tab[*next_hash]); + hash3 = *next_hash; + *next_hash = lz_hash(load_u24_unaligned(in_next + 1), BT_MATCHFINDER_HASH3_ORDER); + prefetchw(&mf->hash3_tab[*next_hash]); + +#ifdef BT_MATCHFINDER_HASH2_ORDER + seq2 = load_u16_unaligned(in_next); + hash2 = lz_hash(seq2, BT_MATCHFINDER_HASH2_ORDER); + cur_node = mf->hash2_tab[hash2]; + mf->hash2_tab[hash2] = cur_pos; + if (record_matches && + seq2 == load_u16_unaligned(&in_begin[cur_node]) && + likely(in_next != in_begin)) + { + lz_matchptr->length = 2; + lz_matchptr->offset = in_next - &in_begin[cur_node]; + lz_matchptr++; + } +#endif - pending_lt_ptr = TEMPLATED(bt_left_child)(mf, in_next - in_begin); - pending_gt_ptr = TEMPLATED(bt_right_child)(mf, in_next - in_begin); - best_lt_len = 0; - best_gt_len = 0; - len = 0; + cur_node = mf->hash3_tab[hash3]; + mf->hash3_tab[hash3] = cur_pos; + + pending_lt_ptr = TEMPLATED(bt_left_child)(mf, cur_pos); + pending_gt_ptr = TEMPLATED(bt_right_child)(mf, cur_pos); if (!cur_node) { *pending_lt_ptr = 0; @@ -201,16 +184,23 @@ TEMPLATED(bt_matchfinder_get_matches)(struct TEMPLATED(bt_matchfinder) * const r return lz_matchptr; } + best_lt_len = 0; + best_gt_len = 0; + len = 0; + for (;;) { matchptr = &in_begin[cur_node]; if (matchptr[len] == in_next[len]) { - len = lz_extend(in_next, matchptr, len + 1, max_len); - if (len > best_len) { - best_len = len; - lz_matchptr->length = len; - lz_matchptr->offset = in_next - matchptr; - lz_matchptr++; + len = lz_extend(in_next, matchptr, len + 1, + (record_matches ? max_len : nice_len)); + if (!record_matches || len > best_len) { + if (record_matches) { + best_len = len; + lz_matchptr->length = len; + lz_matchptr->offset = in_next - matchptr; + lz_matchptr++; + } if (len >= nice_len) { *pending_lt_ptr = *TEMPLATED(bt_left_child)(mf, cur_node); *pending_gt_ptr = *TEMPLATED(bt_right_child)(mf, cur_node); @@ -245,6 +235,66 @@ TEMPLATED(bt_matchfinder_get_matches)(struct TEMPLATED(bt_matchfinder) * const r } } +/* + * Retrieve a list of matches with the current position. + * + * @mf + * The matchfinder structure. + * @in_begin + * Pointer to the beginning of the input buffer. + * @cur_pos + * The current position in the input buffer (the position of the sequence + * being matched against). + * @max_len + * The maximum permissible match length at this position. + * @nice_len + * Stop searching if a match of at least this length is found. + * Must be <= @max_len. + * @max_search_depth + * Limit on the number of potential matches to consider. Must be >= 1. + * @next_hash + * Pointer to the hash code for the current sequence, which was computed + * one position in advance so that the binary tree root could be + * prefetched. This is an input/output parameter. + * @best_len_ret + * If a match of length >= 3 was found, then the length of the longest such + * match is written here; otherwise 2 is written here. (Note: this is + * redundant with the 'struct lz_match' array, but this is easier for the + * compiler to optimize when inlined and the caller immediately does a + * check against 'best_len'.) + * @lz_matchptr + * An array in which this function will record the matches. The recorded + * matches will be sorted by strictly increasing length and increasing + * offset. The maximum number of matches that may be found is + * 'min(nice_len, max_len) - 2 + 1', or one less if length 2 matches are + * disabled. + * + * The return value is a pointer to the next available slot in the @lz_matchptr + * array. (If no matches were found, this will be the same as @lz_matchptr.) + */ +static inline struct lz_match * +TEMPLATED(bt_matchfinder_get_matches)(struct TEMPLATED(bt_matchfinder) *mf, + const u8 *in_begin, + ptrdiff_t cur_pos, + u32 max_len, + u32 nice_len, + u32 max_search_depth, + u32 *next_hash, + u32 *best_len_ret, + struct lz_match *lz_matchptr) +{ + return TEMPLATED(bt_matchfinder_advance_one_byte)(mf, + in_begin, + cur_pos, + max_len, + nice_len, + max_search_depth, + next_hash, + best_len_ret, + lz_matchptr, + true); +} + /* * Advance the matchfinder, but don't record any matches. * @@ -252,10 +302,10 @@ TEMPLATED(bt_matchfinder_get_matches)(struct TEMPLATED(bt_matchfinder) * const r * The matchfinder structure. * @in_begin * Pointer to the beginning of the input buffer. - * @in_next - * Pointer to the next byte in the input buffer to process. - * @in_end - * Pointer to the end of the input buffer. + * @cur_pos + * The current position in the input buffer. + * @max_len + * The maximum permissible match length at this position. * @nice_len * Stop searching if a match of at least this length is found. * @max_search_depth @@ -270,76 +320,23 @@ TEMPLATED(bt_matchfinder_get_matches)(struct TEMPLATED(bt_matchfinder) * const r * actually record any matches. */ static inline void -TEMPLATED(bt_matchfinder_skip_position)(struct TEMPLATED(bt_matchfinder) * const restrict mf, - const u8 * const in_begin, - const u8 * const in_next, - const u8 * const in_end, - const unsigned nice_len, - const unsigned max_search_depth, - u32 * restrict next_hash) +TEMPLATED(bt_matchfinder_skip_position)(struct TEMPLATED(bt_matchfinder) *mf, + const u8 *in_begin, + ptrdiff_t cur_pos, + u32 max_len, + u32 nice_len, + u32 max_search_depth, + u32 *next_hash) { - unsigned depth_remaining = max_search_depth; - u32 hash; - pos_t cur_node; - const u8 *matchptr; - pos_t *pending_lt_ptr, *pending_gt_ptr; - unsigned best_lt_len, best_gt_len; - unsigned len; - - if (unlikely(in_end - in_next < LZ_HASH3_REQUIRED_NBYTES + 1)) - return; - - hash = *next_hash; - *next_hash = bt_matchfinder_hash_3_bytes(in_next + 1); - cur_node = mf->hash_tab[hash]; - mf->hash_tab[hash] = in_next - in_begin; - prefetchw(&mf->hash_tab[*next_hash]); - - depth_remaining = max_search_depth; - pending_lt_ptr = TEMPLATED(bt_left_child)(mf, in_next - in_begin); - pending_gt_ptr = TEMPLATED(bt_right_child)(mf, in_next - in_begin); - best_lt_len = 0; - best_gt_len = 0; - len = 0; - - if (!cur_node) { - *pending_lt_ptr = 0; - *pending_gt_ptr = 0; - return; - } - - for (;;) { - matchptr = &in_begin[cur_node]; - - if (matchptr[len] == in_next[len]) { - len = lz_extend(in_next, matchptr, len + 1, nice_len); - if (len == nice_len) { - *pending_lt_ptr = *TEMPLATED(bt_left_child)(mf, cur_node); - *pending_gt_ptr = *TEMPLATED(bt_right_child)(mf, cur_node); - return; - } - } - - if (matchptr[len] < in_next[len]) { - *pending_lt_ptr = cur_node; - pending_lt_ptr = TEMPLATED(bt_right_child)(mf, cur_node); - cur_node = *pending_lt_ptr; - best_lt_len = len; - if (best_gt_len < len) - len = best_gt_len; - } else { - *pending_gt_ptr = cur_node; - pending_gt_ptr = TEMPLATED(bt_left_child)(mf, cur_node); - cur_node = *pending_gt_ptr; - best_gt_len = len; - if (best_lt_len < len) - len = best_lt_len; - } - - if (!cur_node || !--depth_remaining) { - *pending_lt_ptr = 0; - *pending_gt_ptr = 0; - return; - } - } + u32 best_len; + TEMPLATED(bt_matchfinder_advance_one_byte)(mf, + in_begin, + cur_pos, + max_len, + nice_len, + max_search_depth, + next_hash, + &best_len, + NULL, + false); }