/*
- * bt_matchfinder.h
+ * bt_matchfinder.h - Lempel-Ziv matchfinding with a hash table of binary trees
*
- * Author: Eric Biggers
- * Year: 2014, 2015
+ * Copyright 2022 Eric Biggers
*
- * The author dedicates this file to the public domain.
- * You can do whatever you want with this file.
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
*
* ----------------------------------------------------------------------------
*
* This is a Binary Trees (bt) based matchfinder.
*
* The main data structure is a hash table where each hash bucket contains a
- * binary tree of sequences whose first 3 bytes share the same hash code. Each
+ * binary tree of sequences whose first 4 bytes share the same hash code. Each
* sequence is identified by its starting position in the input buffer. Each
* binary tree is always sorted such that each left child represents a sequence
* lexicographically lesser than its parent and each right child represents a
* sequence lexicographically greater than its parent.
*
* The algorithm processes the input buffer sequentially. At each byte
- * position, the hash code of the first 3 bytes of the sequence beginning at
+ * position, the hash code of the first 4 bytes of the sequence beginning at
* that position (the sequence being matched against) is computed. This
* identifies the hash bucket to use for that position. Then, a new binary tree
* node is created to represent the current sequence. Then, in a single tree
#include <string.h>
-#include "wimlib/lz_extend.h"
-#include "wimlib/lz_hash.h"
+#include "wimlib/matchfinder_common.h"
-#define BT_MATCHFINDER_HASH3_ORDER 16
+#define BT_MATCHFINDER_HASH3_ORDER 15
+#define BT_MATCHFINDER_HASH3_WAYS 2
+#define BT_MATCHFINDER_HASH4_ORDER 16
/* TEMPLATED functions and structures have MF_SUFFIX appended to their name. */
#undef TEMPLATED
mf_pos_t hash2_tab[1UL << BT_MATCHFINDER_HASH2_ORDER];
#endif
+ /* The hash table for finding length 3 matches */
+ mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER][BT_MATCHFINDER_HASH3_WAYS];
+
/* The hash table which contains the roots of the binary trees for
- * finding length 3 matches */
- mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER];
+ * finding length 4+ matches */
+ mf_pos_t hash4_tab[1UL << BT_MATCHFINDER_HASH4_ORDER];
/* The child node references for the binary trees. The left and right
* children of the node for the sequence with position 'pos' are
/* Return the number of bytes that must be allocated for a 'bt_matchfinder' that
* can work with buffers up to the specified size. */
-static inline size_t
+static forceinline size_t
TEMPLATED(bt_matchfinder_size)(size_t max_bufsize)
{
return sizeof(struct TEMPLATED(bt_matchfinder)) +
}
/* Prepare the matchfinder for a new input buffer. */
-static inline void
+static forceinline void
TEMPLATED(bt_matchfinder_init)(struct TEMPLATED(bt_matchfinder) *mf)
{
memset(mf, 0, sizeof(*mf));
}
-static inline mf_pos_t *
+static forceinline mf_pos_t *
TEMPLATED(bt_left_child)(struct TEMPLATED(bt_matchfinder) *mf, u32 node)
{
return &mf->child_tab[(node << 1) + 0];
}
-static inline mf_pos_t *
+static forceinline mf_pos_t *
TEMPLATED(bt_right_child)(struct TEMPLATED(bt_matchfinder) *mf, u32 node)
{
return &mf->child_tab[(node << 1) + 1];
}
+/* The minimum permissible value of 'max_len' for bt_matchfinder_get_matches()
+ * and bt_matchfinder_skip_byte(). There must be sufficiently many bytes
+ * remaining to load a 32-bit integer from the *next* position. */
+#define BT_MATCHFINDER_REQUIRED_NBYTES 5
+
/* Advance the binary tree matchfinder by one byte, optionally recording
* matches. @record_matches should be a compile-time constant. */
-static inline struct lz_match *
-TEMPLATED(bt_matchfinder_advance_one_byte)(struct TEMPLATED(bt_matchfinder) * const restrict mf,
- const u8 * const restrict in_begin,
+static forceinline struct lz_match *
+TEMPLATED(bt_matchfinder_advance_one_byte)(struct TEMPLATED(bt_matchfinder) * const mf,
+ const u8 * const in_begin,
const ptrdiff_t cur_pos,
const u32 max_len,
const u32 nice_len,
const u32 max_search_depth,
- u32 * const restrict next_hash,
- u32 * const restrict best_len_ret,
- struct lz_match * restrict lz_matchptr,
+ u32 * const next_hashes,
+ u32 * const best_len_ret,
+ struct lz_match *lz_matchptr,
const bool record_matches)
{
const u8 *in_next = in_begin + cur_pos;
u32 depth_remaining = max_search_depth;
+ u32 next_hashseq;
+ u32 hash3;
+ u32 hash4;
#ifdef BT_MATCHFINDER_HASH2_ORDER
u16 seq2;
u32 hash2;
#endif
- u32 hash3;
+ STATIC_ASSERT(BT_MATCHFINDER_HASH3_WAYS >= 1 &&
+ BT_MATCHFINDER_HASH3_WAYS <= 2);
u32 cur_node;
+#if BT_MATCHFINDER_HASH3_WAYS >= 2
+ u32 cur_node_2;
+#endif
const u8 *matchptr;
mf_pos_t *pending_lt_ptr, *pending_gt_ptr;
u32 best_lt_len, best_gt_len;
u32 len;
- u32 best_len = 2;
+ u32 best_len = 3;
- if (unlikely(max_len < LOAD_U24_REQUIRED_NBYTES + 1)) {
- *best_len_ret = best_len;
- return lz_matchptr;
- }
+ next_hashseq = get_unaligned_le32(in_next + 1);
+
+ hash3 = next_hashes[0];
+ hash4 = next_hashes[1];
- hash3 = *next_hash;
- *next_hash = lz_hash(load_u24_unaligned(in_next + 1), BT_MATCHFINDER_HASH3_ORDER);
- prefetchw(&mf->hash3_tab[*next_hash]);
+ next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, BT_MATCHFINDER_HASH3_ORDER);
+ next_hashes[1] = lz_hash(next_hashseq, BT_MATCHFINDER_HASH4_ORDER);
+ prefetchw(&mf->hash3_tab[next_hashes[0]]);
+ prefetchw(&mf->hash4_tab[next_hashes[1]]);
#ifdef BT_MATCHFINDER_HASH2_ORDER
seq2 = load_u16_unaligned(in_next);
}
#endif
- cur_node = mf->hash3_tab[hash3];
- mf->hash3_tab[hash3] = cur_pos;
+ cur_node = mf->hash3_tab[hash3][0];
+ mf->hash3_tab[hash3][0] = cur_pos;
+#if BT_MATCHFINDER_HASH3_WAYS >= 2
+ cur_node_2 = mf->hash3_tab[hash3][1];
+ mf->hash3_tab[hash3][1] = cur_node;
+#endif
+ if (record_matches && likely(in_next != in_begin)) {
+ u32 seq3 = load_u24_unaligned(in_next);
+ if (seq3 == load_u24_unaligned(&in_begin[cur_node])) {
+ lz_matchptr->length = 3;
+ lz_matchptr->offset = in_next - &in_begin[cur_node];
+ lz_matchptr++;
+ }
+ #if BT_MATCHFINDER_HASH3_WAYS >= 2
+ else if (seq3 == load_u24_unaligned(&in_begin[cur_node_2])) {
+ lz_matchptr->length = 3;
+ lz_matchptr->offset = in_next - &in_begin[cur_node_2];
+ lz_matchptr++;
+ }
+ #endif
+ }
+
+ cur_node = mf->hash4_tab[hash4];
+ mf->hash4_tab[hash4] = cur_pos;
pending_lt_ptr = TEMPLATED(bt_left_child)(mf, cur_pos);
pending_gt_ptr = TEMPLATED(bt_right_child)(mf, cur_pos);
matchptr = &in_begin[cur_node];
if (matchptr[len] == in_next[len]) {
- len = lz_extend(in_next, matchptr, len + 1,
- (record_matches ? max_len : nice_len));
+ len = lz_extend(in_next, matchptr, len + 1, max_len);
if (!record_matches || len > best_len) {
if (record_matches) {
best_len = len;
* @in_begin
* Pointer to the beginning of the input buffer.
* @cur_pos
- * The current position in the input buffer (the position of the sequence
- * being matched against).
+ * The current position in the input buffer relative to @in_begin (the
+ * position of the sequence being matched against).
* @max_len
- * The maximum permissible match length at this position.
+ * The maximum permissible match length at this position. Must be >=
+ * BT_MATCHFINDER_REQUIRED_NBYTES.
* @nice_len
* Stop searching if a match of at least this length is found.
* Must be <= @max_len.
* @max_search_depth
* Limit on the number of potential matches to consider. Must be >= 1.
- * @next_hash
- * Pointer to the hash code for the current sequence, which was computed
- * one position in advance so that the binary tree root could be
- * prefetched. This is an input/output parameter.
+ * @next_hashes
+ * The precomputed hash codes for the sequence beginning at @in_next.
+ * These will be used and then updated with the precomputed hashcodes for
+ * the sequence beginning at @in_next + 1.
* @best_len_ret
- * If a match of length >= 3 was found, then the length of the longest such
- * match is written here; otherwise 2 is written here. (Note: this is
+ * If a match of length >= 4 was found, then the length of the longest such
+ * match is written here; otherwise 3 is written here. (Note: this is
* redundant with the 'struct lz_match' array, but this is easier for the
* compiler to optimize when inlined and the caller immediately does a
* check against 'best_len'.)
* @lz_matchptr
* An array in which this function will record the matches. The recorded
- * matches will be sorted by strictly increasing length and increasing
- * offset. The maximum number of matches that may be found is
- * 'min(nice_len, max_len) - 2 + 1', or one less if length 2 matches are
- * disabled.
+ * matches will be sorted by strictly increasing length and (non-strictly)
+ * increasing offset. The maximum number of matches that may be found is
+ * 'nice_len - 1', or one less if length 2 matches are disabled.
*
* The return value is a pointer to the next available slot in the @lz_matchptr
* array. (If no matches were found, this will be the same as @lz_matchptr.)
*/
-static inline struct lz_match *
+static forceinline struct lz_match *
TEMPLATED(bt_matchfinder_get_matches)(struct TEMPLATED(bt_matchfinder) *mf,
const u8 *in_begin,
ptrdiff_t cur_pos,
u32 max_len,
u32 nice_len,
u32 max_search_depth,
- u32 *next_hash,
+ u32 next_hashes[2],
u32 *best_len_ret,
struct lz_match *lz_matchptr)
{
max_len,
nice_len,
max_search_depth,
- next_hash,
+ next_hashes,
best_len_ret,
lz_matchptr,
true);
/*
* Advance the matchfinder, but don't record any matches.
*
- * @mf
- * The matchfinder structure.
- * @in_begin
- * Pointer to the beginning of the input buffer.
- * @cur_pos
- * The current position in the input buffer.
- * @max_len
- * The maximum permissible match length at this position.
- * @nice_len
- * Stop searching if a match of at least this length is found.
- * @max_search_depth
- * Limit on the number of potential matches to consider.
- * @next_hash
- * Pointer to the hash code for the current sequence, which was computed
- * one position in advance so that the binary tree root could be
- * prefetched. This is an input/output parameter.
- *
- * Note: this is very similar to bt_matchfinder_get_matches() because both
- * functions must do hashing and tree re-rooting. This version just doesn't
- * actually record any matches.
+ * This is very similar to bt_matchfinder_get_matches() because both functions
+ * must do hashing and tree re-rooting.
*/
-static inline void
-TEMPLATED(bt_matchfinder_skip_position)(struct TEMPLATED(bt_matchfinder) *mf,
- const u8 *in_begin,
- ptrdiff_t cur_pos,
- u32 max_len,
- u32 nice_len,
- u32 max_search_depth,
- u32 *next_hash)
+static forceinline void
+TEMPLATED(bt_matchfinder_skip_byte)(struct TEMPLATED(bt_matchfinder) *mf,
+ const u8 *in_begin,
+ ptrdiff_t cur_pos,
+ u32 nice_len,
+ u32 max_search_depth,
+ u32 next_hashes[2])
{
u32 best_len;
TEMPLATED(bt_matchfinder_advance_one_byte)(mf,
in_begin,
cur_pos,
- max_len,
+ nice_len,
nice_len,
max_search_depth,
- next_hash,
+ next_hashes,
&best_len,
NULL,
false);