/*
- * bt_matchfinder.h
+ * bt_matchfinder.h - Lempel-Ziv matchfinding with a hash table of binary trees
*
- * Author: Eric Biggers
- * Year: 2014, 2015
+ * The following copying information applies to this specific source code file:
*
- * The author dedicates this file to the public domain.
- * You can do whatever you want with this file.
+ * Written in 2014-2016 by Eric Biggers <ebiggers3@gmail.com>
+ *
+ * To the extent possible under law, the author(s) have dedicated all copyright
+ * and related and neighboring rights to this software to the public domain
+ * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
+ * Dedication (the "CC0").
+ *
+ * This software is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
+ *
+ * You should have received a copy of the CC0 along with this software; if not
+ * see <http://creativecommons.org/publicdomain/zero/1.0/>.
*
* ----------------------------------------------------------------------------
*
* This is a Binary Trees (bt) based matchfinder.
*
* The main data structure is a hash table where each hash bucket contains a
- * binary tree of sequences whose first 3 bytes share the same hash code. Each
+ * binary tree of sequences whose first 4 bytes share the same hash code. Each
* sequence is identified by its starting position in the input buffer. Each
* binary tree is always sorted such that each left child represents a sequence
* lexicographically lesser than its parent and each right child represents a
* sequence lexicographically greater than its parent.
*
* The algorithm processes the input buffer sequentially. At each byte
- * position, the hash code of the first 3 bytes of the sequence beginning at
+ * position, the hash code of the first 4 bytes of the sequence beginning at
* that position (the sequence being matched against) is computed. This
* identifies the hash bucket to use for that position. Then, a new binary tree
* node is created to represent the current sequence. Then, in a single tree
#include "wimlib/lz_extend.h"
#include "wimlib/lz_hash.h"
-#define BT_MATCHFINDER_HASH3_ORDER 16
+#define BT_MATCHFINDER_HASH3_ORDER 15
+#define BT_MATCHFINDER_HASH3_WAYS 2
+#define BT_MATCHFINDER_HASH4_ORDER 16
/* TEMPLATED functions and structures have MF_SUFFIX appended to their name. */
#undef TEMPLATED
mf_pos_t hash2_tab[1UL << BT_MATCHFINDER_HASH2_ORDER];
#endif
+ /* The hash table for finding length 3 matches */
+ mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER][BT_MATCHFINDER_HASH3_WAYS];
+
/* The hash table which contains the roots of the binary trees for
- * finding length 3 matches */
- mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER];
+ * finding length 4+ matches */
+ mf_pos_t hash4_tab[1UL << BT_MATCHFINDER_HASH4_ORDER];
/* The child node references for the binary trees. The left and right
* children of the node for the sequence with position 'pos' are
return &mf->child_tab[(node << 1) + 1];
}
+/* The minimum permissible value of 'max_len' for bt_matchfinder_get_matches()
+ * and bt_matchfinder_skip_position(). There must be sufficiently many bytes
+ * remaining to load a 32-bit integer from the *next* position. */
+#define BT_MATCHFINDER_REQUIRED_NBYTES 5
+
/* Advance the binary tree matchfinder by one byte, optionally recording
* matches. @record_matches should be a compile-time constant. */
static inline struct lz_match *
const u32 max_len,
const u32 nice_len,
const u32 max_search_depth,
- u32 * const restrict next_hash,
+ u32 next_hashes[const restrict static 2],
u32 * const restrict best_len_ret,
struct lz_match * restrict lz_matchptr,
const bool record_matches)
{
const u8 *in_next = in_begin + cur_pos;
u32 depth_remaining = max_search_depth;
+ u32 next_seq4;
+ u32 next_seq3;
+ u32 hash3;
+ u32 hash4;
#ifdef BT_MATCHFINDER_HASH2_ORDER
u16 seq2;
u32 hash2;
#endif
- u32 hash3;
+ STATIC_ASSERT(BT_MATCHFINDER_HASH3_WAYS >= 1 &&
+ BT_MATCHFINDER_HASH3_WAYS <= 2);
u32 cur_node;
+#if BT_MATCHFINDER_HASH3_WAYS >= 2
+ u32 cur_node_2;
+#endif
const u8 *matchptr;
mf_pos_t *pending_lt_ptr, *pending_gt_ptr;
u32 best_lt_len, best_gt_len;
u32 len;
- u32 best_len = 2;
+ u32 best_len = 3;
- if (unlikely(max_len < LZ_HASH3_REQUIRED_NBYTES + 1)) {
- *best_len_ret = best_len;
- return lz_matchptr;
- }
+ next_seq4 = load_u32_unaligned(in_next + 1);
+ next_seq3 = loaded_u32_to_u24(next_seq4);
- hash3 = *next_hash;
- *next_hash = lz_hash(load_u24_unaligned(in_next + 1), BT_MATCHFINDER_HASH3_ORDER);
- prefetchw(&mf->hash3_tab[*next_hash]);
+ hash3 = next_hashes[0];
+ hash4 = next_hashes[1];
+
+ next_hashes[0] = lz_hash(next_seq3, BT_MATCHFINDER_HASH3_ORDER);
+ next_hashes[1] = lz_hash(next_seq4, BT_MATCHFINDER_HASH4_ORDER);
+ prefetchw(&mf->hash3_tab[next_hashes[0]]);
+ prefetchw(&mf->hash4_tab[next_hashes[1]]);
#ifdef BT_MATCHFINDER_HASH2_ORDER
seq2 = load_u16_unaligned(in_next);
}
#endif
- cur_node = mf->hash3_tab[hash3];
- mf->hash3_tab[hash3] = cur_pos;
+ cur_node = mf->hash3_tab[hash3][0];
+ mf->hash3_tab[hash3][0] = cur_pos;
+#if BT_MATCHFINDER_HASH3_WAYS >= 2
+ cur_node_2 = mf->hash3_tab[hash3][1];
+ mf->hash3_tab[hash3][1] = cur_node;
+#endif
+ if (record_matches && likely(in_next != in_begin)) {
+ u32 seq3 = load_u24_unaligned(in_next);
+ if (seq3 == load_u24_unaligned(&in_begin[cur_node])) {
+ lz_matchptr->length = 3;
+ lz_matchptr->offset = in_next - &in_begin[cur_node];
+ lz_matchptr++;
+ }
+ #if BT_MATCHFINDER_HASH3_WAYS >= 2
+ else if (seq3 == load_u24_unaligned(&in_begin[cur_node_2])) {
+ lz_matchptr->length = 3;
+ lz_matchptr->offset = in_next - &in_begin[cur_node_2];
+ lz_matchptr++;
+ }
+ #endif
+ }
+
+ cur_node = mf->hash4_tab[hash4];
+ mf->hash4_tab[hash4] = cur_pos;
pending_lt_ptr = TEMPLATED(bt_left_child)(mf, cur_pos);
pending_gt_ptr = TEMPLATED(bt_right_child)(mf, cur_pos);
matchptr = &in_begin[cur_node];
if (matchptr[len] == in_next[len]) {
- len = lz_extend(in_next, matchptr, len + 1,
- (record_matches ? max_len : nice_len));
+ len = lz_extend(in_next, matchptr, len + 1, max_len);
if (!record_matches || len > best_len) {
if (record_matches) {
best_len = len;
* The current position in the input buffer (the position of the sequence
* being matched against).
* @max_len
- * The maximum permissible match length at this position.
+ * The maximum permissible match length at this position. Must be >=
+ * BT_MATCHFINDER_REQUIRED_NBYTES.
* @nice_len
* Stop searching if a match of at least this length is found.
* Must be <= @max_len.
* @max_search_depth
* Limit on the number of potential matches to consider. Must be >= 1.
- * @next_hash
- * Pointer to the hash code for the current sequence, which was computed
- * one position in advance so that the binary tree root could be
- * prefetched. This is an input/output parameter.
+ * @next_hashes
+ * The precomputed hash codes for the sequence beginning at @in_next.
+ * These will be used and then updated with the precomputed hashcodes for
+ * the sequence beginning at @in_next + 1.
* @best_len_ret
- * If a match of length >= 3 was found, then the length of the longest such
- * match is written here; otherwise 2 is written here. (Note: this is
+ * If a match of length >= 4 was found, then the length of the longest such
+ * match is written here; otherwise 3 is written here. (Note: this is
* redundant with the 'struct lz_match' array, but this is easier for the
* compiler to optimize when inlined and the caller immediately does a
* check against 'best_len'.)
* @lz_matchptr
* An array in which this function will record the matches. The recorded
- * matches will be sorted by strictly increasing length and increasing
- * offset. The maximum number of matches that may be found is
- * 'min(nice_len, max_len) - 2 + 1', or one less if length 2 matches are
- * disabled.
+ * matches will be sorted by strictly increasing length and (non-strictly)
+ * increasing offset. The maximum number of matches that may be found is
+ * 'nice_len - 1', or one less if length 2 matches are disabled.
*
* The return value is a pointer to the next available slot in the @lz_matchptr
* array. (If no matches were found, this will be the same as @lz_matchptr.)
u32 max_len,
u32 nice_len,
u32 max_search_depth,
- u32 *next_hash,
+ u32 next_hashes[static 2],
u32 *best_len_ret,
struct lz_match *lz_matchptr)
{
max_len,
nice_len,
max_search_depth,
- next_hash,
+ next_hashes,
best_len_ret,
lz_matchptr,
true);
/*
* Advance the matchfinder, but don't record any matches.
*
- * @mf
- * The matchfinder structure.
- * @in_begin
- * Pointer to the beginning of the input buffer.
- * @cur_pos
- * The current position in the input buffer.
- * @max_len
- * The maximum permissible match length at this position.
- * @nice_len
- * Stop searching if a match of at least this length is found.
- * @max_search_depth
- * Limit on the number of potential matches to consider.
- * @next_hash
- * Pointer to the hash code for the current sequence, which was computed
- * one position in advance so that the binary tree root could be
- * prefetched. This is an input/output parameter.
- *
- * Note: this is very similar to bt_matchfinder_get_matches() because both
- * functions must do hashing and tree re-rooting. This version just doesn't
- * actually record any matches.
+ * This is very similar to bt_matchfinder_get_matches() because both functions
+ * must do hashing and tree re-rooting.
*/
static inline void
TEMPLATED(bt_matchfinder_skip_position)(struct TEMPLATED(bt_matchfinder) *mf,
const u8 *in_begin,
ptrdiff_t cur_pos,
- u32 max_len,
u32 nice_len,
u32 max_search_depth,
- u32 *next_hash)
+ u32 next_hashes[static 2])
{
u32 best_len;
TEMPLATED(bt_matchfinder_advance_one_byte)(mf,
in_begin,
cur_pos,
- max_len,
+ nice_len,
nice_len,
max_search_depth,
- next_hash,
+ next_hashes,
&best_len,
NULL,
false);