/*
* hc_matchfinder.h - Lempel-Ziv matchfinding with a hash table of linked lists
*
- * The following copying information applies to this specific source code file:
- *
- * Written in 2014-2015 by Eric Biggers <ebiggers3@gmail.com>
- *
- * To the extent possible under law, the author(s) have dedicated all copyright
- * and related and neighboring rights to this software to the public domain
- * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
- * Dedication (the "CC0").
- *
- * This software is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
- *
- * You should have received a copy of the CC0 along with this software; if not
- * see <http://creativecommons.org/publicdomain/zero/1.0/>.
+ * Copyright 2022 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
*
* ---------------------------------------------------------------------------
*
* chain for length 3+ matches, the algorithm just checks for one close length 3
* match, then focuses on finding length 4+ matches.
*
- * The longest_match() and skip_positions() functions are inlined into the
+ * The longest_match() and skip_bytes() functions are inlined into the
* compressors that use them. This isn't just about saving the overhead of a
* function call. These functions are intended to be called from the inner
* loops of compressors, where giving the compiler more control over register
#include "wimlib/lz_hash.h"
#include "wimlib/unaligned.h"
-#define HC_MATCHFINDER_HASH3_ORDER 14
-#define HC_MATCHFINDER_HASH4_ORDER 15
+#define HC_MATCHFINDER_HASH3_ORDER 15
+#define HC_MATCHFINDER_HASH4_ORDER 16
/* TEMPLATED functions and structures have MF_SUFFIX appended to their name. */
#undef TEMPLATED
/* Return the number of bytes that must be allocated for a 'hc_matchfinder' that
* can work with buffers up to the specified size. */
-static inline size_t
+static forceinline size_t
TEMPLATED(hc_matchfinder_size)(size_t max_bufsize)
{
return sizeof(struct TEMPLATED(hc_matchfinder)) +
}
/* Prepare the matchfinder for a new input buffer. */
-static inline void
+static forceinline void
TEMPLATED(hc_matchfinder_init)(struct TEMPLATED(hc_matchfinder) *mf)
{
memset(mf, 0, sizeof(*mf));
* The matchfinder structure.
* @in_begin
* Pointer to the beginning of the input buffer.
- * @cur_pos
- * The current position in the input buffer (the position of the sequence
- * being matched against).
+ * @in_next
+ * Pointer to the next position in the input buffer, i.e. the sequence
+ * being matched against.
* @best_len
* Require a match longer than this length.
* @max_len
* Return the length of the match found, or 'best_len' if no match longer than
* 'best_len' was found.
*/
-static inline u32
-TEMPLATED(hc_matchfinder_longest_match)(struct TEMPLATED(hc_matchfinder) * const restrict mf,
- const u8 * const restrict in_begin,
- const ptrdiff_t cur_pos,
+static forceinline u32
+TEMPLATED(hc_matchfinder_longest_match)(struct TEMPLATED(hc_matchfinder) * const mf,
+ const u8 * const in_begin,
+ const u8 * const in_next,
u32 best_len,
const u32 max_len,
const u32 nice_len,
const u32 max_search_depth,
- u32 next_hashes[const restrict static 2],
- u32 * const restrict offset_ret)
+ u32 * const next_hashes,
+ u32 * const offset_ret)
{
- const u8 *in_next = in_begin + cur_pos;
u32 depth_remaining = max_search_depth;
- const u8 *best_matchptr = best_matchptr; /* uninitialized */
+ const u8 *best_matchptr = in_next;
mf_pos_t cur_node3, cur_node4;
u32 hash3, hash4;
- u32 next_seq3, next_seq4;
+ u32 next_hashseq;
u32 seq4;
const u8 *matchptr;
u32 len;
+ u32 cur_pos = in_next - in_begin;
if (unlikely(max_len < 5)) /* can we read 4 bytes from 'in_next + 1'? */
goto out;
mf->next_tab[cur_pos] = cur_node4;
/* Compute the next hash codes. */
- next_seq4 = load_u32_unaligned(in_next + 1);
- next_seq3 = loaded_u32_to_u24(next_seq4);
- next_hashes[0] = lz_hash(next_seq3, HC_MATCHFINDER_HASH3_ORDER);
- next_hashes[1] = lz_hash(next_seq4, HC_MATCHFINDER_HASH4_ORDER);
+ next_hashseq = get_unaligned_le32(in_next + 1);
+ next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER);
+ next_hashes[1] = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER);
prefetchw(&mf->hash3_tab[next_hashes[0]]);
prefetchw(&mf->hash4_tab[next_hashes[1]]);
* The matchfinder structure.
* @in_begin
* Pointer to the beginning of the input buffer.
- * @cur_pos
- * The current position in the input buffer (the position of the sequence
- * being matched against).
- * @end_pos
- * The length of the input buffer.
+ * @in_next
+ * Pointer to the next position in the input buffer.
+ * @in_end
+ * Pointer to the end of the input buffer.
+ * @count
+ * The number of bytes to advance. Must be > 0.
* @next_hashes
* The precomputed hash codes for the sequence beginning at @in_next.
* These will be used and then updated with the precomputed hashcodes for
* the sequence beginning at @in_next + @count.
- * @count
- * The number of bytes to advance. Must be > 0.
- *
- * Returns @in_next + @count.
*/
-static inline const u8 *
-TEMPLATED(hc_matchfinder_skip_positions)(struct TEMPLATED(hc_matchfinder) * const restrict mf,
- const u8 * const restrict in_begin,
- const ptrdiff_t cur_pos,
- const ptrdiff_t end_pos,
- const u32 count,
- u32 next_hashes[const restrict static 2])
+static forceinline void
+TEMPLATED(hc_matchfinder_skip_bytes)(struct TEMPLATED(hc_matchfinder) * const mf,
+ const u8 * const in_begin,
+ const u8 *in_next,
+ const u8 * const in_end,
+ const u32 count,
+ u32 * const next_hashes)
{
- const u8 *in_next = in_begin + cur_pos;
- const u8 * const stop_ptr = in_next + count;
-
- if (likely(count + 5 <= end_pos - cur_pos)) {
- u32 hash3, hash4;
- u32 next_seq3, next_seq4;
-
- hash3 = next_hashes[0];
- hash4 = next_hashes[1];
- do {
- mf->hash3_tab[hash3] = in_next - in_begin;
- mf->next_tab[in_next - in_begin] = mf->hash4_tab[hash4];
- mf->hash4_tab[hash4] = in_next - in_begin;
-
- next_seq4 = load_u32_unaligned(++in_next);
- next_seq3 = loaded_u32_to_u24(next_seq4);
- hash3 = lz_hash(next_seq3, HC_MATCHFINDER_HASH3_ORDER);
- hash4 = lz_hash(next_seq4, HC_MATCHFINDER_HASH4_ORDER);
-
- } while (in_next != stop_ptr);
-
- prefetchw(&mf->hash3_tab[hash3]);
- prefetchw(&mf->hash4_tab[hash4]);
- next_hashes[0] = hash3;
- next_hashes[1] = hash4;
- }
+ u32 cur_pos;
+ u32 hash3, hash4;
+ u32 next_hashseq;
+ u32 remaining = count;
- return stop_ptr;
+ if (unlikely(count + 5 > in_end - in_next))
+ return;
+
+ cur_pos = in_next - in_begin;
+ hash3 = next_hashes[0];
+ hash4 = next_hashes[1];
+ do {
+ mf->hash3_tab[hash3] = cur_pos;
+ mf->next_tab[cur_pos] = mf->hash4_tab[hash4];
+ mf->hash4_tab[hash4] = cur_pos;
+
+ next_hashseq = get_unaligned_le32(++in_next);
+ hash3 = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER);
+ hash4 = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER);
+ cur_pos++;
+ } while (--remaining);
+
+ prefetchw(&mf->hash3_tab[hash3]);
+ prefetchw(&mf->hash4_tab[hash4]);
+ next_hashes[0] = hash3;
+ next_hashes[1] = hash4;
}