--- /dev/null
+/*
+ * hc_matchfinder.h
+ *
+ * Copyright (c) 2014 Eric Biggers. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include "wimlib/lz_extend.h"
+#include "wimlib/lz_hash3.h"
+#include "wimlib/matchfinder_common.h"
+#include "wimlib/unaligned.h"
+
+#ifndef HC_MATCHFINDER_HASH_ORDER
+# if MATCHFINDER_WINDOW_ORDER < 14
+# define HC_MATCHFINDER_HASH_ORDER 14
+# else
+# define HC_MATCHFINDER_HASH_ORDER 15
+# endif
+#endif
+
+#define HC_MATCHFINDER_HASH_LENGTH (1UL << HC_MATCHFINDER_HASH_ORDER)
+
+#define HC_MATCHFINDER_TOTAL_LENGTH \
+ (HC_MATCHFINDER_HASH_LENGTH + MATCHFINDER_WINDOW_SIZE)
+
+struct hc_matchfinder {
+ union {
+ pos_t mf_data[HC_MATCHFINDER_TOTAL_LENGTH];
+ struct {
+ pos_t hash_tab[HC_MATCHFINDER_HASH_LENGTH];
+ pos_t child_tab[MATCHFINDER_WINDOW_SIZE];
+ };
+ };
+} _aligned_attribute(MATCHFINDER_ALIGNMENT);
+
+/*
+ * Call before running the first byte through the matchfinder.
+ */
+static inline void
+hc_matchfinder_init(struct hc_matchfinder *mf)
+{
+ matchfinder_init(mf->hash_tab, HC_MATCHFINDER_HASH_LENGTH);
+}
+
+#if MATCHFINDER_IS_SLIDING
+static inline void
+hc_matchfinder_slide_window(struct hc_matchfinder *mf)
+{
+ matchfinder_rebase(mf->mf_data, HC_MATCHFINDER_TOTAL_LENGTH);
+}
+#endif
+
+/*
+ * Find the longest match longer than 'best_len'.
+ *
+ * @mf
+ * The matchfinder structure.
+ * @in_base
+ * Pointer to the next byte in the input buffer to process _at the last
+ * time hc_matchfinder_init() or hc_matchfinder_slide_window() was called_.
+ * @in_next
+ * Pointer to the next byte in the input buffer to process. This is the
+ * pointer to the bytes being matched against.
+ * @best_len
+ * Require a match at least this long.
+ * @max_len
+ * Maximum match length to return.
+ * @nice_len
+ * Stop searching if a match of at least this length is found.
+ * @max_search_depth
+ * Limit on the number of potential matches to consider.
+ * @offset_ret
+ * The match offset is returned here.
+ *
+ * Return the length of the match found, or 'best_len' if no match longer than
+ * 'best_len' was found.
+ */
+static inline unsigned
+hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
+ const u8 * const in_base,
+ const u8 * const in_next,
+ unsigned best_len,
+ const unsigned max_len,
+ const unsigned nice_len,
+ const unsigned max_search_depth,
+ unsigned *offset_ret)
+{
+ unsigned depth_remaining = max_search_depth;
+ const u8 *best_matchptr = best_matchptr; /* uninitialized */
+ const u8 *matchptr;
+ unsigned len;
+ unsigned hash;
+ pos_t cur_match;
+ u32 first_3_bytes;
+
+ /* Insert the current sequence into the appropriate hash chain. */
+ if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES))
+ goto out;
+ first_3_bytes = load_u24_unaligned(in_next);
+ hash = lz_hash_u24(first_3_bytes, HC_MATCHFINDER_HASH_ORDER);
+ cur_match = mf->hash_tab[hash];
+ mf->child_tab[in_next - in_base] = cur_match;
+ mf->hash_tab[hash] = in_next - in_base;
+
+ if (unlikely(best_len >= max_len))
+ goto out;
+
+ /* Search the appropriate hash chain for matches. */
+
+ if (!(matchfinder_match_in_window(cur_match, in_base, in_next)))
+ goto out;
+
+ if (best_len < 3) {
+ for (;;) {
+ /* No length 3 match found yet.
+ * Check the first 3 bytes. */
+ matchptr = &in_base[cur_match];
+
+ if (load_u24_unaligned(matchptr) == first_3_bytes)
+ break;
+
+ /* Not a match; keep trying. */
+ cur_match = mf->child_tab[
+ matchfinder_slot_for_match(cur_match)];
+ if (!matchfinder_match_in_window(cur_match,
+ in_base, in_next))
+ goto out;
+ if (!--depth_remaining)
+ goto out;
+ }
+
+ /* Found a length 3 match. */
+ best_matchptr = matchptr;
+ best_len = lz_extend(in_next, best_matchptr, 3, max_len);
+ if (best_len >= nice_len)
+ goto out;
+ cur_match = mf->child_tab[matchfinder_slot_for_match(cur_match)];
+ if (!matchfinder_match_in_window(cur_match, in_base, in_next))
+ goto out;
+ if (!--depth_remaining)
+ goto out;
+ }
+
+ for (;;) {
+ for (;;) {
+ matchptr = &in_base[cur_match];
+
+ /* Already found a length 3 match. Try for a longer match;
+ * start by checking the last 2 bytes and the first 4 bytes. */
+ #if UNALIGNED_ACCESS_IS_FAST
+ if ((load_u32_unaligned(matchptr + best_len - 3) ==
+ load_u32_unaligned(in_next + best_len - 3)) &&
+ (load_u32_unaligned(matchptr) ==
+ load_u32_unaligned(in_next)))
+ #else
+ if (matchptr[best_len] == in_next[best_len])
+ #endif
+ break;
+
+ cur_match = mf->child_tab[matchfinder_slot_for_match(cur_match)];
+ if (!matchfinder_match_in_window(cur_match, in_base, in_next))
+ goto out;
+ if (!--depth_remaining)
+ goto out;
+ }
+
+ #if UNALIGNED_ACCESS_IS_FAST
+ len = 4;
+ #else
+ len = 0;
+ #endif
+ len = lz_extend(in_next, matchptr, len, max_len);
+ if (len > best_len) {
+ best_len = len;
+ best_matchptr = matchptr;
+ if (best_len >= nice_len)
+ goto out;
+ }
+ cur_match = mf->child_tab[matchfinder_slot_for_match(cur_match)];
+ if (!matchfinder_match_in_window(cur_match, in_base, in_next))
+ goto out;
+ if (!--depth_remaining)
+ goto out;
+ }
+out:
+ *offset_ret = in_next - best_matchptr;
+ return best_len;
+}
+
+/*
+ * Advance the match-finder, but don't search for matches.
+ *
+ * @mf
+ * The matchfinder structure.
+ * @in_base
+ * Pointer to the next byte in the input buffer to process _at the last
+ * time hc_matchfinder_init() or hc_matchfinder_slide_window() was called_.
+ * @in_next
+ * Pointer to the next byte in the input buffer to process.
+ * @in_end
+ * Pointer to the end of the input buffer.
+ * @count
+ * Number of bytes to skip; must be > 0.
+ */
+static inline void
+hc_matchfinder_skip_positions(struct hc_matchfinder * restrict mf,
+ const u8 *in_base,
+ const u8 *in_next,
+ const u8 *in_end,
+ unsigned count)
+{
+ unsigned hash;
+
+ if (unlikely(in_next + count >= in_end - LZ_HASH_REQUIRED_NBYTES))
+ return;
+
+ do {
+ hash = lz_hash(in_next, HC_MATCHFINDER_HASH_ORDER);
+ mf->child_tab[in_next - in_base] = mf->hash_tab[hash];
+ mf->hash_tab[hash] = in_next - in_base;
+ in_next++;
+ } while (--count);
+}