* A match-finder for Lempel-Ziv compression based on bottom-up construction and
* traversal of the Longest Common Prefix (LCP) interval tree.
*
- * Author: Eric Biggers
- * Year: 2014, 2015
+ * The following copying information applies to this specific source code file:
*
- * The author dedicates this file to the public domain.
- * You can do whatever you want with this file.
+ * Written in 2014-2015 by Eric Biggers <ebiggers3@gmail.com>
+ *
+ * To the extent possible under law, the author(s) have dedicated all copyright
+ * and related and neighboring rights to this software to the public domain
+ * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
+ * Dedication (the "CC0").
+ *
+ * This software is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
+ *
+ * You should have received a copy of the CC0 along with this software; if not
+ * see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#ifdef HAVE_CONFIG_H
* around by just continuing until we get to a link that actually takes us
* higher in the tree. This can be described as a lazy-update scheme.
*/
-static inline u32
+static forceinline u32
lcpit_advance_one_byte(const u32 cur_pos,
u32 pos_data[restrict],
u32 intervals[restrict],
+ u32 next[restrict],
struct lz_match matches[restrict],
const bool record_matches)
{
/* Get the deepest lcp-interval containing the current suffix. */
ref = pos_data[cur_pos];
- /* Prefetch the deepest lcp-interval containing the *next* suffix. */
- prefetchw(&intervals[pos_data[cur_pos + 1] & POS_MASK]);
+ /* Prefetch upcoming data, up to 3 positions ahead. Assume the
+ * intervals are already visited. */
+
+ /* Prefetch the superinterval via a suffix link for the deepest
+ * lcp-interval containing the suffix starting 1 position from now. */
+ prefetchw(&intervals[pos_data[next[0]] & POS_MASK]);
+
+ /* Prefetch suffix link for the deepest lcp-interval containing the
+ * suffix starting 2 positions from now. */
+ next[0] = intervals[next[1]] & POS_MASK;
+ prefetchw(&pos_data[next[0]]);
+
+ /* Prefetch the deepest lcp-interval containing the suffix starting 3
+ * positions from now. */
+ next[1] = pos_data[cur_pos + 3] & POS_MASK;
+ prefetchw(&intervals[next[1]]);
/* There is no "next suffix" after the current one. */
pos_data[cur_pos] = 0;
/* Like lcpit_advance_one_byte(), but for buffers larger than
* MAX_NORMAL_BUFSIZE. */
-static inline u32
+static forceinline u32
lcpit_advance_one_byte_huge(const u32 cur_pos,
u32 pos_data[restrict],
u64 intervals64[restrict],
+ u32 prefetch_next[restrict],
struct lz_match matches[restrict],
const bool record_matches)
{
struct lz_match *matchptr;
interval_idx = pos_data[cur_pos];
- prefetchw(&pos_data[intervals64[pos_data[cur_pos + 1]] & HUGE_POS_MASK]);
- prefetchw(&intervals64[pos_data[cur_pos + 2]]);
+
+ prefetchw(&intervals64[pos_data[prefetch_next[0]] & HUGE_POS_MASK]);
+
+ prefetch_next[0] = intervals64[prefetch_next[1]] & HUGE_POS_MASK;
+ prefetchw(&pos_data[prefetch_next[0]]);
+
+ prefetch_next[1] = pos_data[cur_pos + 3] & HUGE_POS_MASK;
+ prefetchw(&intervals64[prefetch_next[1]]);
+
pos_data[cur_pos] = 0;
while ((next = intervals64[interval_idx]) & HUGE_UNVISITED_TAG) {
return matchptr - matches;
}
-static inline u64
+static forceinline u64
get_pos_data_size(size_t max_bufsize)
{
return (u64)max((u64)max_bufsize + PREFETCH_SAFETY,
DIVSUFSORT_TMP_LEN) * sizeof(u32);
}
-static inline u64
+static forceinline u64
get_intervals_size(size_t max_bufsize)
{
return ((u64)max_bufsize + PREFETCH_SAFETY) *
* References:
*
* Y. Mori. libdivsufsort, a lightweight suffix-sorting library.
- * https://code.google.com/p/libdivsufsort/.
+ * https://github.com/y-256/libdivsufsort
*
* G. Nong, S. Zhang, and W.H. Chan. 2009. Linear Suffix Array
* Construction by Almost Pure Induced-Sorting. Data Compression
mf->huge_mode = true;
}
mf->cur_pos = 0; /* starting at beginning of input buffer */
+ for (u32 i = 0; i < ARRAY_LEN(mf->next); i++)
+ mf->next[i] = 0;
}
/*
{
if (mf->huge_mode)
return lcpit_advance_one_byte_huge(mf->cur_pos++, mf->pos_data,
- mf->intervals64, matches, true);
+ mf->intervals64, mf->next,
+ matches, true);
else
return lcpit_advance_one_byte(mf->cur_pos++, mf->pos_data,
- mf->intervals, matches, true);
+ mf->intervals, mf->next,
+ matches, true);
}
/*
if (mf->huge_mode) {
do {
lcpit_advance_one_byte_huge(mf->cur_pos++, mf->pos_data,
- mf->intervals64, NULL, false);
+ mf->intervals64, mf->next,
+ NULL, false);
} while (--count);
} else {
do {
lcpit_advance_one_byte(mf->cur_pos++, mf->pos_data,
- mf->intervals, NULL, false);
+ mf->intervals, mf->next,
+ NULL, false);
} while (--count);
}
}