src/lz_linked_suffix_array.c \
src/lz_mf.c \
src/lz_null.c \
+ src/lz_repsearch.c \
src/lz_suffix_array_utils.c \
src/lzms-common.c \
src/lzms-compress.c \
include/wimlib/lz_hash3.h \
include/wimlib/lz_mf.h \
include/wimlib/lz_mf_ops.h \
+ include/wimlib/lz_repsearch.h \
include/wimlib/lz_suffix_array_utils.h \
include/wimlib/lzms.h \
include/wimlib/lzx.h \
--- /dev/null
+/*
+ * lz_repsearch.h
+ *
+ * Fast searching for repeat offset matches.
+ *
+ * The author dedicates this file to the public domain.
+ * You can do whatever you want with this file.
+ */
+
+#ifndef _LZ_REPSEARCH_H
+#define _LZ_REPSEARCH_H
+
+#include "wimlib/lz_extend.h"
+#include "wimlib/util.h"
+
+extern u32
+lz_extend_repmatch(const u8 *strptr, const u8 *matchptr, u32 max_len);
+
+/*
+ * Find the longest repeat offset match.
+ *
+ * If no match of at least 2 bytes is found, then return 0.
+ *
+ * If a match of at least 2 bytes is found, then return its length and set
+ * *slot_ret to the index of its offset in @queue.
+ */
+static inline u32
+lz_repsearch(const u8 * const strptr, const u32 bytes_remaining,
+ const u32 max_match_len, const u32 repeat_offsets[],
+ const unsigned num_repeat_offsets, unsigned *slot_ret)
+{
+ u32 best_len = 0;
+
+ if (likely(bytes_remaining >= 2)) {
+ const u32 max_len = min(max_match_len, bytes_remaining);
+ const u16 str = *(const u16 *)strptr;
+
+ for (unsigned i = 0; i < num_repeat_offsets; i++) {
+ const u8 * const matchptr = strptr - repeat_offsets[i];
+
+ /* Check the first two bytes. If they match, then
+ * extend the match to its full length. */
+ if (*(const u16 *)matchptr == str) {
+ const u32 len = lz_extend_repmatch(strptr, matchptr, max_len);
+ if (len > best_len) {
+ best_len = len;
+ *slot_ret = i;
+ }
+ }
+ }
+ }
+ return best_len;
+}
+
+#endif /* _LZ_REPSEARCH_H */
--- /dev/null
+/*
+ * lz_repsearch.c
+ *
+ * Fast searching for repeat offset matches.
+ *
+ * The author dedicates this file to the public domain.
+ * You can do whatever you want with this file.
+ *
+ */
+
+#include "wimlib/lz_repsearch.h"
+#include "wimlib/lz_extend.h"
+
+u32
+lz_extend_repmatch(const u8 *strptr, const u8 *matchptr, u32 max_len)
+{
+ return lz_extend(strptr, matchptr, 2, max_len);
+}
#include "wimlib/endianness.h"
#include "wimlib/error.h"
#include "wimlib/lz_mf.h"
+#include "wimlib/lz_repsearch.h"
#include "wimlib/lzms.h"
#include "wimlib/util.h"
lzms_get_length_cost(&ctx->length_encoder, length);
}
+static inline u32
+lzms_repsearch(const u8 * const strptr, const u32 bytes_remaining,
+ const struct lzms_lz_lru_queues *queue, u32 *offset_ret)
+{
+ u32 len;
+ unsigned slot = 0;
+
+ len = lz_repsearch(strptr, bytes_remaining, UINT32_MAX,
+ queue->recent_offsets, LZMS_NUM_RECENT_OFFSETS, &slot);
+ *offset_ret = queue->recent_offsets[slot];
+ return len;
+}
+
+
static struct lz_match
lzms_match_chooser_reverse_list(struct lzms_compressor *ctx, unsigned cur_pos)
{
ctx->optimum_cur_idx = 0;
ctx->optimum_end_idx = 0;
- longest_rep_len = ctx->params.min_match_length - 1;
if (lz_mf_get_position(ctx->mf) >= LZMS_MAX_INIT_RECENT_OFFSET) {
- u32 limit = lz_mf_get_bytes_remaining(ctx->mf);
- for (int i = 0; i < LZMS_NUM_RECENT_OFFSETS; i++) {
- u32 offset = ctx->lru.lz.recent_offsets[i];
- const u8 *strptr = lz_mf_get_window_ptr(ctx->mf);
- const u8 *matchptr = strptr - offset;
- u32 len = 0;
- while (len < limit && strptr[len] == matchptr[len])
- len++;
- if (len > longest_rep_len) {
- longest_rep_len = len;
- longest_rep_offset = offset;
- }
- }
+ longest_rep_len = lzms_repsearch(lz_mf_get_window_ptr(ctx->mf),
+ lz_mf_get_bytes_remaining(ctx->mf),
+ &ctx->lru.lz, &longest_rep_offset);
+ } else {
+ longest_rep_len = 0;
}
if (longest_rep_len >= ctx->params.nice_match_length) {
}
end_pos = longest_len;
- if (longest_rep_len >= ctx->params.min_match_length) {
+ if (longest_rep_len) {
struct lzms_adaptive_state state;
u32 cost;
if (cur_pos == end_pos || cur_pos == ctx->params.optim_array_length)
return lzms_match_chooser_reverse_list(ctx, cur_pos);
- longest_rep_len = ctx->params.min_match_length - 1;
if (lz_mf_get_position(ctx->mf) >= LZMS_MAX_INIT_RECENT_OFFSET) {
- u32 limit = lz_mf_get_bytes_remaining(ctx->mf);
- for (int i = 0; i < LZMS_NUM_RECENT_OFFSETS; i++) {
- u32 offset = ctx->optimum[cur_pos].state.lru.recent_offsets[i];
- const u8 *strptr = lz_mf_get_window_ptr(ctx->mf);
- const u8 *matchptr = strptr - offset;
- u32 len = 0;
- while (len < limit && strptr[len] == matchptr[len])
- len++;
- if (len > longest_rep_len) {
- longest_rep_len = len;
- longest_rep_offset = offset;
- }
- }
+ longest_rep_len = lzms_repsearch(lz_mf_get_window_ptr(ctx->mf),
+ lz_mf_get_bytes_remaining(ctx->mf),
+ &ctx->optimum[cur_pos].state.lru,
+ &longest_rep_offset);
+ } else {
+ longest_rep_len = 0;
}
if (longest_rep_len >= ctx->params.nice_match_length) {
#include "wimlib/compress_common.h"
#include "wimlib/endianness.h"
#include "wimlib/error.h"
-#include "wimlib/lz_extend.h"
#include "wimlib/lz_mf.h"
+#include "wimlib/lz_repsearch.h"
#include "wimlib/lzx.h"
#include "wimlib/util.h"
#include <string.h>
lzx_repsearch(const u8 * const strptr, const u32 bytes_remaining,
const struct lzx_lru_queue *queue, unsigned *slot_ret)
{
- u32 best_len = 0;
-
BUILD_BUG_ON(LZX_MIN_MATCH_LEN != 2);
- if (likely(bytes_remaining >= 2)) {
- const u32 max_len = min(LZX_MAX_MATCH_LEN, bytes_remaining);
- const u16 str = *(const u16 *)strptr;
- for (unsigned i = 0; i < LZX_NUM_RECENT_OFFSETS; i++) {
- const u8 * const matchptr = strptr - queue->R[i];
-
- /* Check the first two bytes. If they match, then
- * extend the match to its full length. */
- if (*(const u16 *)matchptr == str) {
- const u32 len = lz_extend(strptr, matchptr,
- 2, max_len);
- if (len > best_len) {
- best_len = len;
- *slot_ret = i;
- }
- }
- }
- }
- return best_len;
+ return lz_repsearch(strptr, bytes_remaining, LZX_MAX_MATCH_LEN,
+ queue->R, LZX_NUM_RECENT_OFFSETS, slot_ret);
}
/*