Factor out lz_repsearch() and also use for LZMS
authorEric Biggers <ebiggers3@gmail.com>
Fri, 29 Aug 2014 03:31:13 +0000 (22:31 -0500)
committerEric Biggers <ebiggers3@gmail.com>
Fri, 29 Aug 2014 03:31:13 +0000 (22:31 -0500)
Makefile.am
include/wimlib/lz_repsearch.h [new file with mode: 0644]
src/lz_repsearch.c [new file with mode: 0644]
src/lzms-compress.c
src/lzx-compress.c

index 1ce745a..18c7f31 100644 (file)
@@ -47,6 +47,7 @@ libwim_la_SOURCES =           \
        src/lz_linked_suffix_array.c    \
        src/lz_mf.c             \
        src/lz_null.c           \
+       src/lz_repsearch.c      \
        src/lz_suffix_array_utils.c     \
        src/lzms-common.c       \
        src/lzms-compress.c     \
@@ -106,6 +107,7 @@ libwim_la_SOURCES =         \
        include/wimlib/lz_hash3.h       \
        include/wimlib/lz_mf.h          \
        include/wimlib/lz_mf_ops.h      \
+       include/wimlib/lz_repsearch.h   \
        include/wimlib/lz_suffix_array_utils.h  \
        include/wimlib/lzms.h           \
        include/wimlib/lzx.h            \
diff --git a/include/wimlib/lz_repsearch.h b/include/wimlib/lz_repsearch.h
new file mode 100644 (file)
index 0000000..fe59558
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * lz_repsearch.h
+ *
+ * Fast searching for repeat offset matches.
+ *
+ * The author dedicates this file to the public domain.
+ * You can do whatever you want with this file.
+ */
+
+#ifndef _LZ_REPSEARCH_H
+#define _LZ_REPSEARCH_H
+
+#include "wimlib/lz_extend.h"
+#include "wimlib/util.h"
+
+extern u32
+lz_extend_repmatch(const u8 *strptr, const u8 *matchptr, u32 max_len);
+
+/*
+ * Find the longest repeat offset match.
+ *
+ * If no match of at least 2 bytes is found, then return 0.
+ *
+ * If a match of at least 2 bytes is found, then return its length and set
+ * *slot_ret to the index of its offset in @queue.
+ */
+static inline u32
+lz_repsearch(const u8 * const strptr, const u32 bytes_remaining,
+            const u32 max_match_len, const u32 repeat_offsets[],
+            const unsigned num_repeat_offsets, unsigned *slot_ret)
+{
+       u32 best_len = 0;
+
+       if (likely(bytes_remaining >= 2)) {
+               const u32 max_len = min(max_match_len, bytes_remaining);
+               const u16 str = *(const u16 *)strptr;
+
+               for (unsigned i = 0; i < num_repeat_offsets; i++) {
+                       const u8 * const matchptr = strptr - repeat_offsets[i];
+
+                       /* Check the first two bytes.  If they match, then
+                        * extend the match to its full length.  */
+                       if (*(const u16 *)matchptr == str) {
+                               const u32 len = lz_extend_repmatch(strptr, matchptr, max_len);
+                               if (len > best_len) {
+                                       best_len = len;
+                                       *slot_ret = i;
+                               }
+                       }
+               }
+       }
+       return best_len;
+}
+
+#endif /* _LZ_REPSEARCH_H */
diff --git a/src/lz_repsearch.c b/src/lz_repsearch.c
new file mode 100644 (file)
index 0000000..2f78a61
--- /dev/null
@@ -0,0 +1,18 @@
+/*
+ * lz_repsearch.c
+ *
+ * Fast searching for repeat offset matches.
+ *
+ * The author dedicates this file to the public domain.
+ * You can do whatever you want with this file.
+ *
+ */
+
+#include "wimlib/lz_repsearch.h"
+#include "wimlib/lz_extend.h"
+
+u32
+lz_extend_repmatch(const u8 *strptr, const u8 *matchptr, u32 max_len)
+{
+       return lz_extend(strptr, matchptr, 2, max_len);
+}
index 21c82ea..8ad19bc 100644 (file)
@@ -41,6 +41,7 @@
 #include "wimlib/endianness.h"
 #include "wimlib/error.h"
 #include "wimlib/lz_mf.h"
+#include "wimlib/lz_repsearch.h"
 #include "wimlib/lzms.h"
 #include "wimlib/util.h"
 
@@ -840,6 +841,20 @@ lzms_get_lz_match_cost(struct lzms_compressor *ctx,
               lzms_get_length_cost(&ctx->length_encoder, length);
 }
 
+static inline u32
+lzms_repsearch(const u8 * const strptr, const u32 bytes_remaining,
+              const struct lzms_lz_lru_queues *queue, u32 *offset_ret)
+{
+       u32 len;
+       unsigned slot = 0;
+
+       len = lz_repsearch(strptr, bytes_remaining, UINT32_MAX,
+                          queue->recent_offsets, LZMS_NUM_RECENT_OFFSETS, &slot);
+       *offset_ret = queue->recent_offsets[slot];
+       return len;
+}
+
+
 static struct lz_match
 lzms_match_chooser_reverse_list(struct lzms_compressor *ctx, unsigned cur_pos)
 {
@@ -899,21 +914,12 @@ lzms_get_near_optimal_item(struct lzms_compressor *ctx)
        ctx->optimum_cur_idx = 0;
        ctx->optimum_end_idx = 0;
 
-       longest_rep_len = ctx->params.min_match_length - 1;
        if (lz_mf_get_position(ctx->mf) >= LZMS_MAX_INIT_RECENT_OFFSET) {
-               u32 limit = lz_mf_get_bytes_remaining(ctx->mf);
-               for (int i = 0; i < LZMS_NUM_RECENT_OFFSETS; i++) {
-                       u32 offset = ctx->lru.lz.recent_offsets[i];
-                       const u8 *strptr = lz_mf_get_window_ptr(ctx->mf);
-                       const u8 *matchptr = strptr - offset;
-                       u32 len = 0;
-                       while (len < limit && strptr[len] == matchptr[len])
-                               len++;
-                       if (len > longest_rep_len) {
-                               longest_rep_len = len;
-                               longest_rep_offset = offset;
-                       }
-               }
+               longest_rep_len = lzms_repsearch(lz_mf_get_window_ptr(ctx->mf),
+                                                lz_mf_get_bytes_remaining(ctx->mf),
+                                                &ctx->lru.lz, &longest_rep_offset);
+       } else {
+               longest_rep_len = 0;
        }
 
        if (longest_rep_len >= ctx->params.nice_match_length) {
@@ -972,7 +978,7 @@ lzms_get_near_optimal_item(struct lzms_compressor *ctx)
        }
        end_pos = longest_len;
 
-       if (longest_rep_len >= ctx->params.min_match_length) {
+       if (longest_rep_len) {
                struct lzms_adaptive_state state;
                u32 cost;
 
@@ -1002,21 +1008,13 @@ lzms_get_near_optimal_item(struct lzms_compressor *ctx)
                if (cur_pos == end_pos || cur_pos == ctx->params.optim_array_length)
                        return lzms_match_chooser_reverse_list(ctx, cur_pos);
 
-               longest_rep_len = ctx->params.min_match_length - 1;
                if (lz_mf_get_position(ctx->mf) >= LZMS_MAX_INIT_RECENT_OFFSET) {
-                       u32 limit = lz_mf_get_bytes_remaining(ctx->mf);
-                       for (int i = 0; i < LZMS_NUM_RECENT_OFFSETS; i++) {
-                               u32 offset = ctx->optimum[cur_pos].state.lru.recent_offsets[i];
-                               const u8 *strptr = lz_mf_get_window_ptr(ctx->mf);
-                               const u8 *matchptr = strptr - offset;
-                               u32 len = 0;
-                               while (len < limit && strptr[len] == matchptr[len])
-                                       len++;
-                               if (len > longest_rep_len) {
-                                       longest_rep_len = len;
-                                       longest_rep_offset = offset;
-                               }
-                       }
+                       longest_rep_len = lzms_repsearch(lz_mf_get_window_ptr(ctx->mf),
+                                                        lz_mf_get_bytes_remaining(ctx->mf),
+                                                        &ctx->optimum[cur_pos].state.lru,
+                                                        &longest_rep_offset);
+               } else {
+                       longest_rep_len = 0;
                }
 
                if (longest_rep_len >= ctx->params.nice_match_length) {
index 9851faa..cf5ea48 100644 (file)
 #include "wimlib/compress_common.h"
 #include "wimlib/endianness.h"
 #include "wimlib/error.h"
-#include "wimlib/lz_extend.h"
 #include "wimlib/lz_mf.h"
+#include "wimlib/lz_repsearch.h"
 #include "wimlib/lzx.h"
 #include "wimlib/util.h"
 #include <string.h>
@@ -1501,28 +1501,9 @@ static inline u32
 lzx_repsearch(const u8 * const strptr, const u32 bytes_remaining,
              const struct lzx_lru_queue *queue, unsigned *slot_ret)
 {
-       u32 best_len = 0;
-
        BUILD_BUG_ON(LZX_MIN_MATCH_LEN != 2);
-       if (likely(bytes_remaining >= 2)) {
-               const u32 max_len = min(LZX_MAX_MATCH_LEN, bytes_remaining);
-               const u16 str = *(const u16 *)strptr;
-               for (unsigned i = 0; i < LZX_NUM_RECENT_OFFSETS; i++) {
-                       const u8 * const matchptr = strptr - queue->R[i];
-
-                       /* Check the first two bytes.  If they match, then
-                        * extend the match to its full length.  */
-                       if (*(const u16 *)matchptr == str) {
-                               const u32 len = lz_extend(strptr, matchptr,
-                                                         2, max_len);
-                               if (len > best_len) {
-                                       best_len = len;
-                                       *slot_ret = i;
-                               }
-                       }
-               }
-       }
-       return best_len;
+       return lz_repsearch(strptr, bytes_remaining, LZX_MAX_MATCH_LEN,
+                           queue->R, LZX_NUM_RECENT_OFFSETS, slot_ret);
 }
 
 /*