This saves 4 bytes of memory per position and only decreases the
compression ratio on very repetitive files (with matches of length >
65535), and even then only slightly. Performance is not significantly
affected but may be slightly improved due to better use of cache.
Only the most important changes more recent than version 0.6 are noted here.
Version 1.6.1:
Only the most important changes more recent than version 0.6 are noted here.
Version 1.6.1:
- This release is minor bug-fixes only:
-
Stored files with size exactly 4 GiB (4,294,967,296 bytes) are now
decompressed correctly.
Stored files with size exactly 4 GiB (4,294,967,296 bytes) are now
decompressed correctly.
Fixed a potential stack overflow when extracting solid archives (packed
streams) containing more than about 100000 files.
Fixed a potential stack overflow when extracting solid archives (packed
streams) containing more than about 100000 files.
+ Memory usage for LZMS and LZX compression has been decreased slightly.
+
Version 1.6.0:
Support for extracting and updating the new version 3584 WIMs has been
added. These WIMs typically pack many streams ("files") together into a
Version 1.6.0:
Support for extracting and updating the new version 3584 WIMs has been
added. These WIMs typically pack many streams ("files") together into a
- * Copyright (c) 2013 Eric Biggers. All rights reserved.
+ * Copyright (c) 2013, 2014 Eric Biggers. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
#define _WIMLIB_LZ_SARRAY_H
#include "wimlib/compiler.h" /* must define '_always_inline_attribute' */
#define _WIMLIB_LZ_SARRAY_H
#include "wimlib/compiler.h" /* must define '_always_inline_attribute' */
-#include "wimlib/lz.h" /* must define 'struct raw_match', 'input_idx_t',
- and LZ_ASSERT(). */
-#include "wimlib/types.h" /* must define 'bool', 'u8', 'u32' */
+#include "wimlib/lz.h" /* must define 'struct raw_match' and LZ_ASSERT() */
+#include "wimlib/types.h" /* must define 'bool', 'u8', 'u16, and 'u32' */
+/* Position type --- must be an unsigned type large enough to hold the length of
+ * longest window for which the suffix array match-finder will be used. */
+typedef u32 lz_sarray_pos_t;
+
+/* Length type --- must be an unsigned type large enough to hold the maximum
+ * match length. */
+typedef u16 lz_sarray_len_t;
+
+/* Cost type, for the user-provided match cost evaluation function. */
+typedef lz_sarray_pos_t lz_sarray_cost_t;
+
+#define LZ_SARRAY_LEN_MAX ((lz_sarray_len_t)~0UL)
+#define LZ_SARRAY_POS_MAX ((lz_sarray_pos_t)~0UL)
+#define LZ_SARRAY_INFINITE_COST ((lz_sarray_cost_t)~0UL)
+
/* State of the suffix array LZ (Lempel-Ziv) match-finder.
*
* This is defined here for benefit of the inlined code. It's not intended for
/* State of the suffix array LZ (Lempel-Ziv) match-finder.
*
* This is defined here for benefit of the inlined code. It's not intended for
* sufficient to find matches. This number is the maximum length of
* those arrays, or also the maximum window (block) size that can be
* passed to lz_sarray_load_window(). */
* sufficient to find matches. This number is the maximum length of
* those arrays, or also the maximum window (block) size that can be
* passed to lz_sarray_load_window(). */
- input_idx_t max_window_size;
+ lz_sarray_pos_t max_window_size;
- /* Minimum match length to return. */
- input_idx_t min_match_len;
+ /* Minimum length of matches to return. */
+ lz_sarray_len_t min_match_len;
- /* Maximum match length to return. */
- input_idx_t max_match_len;
+ /* Maximum length of matches to return. */
+ lz_sarray_len_t max_match_len;
/* Maximum matches to consider at each position (max search depth). */
u32 max_matches_to_consider;
/* Maximum matches to consider at each position (max search depth). */
u32 max_matches_to_consider;
u32 max_matches_to_return;
/* Current position in the window. */
u32 max_matches_to_return;
/* Current position in the window. */
+ lz_sarray_pos_t cur_pos;
/* Current window size. */
/* Current window size. */
- input_idx_t window_size;
+ lz_sarray_pos_t window_size;
/* Suffix array for the current window.
* This is a mapping from suffix rank to suffix position. */
/* Suffix array for the current window.
* This is a mapping from suffix rank to suffix position. */
/* Inverse suffix array for the current window.
* This is a mapping from suffix position to suffix rank.
* If 0 <= r < window_size, then ISA[SA[r]] == r. */
/* Inverse suffix array for the current window.
* This is a mapping from suffix position to suffix rank.
* If 0 <= r < window_size, then ISA[SA[r]] == r. */
/* Rank of highest ranked suffix that has rank lower than the suffix
* corresponding to this structure and either has a lower position
* (initially) or has a position lower than the highest position at
/* Rank of highest ranked suffix that has rank lower than the suffix
* corresponding to this structure and either has a lower position
* (initially) or has a position lower than the highest position at
- * which matches have been searched for so far, or ~(input_idx_t)0 if
+ * which matches have been searched for so far, or LZ_SARRAY_POS_MAX if
* there is no such suffix.
*
* Think of this as a pointer to the closest position in the suffix
* array to the left that corresponds to a suffix that begins at a
* position in the current dictionary (i.e. before the current position
* in the window). */
* there is no such suffix.
*
* Think of this as a pointer to the closest position in the suffix
* array to the left that corresponds to a suffix that begins at a
* position in the current dictionary (i.e. before the current position
* in the window). */
/* Rank of lowest ranked suffix that has rank greater than the suffix
* corresponding to this structure and either has a lower position
* (intially) or has a position lower than the highest position at which
/* Rank of lowest ranked suffix that has rank greater than the suffix
* corresponding to this structure and either has a lower position
* (intially) or has a position lower than the highest position at which
- * matches have been searched for so far, or ~(input_idx_t)0 if there is
- * no such suffix.
+ * matches have been searched for so far, or LZ_SARRAY_POS_MAX if there
+ * is no such suffix.
* Think of this as a pointer to the closest position in the suffix
* array to the right that corresponds to a suffix that begins at a
* position in the current dictionary (i.e. before the current position
* in the window). */
* Think of this as a pointer to the closest position in the suffix
* array to the right that corresponds to a suffix that begins at a
* position in the current dictionary (i.e. before the current position
* in the window). */
/* Length of longest common prefix between the suffix corresponding to
* this structure and the suffix with rank @prev, or 0 if @prev is
/* Length of longest common prefix between the suffix corresponding to
* this structure and the suffix with rank @prev, or 0 if @prev is
- * ~(input_idx_t)0. */
- input_idx_t lcpprev;
+ * LZ_SARRAY_POS_MAX. Capped to the maximum match length. */
+ lz_sarray_len_t lcpprev;
/* Length of longest common prefix between the suffix corresponding to
* this structure and the suffix with rank @next, or 0 if @next is
/* Length of longest common prefix between the suffix corresponding to
* this structure and the suffix with rank @next, or 0 if @next is
- * ~(input_idx_t)0. */
- input_idx_t lcpnext;
+ * LZ_SARRAY_POS_MAX. Capped to the maximum match length. */
+ lz_sarray_len_t lcpnext;
};
/*-----------------------------------*/
};
/*-----------------------------------*/
extern bool
lz_sarray_init(struct lz_sarray *mf,
extern bool
lz_sarray_init(struct lz_sarray *mf,
- input_idx_t max_window_size,
- input_idx_t min_match_len,
- input_idx_t max_match_len,
+ lz_sarray_pos_t max_window_size,
+ lz_sarray_len_t min_match_len,
+ lz_sarray_len_t max_match_len,
u32 max_matches_to_consider,
u32 max_matches_to_return);
extern u64
u32 max_matches_to_consider,
u32 max_matches_to_return);
extern u64
-lz_sarray_get_needed_memory(input_idx_t max_window_size);
+lz_sarray_get_needed_memory(lz_sarray_pos_t max_window_size);
extern void
lz_sarray_destroy(struct lz_sarray *mf);
extern void
extern void
lz_sarray_destroy(struct lz_sarray *mf);
extern void
-lz_sarray_load_window(struct lz_sarray *mf, const u8 T[], input_idx_t n);
+lz_sarray_load_window(struct lz_sarray *mf, const u8 T[], lz_sarray_pos_t n);
/*-------------------*/
/* Inline functions */
/*-------------------*/
/*-------------------*/
/* Inline functions */
/*-------------------*/
-static _always_inline_attribute input_idx_t
+static _always_inline_attribute lz_sarray_pos_t
lz_sarray_get_pos(const struct lz_sarray *mf)
{
return mf->cur_pos;
lz_sarray_get_pos(const struct lz_sarray *mf)
{
return mf->cur_pos;
/* Advance the suffix array match-finder to the next position. */
static _always_inline_attribute void
/* Advance the suffix array match-finder to the next position. */
static _always_inline_attribute void
-lz_sarray_update_salink(const input_idx_t r, struct salink link[])
+lz_sarray_update_salink(const lz_sarray_pos_t r, struct salink link[])
{
/* next = rank of LOWEST ranked suffix that is ranked HIGHER than the
{
/* next = rank of LOWEST ranked suffix that is ranked HIGHER than the
- * current suffix AND has a LOWER position, or ~(input_idx_t)0 if none
+ * current suffix AND has a LOWER position, or LZ_SARRAY_POS_MAX if none
- const input_idx_t next = link[r].next;
+ const lz_sarray_pos_t next = link[r].next;
/* prev = rank of HIGHEST ranked suffix that is ranked LOWER than the
/* prev = rank of HIGHEST ranked suffix that is ranked LOWER than the
- * current suffix AND has a LOWER position, or ~(input_idx_t)0 if none
+ * current suffix AND has a LOWER position, or LZ_SARRAY_POS_MAX if none
- const input_idx_t prev = link[r].prev;
+ const lz_sarray_pos_t prev = link[r].prev;
/* Link the suffix at the current position into the linked list that
* contains all suffixes referenced by the suffix array that appear at
* or before the current position, sorted by rank. */
/* Link the suffix at the current position into the linked list that
* contains all suffixes referenced by the suffix array that appear at
* or before the current position, sorted by rank. */
- if (next != ~(input_idx_t)0) {
+ if (next != LZ_SARRAY_POS_MAX) {
link[next].prev = r;
link[next].lcpprev = link[r].lcpnext;
}
link[next].prev = r;
link[next].lcpprev = link[r].lcpnext;
}
- if (prev != ~(input_idx_t)0) {
+ if (prev != LZ_SARRAY_POS_MAX) {
link[prev].next = r;
link[prev].lcpnext = link[r].lcpprev;
}
link[prev].next = r;
link[prev].lcpnext = link[r].lcpprev;
}
lz_sarray_update_salink(mf->ISA[mf->cur_pos++], mf->salink);
}
lz_sarray_update_salink(mf->ISA[mf->cur_pos++], mf->salink);
}
-typedef input_idx_t lz_sarray_cost_t;
-#define LZ_SARRAY_INFINITE_COST (~(lz_sarray_cost_t)0)
-
/*
* Use the suffix array match-finder to retrieve a list of matches at the
* current position.
/*
* Use the suffix array match-finder to retrieve a list of matches at the
* current position.
lz_sarray_get_matches(struct lz_sarray *mf,
struct raw_match matches[],
lz_sarray_cost_t (*eval_match_cost)
lz_sarray_get_matches(struct lz_sarray *mf,
struct raw_match matches[],
lz_sarray_cost_t (*eval_match_cost)
- (input_idx_t length,
- input_idx_t offset,
+ (lz_sarray_pos_t length,
+ lz_sarray_pos_t offset,
const void *ctx),
const void *eval_match_cost_ctx)
{
LZ_ASSERT(mf->cur_pos < mf->window_size);
const void *ctx),
const void *eval_match_cost_ctx)
{
LZ_ASSERT(mf->cur_pos < mf->window_size);
- const input_idx_t i = mf->cur_pos++;
+ const lz_sarray_pos_t i = mf->cur_pos++;
- const input_idx_t * const restrict SA = mf->SA;
- const input_idx_t * const restrict ISA = mf->ISA;
+ const lz_sarray_pos_t * const restrict SA = mf->SA;
+ const lz_sarray_pos_t * const restrict ISA = mf->ISA;
struct salink * const restrict link = mf->salink;
struct salink * const restrict link = mf->salink;
- const input_idx_t min_match_len = mf->min_match_len;
+ const lz_sarray_pos_t min_match_len = mf->min_match_len;
const u32 max_matches_to_consider = mf->max_matches_to_consider;
const u32 max_matches_to_return = mf->max_matches_to_return;
/* r = Rank of the suffix at the current position. */
const u32 max_matches_to_consider = mf->max_matches_to_consider;
const u32 max_matches_to_return = mf->max_matches_to_return;
/* r = Rank of the suffix at the current position. */
- const input_idx_t r = ISA[i];
+ const lz_sarray_pos_t r = ISA[i];
/* Prepare for searching the current position. */
lz_sarray_update_salink(r, link);
/* Prepare for searching the current position. */
lz_sarray_update_salink(r, link);
* length on both sides in sync in order to choose the lowest-cost match
* of each length.
*/
* length on both sides in sync in order to choose the lowest-cost match
* of each length.
*/
- input_idx_t L = link[r].prev;
- input_idx_t R = link[r].next;
- input_idx_t lenL = link[r].lcpprev;
- input_idx_t lenR = link[r].lcpnext;
+ lz_sarray_pos_t L = link[r].prev;
+ lz_sarray_pos_t R = link[r].next;
+ lz_sarray_pos_t lenL = link[r].lcpprev;
+ lz_sarray_pos_t lenR = link[r].lcpnext;
/* nmatches = number of matches found so far. */
u32 nmatches = 0;
/* nmatches = number of matches found so far. */
u32 nmatches = 0;
if (--count_remaining == 0)
goto out_save_pending;
if (--count_remaining == 0)
goto out_save_pending;
- input_idx_t offset = i - SA[L];
+ lz_sarray_pos_t offset = i - SA[L];
/* Save match if it has smaller cost. */
cost = (*eval_match_cost)(lenL, offset,
/* Save match if it has smaller cost. */
cost = (*eval_match_cost)(lenL, offset,
if (--count_remaining == 0)
goto out_save_pending;
if (--count_remaining == 0)
goto out_save_pending;
- input_idx_t offset = i - SA[R];
+ lz_sarray_pos_t offset = i - SA[R];
/* Save match if it has smaller cost. */
cost = (*eval_match_cost)(lenR,
/* Save match if it has smaller cost. */
cost = (*eval_match_cost)(lenR,
- * Copyright (c) 2013 Eric Biggers. All rights reserved.
+ * Copyright (c) 2013, 2014 Eric Biggers. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* WARNING: this is for debug use only as it does not necessarily run in linear
* time!!! */
static void
* WARNING: this is for debug use only as it does not necessarily run in linear
* time!!! */
static void
-verify_suffix_array(const input_idx_t SA[restrict],
+verify_suffix_array(const lz_sarray_pos_t SA[restrict],
const u8 T[restrict],
bool found[restrict],
const u8 T[restrict],
bool found[restrict],
{
#ifdef ENABLE_LZ_DEBUG
/* Ensure the SA contains exactly one of each i in [0, n - 1]. */
{
#ifdef ENABLE_LZ_DEBUG
/* Ensure the SA contains exactly one of each i in [0, n - 1]. */
- for (input_idx_t i = 0; i < n; i++)
+ for (lz_sarray_pos_t i = 0; i < n; i++)
- for (input_idx_t r = 0; r < n; r++) {
- input_idx_t i = SA[r];
+ for (lz_sarray_pos_t r = 0; r < n; r++) {
+ lz_sarray_pos_t i = SA[r];
LZ_ASSERT(i < n);
LZ_ASSERT(!found[i]);
found[i] = true;
LZ_ASSERT(i < n);
LZ_ASSERT(!found[i]);
found[i] = true;
/* Ensure the suffix with rank r is lexicographically lesser than the
* suffix with rank (r + 1) for all r in [0, n - 2]. */
/* Ensure the suffix with rank r is lexicographically lesser than the
* suffix with rank (r + 1) for all r in [0, n - 2]. */
- for (input_idx_t r = 0; r < n - 1; r++) {
+ for (lz_sarray_pos_t r = 0; r < n - 1; r++) {
- input_idx_t i1 = SA[r];
- input_idx_t i2 = SA[r + 1];
+ lz_sarray_pos_t i1 = SA[r];
+ lz_sarray_pos_t i2 = SA[r + 1];
- input_idx_t n1 = n - i1;
- input_idx_t n2 = n - i2;
+ lz_sarray_pos_t n1 = n - i1;
+ lz_sarray_pos_t n2 = n - i2;
int res = memcmp(&T[i1], &T[i2], min(n1, n2));
LZ_ASSERT(res < 0 || (res == 0 && n1 < n2));
int res = memcmp(&T[i1], &T[i2], min(n1, n2));
LZ_ASSERT(res < 0 || (res == 0 && n1 < n2));
* the inverse suffix array is a mapping from suffix position to suffix rank.
*/
static void
* the inverse suffix array is a mapping from suffix position to suffix rank.
*/
static void
-compute_inverse_suffix_array(input_idx_t ISA[restrict],
- const input_idx_t SA[restrict],
- input_idx_t n)
+compute_inverse_suffix_array(lz_sarray_pos_t ISA[restrict],
+ const lz_sarray_pos_t SA[restrict],
+ lz_sarray_pos_t n)
for (r = 0; r < n; r++)
ISA[SA[r]] = r;
for (r = 0; r < n; r++)
ISA[SA[r]] = r;
* take into account that with bytes in the real world, there is no unique
* symbol at the end of the string. */
static void
* take into account that with bytes in the real world, there is no unique
* symbol at the end of the string. */
static void
-compute_lcp_array(input_idx_t LCP[restrict],
- const input_idx_t SA[restrict],
- const input_idx_t ISA[restrict],
+compute_lcp_array(lz_sarray_pos_t LCP[restrict],
+ const lz_sarray_pos_t SA[restrict],
+ const lz_sarray_pos_t ISA[restrict],
- input_idx_t h, i, r, j, lim;
+ lz_sarray_pos_t h, i, r, j, lim;
h = 0;
for (i = 0; i < n; i++) {
h = 0;
for (i = 0; i < n; i++) {
* WARNING: this is for debug use only as it does not necessarily run in linear
* time!!! */
static void
* WARNING: this is for debug use only as it does not necessarily run in linear
* time!!! */
static void
-verify_lcp_array(input_idx_t LCP[restrict],
- const input_idx_t SA[restrict],
+verify_lcp_array(lz_sarray_pos_t LCP[restrict],
+ const lz_sarray_pos_t SA[restrict],
- for (input_idx_t r = 0; r < n - 1; r++) {
- input_idx_t i1 = SA[r];
- input_idx_t i2 = SA[r + 1];
- input_idx_t lcp = LCP[r + 1];
+ for (lz_sarray_pos_t r = 0; r < n - 1; r++) {
+ lz_sarray_pos_t i1 = SA[r];
+ lz_sarray_pos_t i2 = SA[r + 1];
+ lz_sarray_pos_t lcp = LCP[r + 1];
- input_idx_t n1 = n - i1;
- input_idx_t n2 = n - i2;
+ lz_sarray_pos_t n1 = n - i1;
+ lz_sarray_pos_t n2 = n - i2;
LZ_ASSERT(lcp <= min(n1, n2));
LZ_ASSERT(lcp <= min(n1, n2));
*/
static void
init_salink(struct salink link[restrict],
*/
static void
init_salink(struct salink link[restrict],
- const input_idx_t LCP[restrict],
- const input_idx_t SA[restrict],
+ const lz_sarray_pos_t LCP[restrict],
+ const lz_sarray_pos_t SA[restrict],
- input_idx_t n,
- input_idx_t max_match_len)
+ lz_sarray_pos_t n,
+ lz_sarray_len_t max_match_len)
{
/* Compute salink.next and salink.lcpnext. */
{
/* Compute salink.next and salink.lcpnext. */
- link[n - 1].next = ~(input_idx_t)0;
+ link[n - 1].next = LZ_SARRAY_POS_MAX;
- for (input_idx_t r = n - 2; r != ~(input_idx_t)0; r--) {
- input_idx_t t = r + 1;
- input_idx_t l = LCP[t];
- while (t != ~(input_idx_t)0 && SA[t] > SA[r]) {
+ for (lz_sarray_pos_t r = n - 2; r != LZ_SARRAY_POS_MAX; r--) {
+ lz_sarray_pos_t t = r + 1;
+ lz_sarray_pos_t l = LCP[t];
+ while (t != LZ_SARRAY_POS_MAX && SA[t] > SA[r]) {
l = min(l, link[t].lcpnext);
t = link[t].next;
}
l = min(l, link[t].lcpnext);
t = link[t].next;
}
}
/* Compute salink.prev and salink.lcpprev. */
}
/* Compute salink.prev and salink.lcpprev. */
- link[0].prev = ~(input_idx_t)0;
+ link[0].prev = LZ_SARRAY_POS_MAX;
- for (input_idx_t r = 1; r < n; r++) {
- input_idx_t t = r - 1;
- input_idx_t l = LCP[r];
- while (t != ~(input_idx_t)0 && SA[t] > SA[r]) {
+ for (lz_sarray_pos_t r = 1; r < n; r++) {
+ lz_sarray_pos_t t = r - 1;
+ lz_sarray_pos_t l = LCP[r];
+ while (t != LZ_SARRAY_POS_MAX && SA[t] > SA[r]) {
l = min(l, link[t].lcpprev);
t = link[t].prev;
}
l = min(l, link[t].lcpprev);
t = link[t].prev;
}
* time!!! */
static void
verify_salink(const struct salink link[],
* time!!! */
static void
verify_salink(const struct salink link[],
- const input_idx_t SA[],
+ const lz_sarray_pos_t SA[],
- input_idx_t n,
- input_idx_t max_match_len)
+ lz_sarray_pos_t n,
+ lz_sarray_len_t max_match_len)
- for (input_idx_t r = 0; r < n; r++) {
- for (input_idx_t prev = r; ; ) {
+ for (lz_sarray_pos_t r = 0; r < n; r++) {
+ for (lz_sarray_pos_t prev = r; ; ) {
- LZ_ASSERT(link[r].prev == ~(input_idx_t)0);
+ LZ_ASSERT(link[r].prev == LZ_SARRAY_POS_MAX);
LZ_ASSERT(link[r].lcpprev == 0);
break;
}
LZ_ASSERT(link[r].lcpprev == 0);
break;
}
- for (input_idx_t next = r; ; ) {
+ for (lz_sarray_pos_t next = r; ; ) {
- LZ_ASSERT(link[r].next == ~(input_idx_t)0);
+ LZ_ASSERT(link[r].next == LZ_SARRAY_POS_MAX);
LZ_ASSERT(link[r].lcpnext == 0);
break;
}
LZ_ASSERT(link[r].lcpnext == 0);
break;
}
* any memory that may have been allocated.
*
* @max_window_size
* any memory that may have been allocated.
*
* @max_window_size
- * The maximum window size to support.
+ * The maximum window size to support. This must be greater than 0.
- * In the current implementation, the memory needed will be
- * (6 * sizeof(input_idx_t) * @max_window_size) bytes.
- * For (sizeof(input_idx_t) == 4) that's 24 times the window size.
- *
- * Memory is saved by saving neither the original window nor the LCP
- * (Longest Common Prefix) array; otherwise 29 times the window size would
- * be required. (In practice the compressor will likely keep the original
- * window around anyway, although based on this property of the
- * match-finder it theoretically it could overwrite it with the compressed
- * data.)
+ * The amount of needed memory will depend on this value; see
+ * lz_sarray_get_needed_memory() for details.
- * The minimum length of each match to be found.
+ * The minimum length of each match to be found. Must be greater than 0.
- * The maximum length of each match to be found.
+ * The maximum length of each match to be found. Must be greater than or
+ * equal to @min_match_len.
*
* @max_matches_to_consider
* The maximum number of matches to consider at each position. This should
*
* @max_matches_to_consider
* The maximum number of matches to consider at each position. This should
*/
bool
lz_sarray_init(struct lz_sarray *mf,
*/
bool
lz_sarray_init(struct lz_sarray *mf,
- input_idx_t max_window_size,
- input_idx_t min_match_len,
- input_idx_t max_match_len,
+ lz_sarray_pos_t max_window_size,
+ lz_sarray_len_t min_match_len,
+ lz_sarray_len_t max_match_len,
u32 max_matches_to_consider,
u32 max_matches_to_return)
{
u32 max_matches_to_consider,
u32 max_matches_to_return)
{
+ LZ_ASSERT(min_match_len > 0);
+ LZ_ASSERT(max_window_size > 0);
+ LZ_ASSERT(max_match_len >= min_match_len);
+
mf->max_window_size = max_window_size;
mf->min_match_len = min_match_len;
mf->max_match_len = max_match_len;
mf->max_window_size = max_window_size;
mf->min_match_len = min_match_len;
mf->max_match_len = max_match_len;
mf->max_matches_to_return = max_matches_to_return;
/* SA and ISA will share the same storage block. */
mf->max_matches_to_return = max_matches_to_return;
/* SA and ISA will share the same storage block. */
+ if ((u64)2 * max_window_size * sizeof(mf->SA[0]) !=
+ 2 * max_window_size * sizeof(mf->SA[0]))
+ return false;
mf->SA = MALLOC(2 * max_window_size * sizeof(mf->SA[0]));
if (mf->SA == NULL)
return false;
mf->SA = MALLOC(2 * max_window_size * sizeof(mf->SA[0]));
if (mf->SA == NULL)
return false;
+ if ((u64)max_window_size * sizeof(mf->salink[0]) !=
+ max_window_size * sizeof(mf->salink[0]))
+ return false;
mf->salink = MALLOC(max_window_size * sizeof(mf->salink[0]));
if (mf->salink == NULL)
return false;
mf->salink = MALLOC(max_window_size * sizeof(mf->salink[0]));
if (mf->salink == NULL)
return false;
+/*
+ * Return the number of bytes of memory that lz_sarray_init() would allocate for
+ * the specified maximum window size.
+ *
+ * This should be (20 * @max_window_size) unless the type definitions have been
+ * changed.
+ */
-lz_sarray_get_needed_memory(input_idx_t max_window_size)
+lz_sarray_get_needed_memory(lz_sarray_pos_t max_window_size)
- return (u64)6 * sizeof(input_idx_t) * max_window_size;
+ u64 size = 0;
+
+ /* SA and ISA: 8 bytes per position */
+ size += (u64)max_window_size * 2 * sizeof(((struct lz_sarray*)0)->SA[0]);
+
+ /* salink: 12 bytes per position */
+ size += (u64)max_window_size * sizeof(((struct lz_sarray*)0)->salink[0]);
+
+ return size;
* This function runs in linear time (relative to @n).
*/
void
* This function runs in linear time (relative to @n).
*/
void
-lz_sarray_load_window(struct lz_sarray *mf, const u8 T[], input_idx_t n)
+lz_sarray_load_window(struct lz_sarray *mf, const u8 T[], lz_sarray_pos_t n)
- input_idx_t *ISA, *LCP;
+ lz_sarray_pos_t *ISA, *LCP;
LZ_ASSERT(n > 0 && n <= mf->max_window_size);
LZ_ASSERT(n > 0 && n <= mf->max_window_size);
>= 256 * sizeof(saidx_t));
LZ_ASSERT(mf->max_window_size * sizeof(mf->salink[0])
>= 256 * 256 * sizeof(saidx_t));
>= 256 * sizeof(saidx_t));
LZ_ASSERT(mf->max_window_size * sizeof(mf->salink[0])
>= 256 * 256 * sizeof(saidx_t));
- BUILD_BUG_ON(sizeof(input_idx_t) != sizeof(saidx_t));
+ BUILD_BUG_ON(sizeof(lz_sarray_pos_t) != sizeof(saidx_t));
divsufsort(T, mf->SA, n, (saidx_t*)&mf->SA[n], (saidx_t*)mf->salink);
divsufsort(T, mf->SA, n, (saidx_t*)&mf->SA[n], (saidx_t*)mf->salink);
* end. This is probably worth it because computing the ISA from the SA
* is very fast, and since this match-finder is memory-hungry we'd like
* to save as much memory as possible. */
* end. This is probably worth it because computing the ISA from the SA
* is very fast, and since this match-finder is memory-hungry we'd like
* to save as much memory as possible. */
- ISA = (input_idx_t*)mf->salink;
+ ISA = (lz_sarray_pos_t*)mf->salink;
compute_inverse_suffix_array(ISA, mf->SA, n);
/* Compute LCP (Longest Common Prefix) array. */
compute_inverse_suffix_array(ISA, mf->SA, n);
/* Compute LCP (Longest Common Prefix) array. */
.optim_array_length = 1024,
};
.optim_array_length = 1024,
};
+static bool
+lzms_params_valid(const struct wimlib_compressor_params_header *);
+
static const struct wimlib_lzms_compressor_params *
lzms_get_params(const struct wimlib_compressor_params_header *_params)
{
static const struct wimlib_lzms_compressor_params *
lzms_get_params(const struct wimlib_compressor_params_header *_params)
{
if (params == NULL)
params = &lzms_default;
if (params == NULL)
params = &lzms_default;
+ LZMS_ASSERT(lzms_params_valid(¶ms->hdr));
+
if (!lz_sarray_init(&ctx->lz_sarray, max_block_size,
params->min_match_length,
if (!lz_sarray_init(&ctx->lz_sarray, max_block_size,
params->min_match_length,
- params->max_match_length,
+ min(params->max_match_length, LZ_SARRAY_LEN_MAX),
params->max_search_depth,
params->max_matches_per_pos))
goto oom;
params->max_search_depth,
params->max_matches_per_pos))
goto oom;