From e940fda88a92ff9e931ec88fb4c0e1ebd6fa2dfb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 29 Dec 2013 19:37:39 -0600 Subject: [PATCH] Separate suffix array match-finder from LZX compressor --- Makefile.am | 6 +- README | 6 +- include/wimlib/compress_common.h | 23 -- include/wimlib/lz.h | 30 ++ include/wimlib/lz_hash.h | 30 ++ include/wimlib/lz_sarray.h | 362 ++++++++++++++++++++++ src/{lz77.c => lz_hash.c} | 4 +- src/lz_sarray.c | 237 +++++++++++++++ src/lzms-compress.c | 1 + src/lzx-compress.c | 496 ++----------------------------- src/xpress-compress.c | 1 + tests/test-imagex | 12 +- tests/test-imagex-mount | 16 +- 13 files changed, 713 insertions(+), 511 deletions(-) create mode 100644 include/wimlib/lz.h create mode 100644 include/wimlib/lz_hash.h create mode 100644 include/wimlib/lz_sarray.h rename src/{lz77.c => lz_hash.c} (99%) create mode 100644 src/lz_sarray.c diff --git a/Makefile.am b/Makefile.am index 29071d12..52ba9670 100644 --- a/Makefile.am +++ b/Makefile.am @@ -41,7 +41,8 @@ libwim_la_SOURCES = \ src/lzms-common.c \ src/lzms-compress.c \ src/lzms-decompress.c \ - src/lz77.c \ + src/lz_hash.c \ + src/lz_sarray.c \ src/divsufsort/divsufsort.c \ src/divsufsort/divsufsort.h \ src/divsufsort/divsufsort_private.h \ @@ -90,6 +91,9 @@ libwim_la_SOURCES = \ include/wimlib/integrity.h \ include/wimlib/list.h \ include/wimlib/lookup_table.h \ + include/wimlib/lz.h \ + include/wimlib/lz_hash.h \ + include/wimlib/lz_sarray.h \ include/wimlib/lzms.h \ include/wimlib/lzx.h \ include/wimlib/metadata.h \ diff --git a/README b/README index ab94e55c..abfbeafb 100644 --- a/README +++ b/README @@ -276,9 +276,9 @@ suffix array construction code from divsufsort (https://code.google.com/p/libdivsufsort/) and algorithms from 7-Zip as well as several published papers. -lz77.c contains a hash-table-based LZ77 matchfinder that is based on code from -zlib but has been rewritten. This code is applicable to XPRESS, LZX, and LZMS, -all of which are partly based on LZ77 compression. +lz_hash.c contains a hash-table-based LZ77 matchfinder that is based on code +from zlib but has been rewritten. This code is applicable to XPRESS, LZX, and +LZMS, all of which are partly based on LZ77 compression. A limited number of other free programs can handle some parts of the WIM file format: diff --git a/include/wimlib/compress_common.h b/include/wimlib/compress_common.h index 14b32f48..666f10dc 100644 --- a/include/wimlib/compress_common.h +++ b/include/wimlib/compress_common.h @@ -64,29 +64,6 @@ bitstream_put_bits(struct output_bitstream *ostream, extern void bitstream_put_byte(struct output_bitstream *ostream, u8 n); -struct lz_params { - unsigned min_match; - unsigned max_match; - unsigned max_offset; - unsigned nice_match; - unsigned good_match; - unsigned max_chain_len; - unsigned max_lazy_match; - unsigned too_far; -}; - -typedef void (*lz_record_match_t)(unsigned len, unsigned offset, void *ctx); -typedef void (*lz_record_literal_t)(u8 lit, void *ctx); - -extern void -lz_analyze_block(const u8 window[restrict], - input_idx_t window_size, - lz_record_match_t record_match, - lz_record_literal_t record_literal, - void *record_ctx, - const struct lz_params *params, - input_idx_t prev_tab[restrict]); - extern void make_canonical_huffman_code(unsigned num_syms, unsigned max_codeword_len, diff --git a/include/wimlib/lz.h b/include/wimlib/lz.h new file mode 100644 index 00000000..4206f144 --- /dev/null +++ b/include/wimlib/lz.h @@ -0,0 +1,30 @@ +#ifndef _WIMLIB_LZ_H +#define _WIMLIB_LZ_H + +#include "wimlib/compress_common.h" + +//#define ENABLE_LZ_DEBUG +#ifdef ENABLE_LZ_DEBUG +# define LZ_DEBUG DEBUG +# define LZ_ASSERT wimlib_assert +# include "wimlib/assert.h" +# include "wimlib/error.h" +#else +# define LZ_DEBUG(...) +# define LZ_ASSERT(...) +#endif + + +/* Raw LZ match/literal format: just a length and offset. + * + * The length is the number of bytes of the match, and the offset is the number + * of bytes back in the input the match is from the current position. + * + * This can alternatively be used to represent a literal byte if @len is less + * than the minimum match length. */ +struct raw_match { + input_idx_t len; + input_idx_t offset; +}; + +#endif /* _WIMLIB_LZ_H */ diff --git a/include/wimlib/lz_hash.h b/include/wimlib/lz_hash.h new file mode 100644 index 00000000..9d097ae6 --- /dev/null +++ b/include/wimlib/lz_hash.h @@ -0,0 +1,30 @@ +#ifndef _WIMLIB_LZ_HASH_H +#define _WIMLIB_LZ_HASH_H + +#include "wimlib/compress_common.h" + +struct lz_params { + unsigned min_match; + unsigned max_match; + unsigned max_offset; + unsigned nice_match; + unsigned good_match; + unsigned max_chain_len; + unsigned max_lazy_match; + unsigned too_far; +}; + +typedef void (*lz_record_match_t)(unsigned len, unsigned offset, void *ctx); +typedef void (*lz_record_literal_t)(u8 lit, void *ctx); + +extern void +lz_analyze_block(const u8 window[restrict], + input_idx_t window_size, + lz_record_match_t record_match, + lz_record_literal_t record_literal, + void *record_ctx, + const struct lz_params *params, + input_idx_t prev_tab[restrict]); + + +#endif /* _WIMLIB_LZ_HASH_H */ diff --git a/include/wimlib/lz_sarray.h b/include/wimlib/lz_sarray.h new file mode 100644 index 00000000..d2d49d31 --- /dev/null +++ b/include/wimlib/lz_sarray.h @@ -0,0 +1,362 @@ +/* + * lz_sarray.h + * + * Suffix array match-finder for LZ (Lempel-Ziv) compression. + */ + +/* + * Copyright (C) 2013 Eric Biggers + * + * This file is part of wimlib, a library for working with WIM files. + * + * wimlib is free software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) + * any later version. + * + * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR + * A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with wimlib; if not, see http://www.gnu.org/licenses/. + */ + +#ifndef _WIMLIB_LZ_SARRAY_H +#define _WIMLIB_LZ_SARRAY_H + +#include "wimlib/compiler.h" +#include "wimlib/lz.h" +#include "wimlib/types.h" + +struct salink; + +/* Suffix array LZ (Lempel-Ziv) match-finder. */ +struct lz_sarray { + /* Allocated window size for the match-finder. */ + input_idx_t max_window_size; + + /* Minimum match length to return. */ + input_idx_t min_match_len; + + /* Maximum match length to return. */ + input_idx_t max_match_len; + + /* Maximum matches to consider at each position (max search depth). */ + u32 max_matches_to_consider; + + /* Maximum number of matches to return at each position. */ + u32 max_matches_to_return; + + /* Current position in the window */ + input_idx_t cur_pos; + + /* Current window size. */ + input_idx_t window_size; + + /* Suffix array for window. + * This is a mapping from suffix rank to suffix position. */ + input_idx_t *SA; + + /* Inverse suffix array for window. + * This is a mapping from suffix position to suffix rank. + * If 0 <= r < window_size, then ISA[SA[r]] == r. */ + input_idx_t *ISA; + + /* Longest common prefix array corresponding to the suffix array SA. + * LCP[i] is the length of the longest common prefix between the + * suffixes with positions SA[i - 1] and SA[i]. LCP[0] is undefined. + */ + input_idx_t *LCP; + + /* Suffix array links. + * + * During a linear scan of the input string to find matches, this array + * used to keep track of which rank suffixes in the suffix array appear + * before the current position. Instead of searching in the original + * suffix array, scans for matches at a given position traverse a linked + * list containing only suffixes that appear before that position. */ + struct salink *salink; +}; + +/* Suffix array link */ +struct salink { + /* Rank of highest ranked suffix that has rank lower than the suffix + * corresponding to this structure and either has a lower position + * (initially) or has a position lower than the highest position at + * which matches have been searched for so far, or -1 if there is no + * such suffix. */ + input_idx_t prev; + + /* Rank of lowest ranked suffix that has rank greater than the suffix + * corresponding to this structure and either has a lower position + * (intially) or has a position lower than the highest position at which + * matches have been searched for so far, or -1 if there is no such + * suffix. */ + input_idx_t next; + + /* Length of longest common prefix between the suffix corresponding to + * this structure and the suffix with rank @prev, or 0 if @prev is -1. + */ + input_idx_t lcpprev; + + /* Length of longest common prefix between the suffix corresponding to + * this structure and the suffix with rank @next, or 0 if @next is -1. + */ + input_idx_t lcpnext; +}; + +extern bool +lz_sarray_init(struct lz_sarray *mf, + input_idx_t max_window_size, + input_idx_t min_match_len, + input_idx_t max_match_len, + u32 max_matches_to_consider, + u32 max_matches_to_return); + +extern void +lz_sarray_destroy(struct lz_sarray *mf); + +extern void +lz_sarray_load_window(struct lz_sarray *mf, const u8 window[], + input_idx_t window_size); + +static inline input_idx_t +lz_sarray_get_pos(const struct lz_sarray *mf) +{ + return mf->cur_pos; +} + +/* Advance the suffix array match-finder to the next position. */ +static _always_inline_attribute void +lz_sarray_update_salink(const input_idx_t i, + const input_idx_t SA[const restrict], + const input_idx_t ISA[const restrict], + struct salink link[const restrict]) +{ + /* r = Rank of the suffix at the current position. */ + const input_idx_t r = ISA[i]; + + /* next = rank of LOWEST ranked suffix that is ranked HIGHER than the + * current suffix AND has a LOWER position, or -1 if none exists. */ + const input_idx_t next = link[r].next; + + /* prev = rank of HIGHEST ranked suffix that is ranked LOWER than the + * current suffix AND has a LOWER position, or -1 if none exists. */ + const input_idx_t prev = link[r].prev; + + /* Link the suffix at the current position into the linked list that + * contains all suffixes in the suffix array that are appear at or + * before the current position, sorted by rank. + * + * Save the values of all fields we overwrite so that rollback is + * possible. */ + if (next != ~(input_idx_t)0) { + + link[next].prev = r; + link[next].lcpprev = link[r].lcpnext; + } + + if (prev != ~(input_idx_t)0) { + + link[prev].next = r; + link[prev].lcpnext = link[r].lcpprev; + } +} + +/* Skip the current position in the suffix array match-finder. */ +static _always_inline_attribute void +lz_sarray_skip_position(struct lz_sarray *mf) +{ + LZ_ASSERT(mf->cur_pos < mf->window_size); + lz_sarray_update_salink(mf->cur_pos++, mf->SA, mf->ISA, mf->salink); +} + +typedef input_idx_t lz_sarray_cost_t; +#define LZ_SARRAY_INFINITE_COST (~(lz_sarray_cost_t)0) + +/* + * Use the suffix array match-finder to retrieve a list of LZ matches at the + * current position. + * + * Returns the number of matches written into @matches. The matches are + * returned in decreasing order by length, and each will be of unique length + * between the minimum and maximum match lengths passed to lz_sarray_init(). Up + * to @max_matches_to_return (passed to lz_sarray_init()) matches will be + * returned. + * + * @eval_match_cost is a function for evaluating the cost of a match when + * deciding which ones to return. It is only used for comparing matches of the + * same length. It needs to be fast, and need not be exact; an implementation + * might simply rank matches by their offset, for example, although + * implementations may choose to take into account additional information such + * as repeat offsets. + */ +static _always_inline_attribute u32 +lz_sarray_get_matches(struct lz_sarray *mf, + struct raw_match matches[], + lz_sarray_cost_t (*eval_match_cost) + (input_idx_t length, + input_idx_t offset, + const void *ctx), + const void *eval_match_cost_ctx) +{ + LZ_ASSERT(mf->cur_pos < mf->window_size); + const input_idx_t i = mf->cur_pos++; + + const input_idx_t * const restrict SA = mf->SA; + const input_idx_t * const restrict ISA = mf->ISA; + struct salink * const restrict link = mf->salink; + const input_idx_t min_match_len = mf->min_match_len; + const u32 max_matches_to_consider = mf->max_matches_to_consider; + const u32 max_matches_to_return = mf->max_matches_to_return; + + /* r = Rank of the suffix at the current position. */ + const input_idx_t r = ISA[i]; + + /* Prepare for searching the current position. */ + lz_sarray_update_salink(i, SA, ISA, link); + + /* L = rank of next suffix to the left; + * R = rank of next suffix to the right; + * lenL = length of match between current position and the suffix with rank L; + * lenR = length of match between current position and the suffix with rank R. + * + * This is left and right relative to the rank of the current suffix. + * Since the suffixes in the suffix array are sorted, the longest + * matches are immediately to the left and right (using the linked list + * to ignore all suffixes that occur later in the window). The match + * length decreases the farther left and right we go. We shall keep the + * length on both sides in sync in order to choose the lowest-cost match + * of each length. + */ + input_idx_t L = link[r].prev; + input_idx_t R = link[r].next; + input_idx_t lenL = link[r].lcpprev; + input_idx_t lenR = link[r].lcpnext; + + /* nmatches = number of matches found so far. */ + u32 nmatches = 0; + + /* best_cost = cost of lowest-cost match found so far. + * + * We keep track of this so that we can ignore shorter matches that do + * not have lower costs than a longer matches already found. + */ + lz_sarray_cost_t best_cost = LZ_SARRAY_INFINITE_COST; + + /* count_remaining = maximum number of possible matches remaining to be + * considered. */ + u32 count_remaining = max_matches_to_consider; + + /* pending = match currently being considered for a specific length. */ + struct raw_match pending; + lz_sarray_cost_t pending_cost; + + while (lenL >= min_match_len || lenR >= min_match_len) + { + pending.len = lenL; + pending_cost = LZ_SARRAY_INFINITE_COST; + lz_sarray_cost_t cost; + + /* Extend left. */ + if (lenL >= min_match_len && lenL >= lenR) { + for (;;) { + + if (--count_remaining == 0) + goto out_save_pending; + + input_idx_t offset = i - SA[L]; + + /* Save match if it has smaller cost. */ + cost = (*eval_match_cost)(lenL, offset, + eval_match_cost_ctx); + if (cost < pending_cost) { + pending.offset = offset; + pending_cost = cost; + } + + if (link[L].lcpprev < lenL) { + /* Match length decreased. */ + + lenL = link[L].lcpprev; + + /* Save the pending match unless the + * right side still may have matches of + * this length to be scanned, or if a + * previous (longer) match had lower + * cost. */ + if (pending.len > lenR) { + if (pending_cost < best_cost) { + best_cost = pending_cost; + matches[nmatches++] = pending; + if (nmatches == max_matches_to_return) + return nmatches; + } + pending.len = lenL; + pending_cost = LZ_SARRAY_INFINITE_COST; + } + if (lenL < min_match_len || lenL < lenR) + break; + } + L = link[L].prev; + } + } + + pending.len = lenR; + + /* Extend right. */ + if (lenR >= min_match_len && lenR > lenL) { + for (;;) { + + if (--count_remaining == 0) + goto out_save_pending; + + input_idx_t offset = i - SA[R]; + + /* Save match if it has smaller cost. */ + cost = (*eval_match_cost)(lenR, + offset, + eval_match_cost_ctx); + if (cost < pending_cost) { + pending.offset = offset; + pending_cost = cost; + } + + if (link[R].lcpnext < lenR) { + /* Match length decreased. */ + + lenR = link[R].lcpnext; + + /* Save the pending match unless a + * previous (longer) match had lower + * cost. */ + if (pending_cost < best_cost) { + matches[nmatches++] = pending; + best_cost = pending_cost; + if (nmatches == max_matches_to_return) + return nmatches; + } + + if (lenR < min_match_len || lenR <= lenL) + break; + + pending.len = lenR; + pending_cost = LZ_SARRAY_INFINITE_COST; + } + R = link[R].next; + } + } + } + goto out; + +out_save_pending: + if (pending_cost != LZ_SARRAY_INFINITE_COST) + matches[nmatches++] = pending; + +out: + return nmatches; +} + +#endif /* _WIMLIB_LZ_SARRAY_H */ diff --git a/src/lz77.c b/src/lz_hash.c similarity index 99% rename from src/lz77.c rename to src/lz_hash.c index b5495da7..ac469f20 100644 --- a/src/lz77.c +++ b/src/lz_hash.c @@ -1,5 +1,5 @@ /* - * lz77.c + * lz_hash.c * * This file provides the code to analyze a buffer of uncompressed data for * matches, as per the LZ77 algorithm. It uses a hash table to accelerate the @@ -30,7 +30,7 @@ # include #endif -#include "wimlib/compress_common.h" +#include "wimlib/lz_hash.h" #include "wimlib/util.h" #include diff --git a/src/lz_sarray.c b/src/lz_sarray.c new file mode 100644 index 00000000..79fca69f --- /dev/null +++ b/src/lz_sarray.c @@ -0,0 +1,237 @@ +/* + * lz_sarray.c + * + * Suffix array match-finder for LZ (Lempel-Ziv) compression. + */ + +/* + * Copyright (C) 2013 Eric Biggers + * + * This file is part of wimlib, a library for working with WIM files. + * + * wimlib is free software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) + * any later version. + * + * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR + * A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with wimlib; if not, see http://www.gnu.org/licenses/. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "wimlib/lz_sarray.h" +#include "wimlib/util.h" +#include "divsufsort/divsufsort.h" +#include + +/* Initialize the suffix array match-finder with the specified parameters. + * + * After initialization, it can be used for any number of input strings of + * length less than or equal to @max_window_size. */ +bool +lz_sarray_init(struct lz_sarray *mf, + input_idx_t max_window_size, + input_idx_t min_match_len, + input_idx_t max_match_len, + u32 max_matches_to_consider, + u32 max_matches_to_return) +{ + mf->max_window_size = max_window_size; + mf->min_match_len = min_match_len; + mf->max_match_len = max_match_len; + mf->max_matches_to_consider = max_matches_to_consider; + mf->max_matches_to_return = max_matches_to_return; + + mf->SA = MALLOC(3U * max_window_size * sizeof(mf->SA[0])); + if (mf->SA == NULL) + return false; + + mf->salink = MALLOC(max_window_size * sizeof(mf->salink[0])); + if (mf->salink == NULL) + return false; + + return true; +} + +/* Free memory allocated for the suffix array match-finder. */ +void +lz_sarray_destroy(struct lz_sarray *mf) +{ + FREE(mf->SA); + FREE(mf->salink); +} + +/* Initialize the suffix array match-finder for the specified input. */ +void +lz_sarray_load_window(struct lz_sarray *mf, const u8 window[], + input_idx_t window_size) +{ + /* Load variables */ + const u8 * const restrict T = window; + const input_idx_t n = window_size; + const input_idx_t max_match_len = mf->max_match_len; + input_idx_t * const restrict SA = mf->SA; + input_idx_t * const restrict ISA = mf->ISA = SA + window_size; + input_idx_t * const restrict LCP = mf->LCP = ISA + window_size; + struct salink * const restrict link = mf->salink; + + /* Compute SA (Suffix Array). */ + { + /* ISA and link are used as temporary space. */ + LZ_ASSERT(mf->max_window_size * sizeof(ISA[0]) >= 256 * sizeof(saidx_t)); + LZ_ASSERT(mf->max_window_size * 2 * sizeof(link[0]) >= 256 * 256 * sizeof(saidx_t)); + + if (sizeof(input_idx_t) == sizeof(saidx_t)) { + divsufsort(T, SA, n, (saidx_t*)ISA, (saidx_t*)link); + } else { + saidx_t sa[n]; + divsufsort(T, sa, n, (saidx_t*)ISA, (saidx_t*)link); + for (input_idx_t i = 0; i < n; i++) + SA[i] = sa[i]; + } + } + +#ifdef ENABLE_LZ_DEBUG + + LZ_ASSERT(n > 0); + + /* Verify suffix array. */ + { + bool found[n]; + ZERO_ARRAY(found); + for (input_idx_t r = 0; r < n; r++) { + input_idx_t i = SA[r]; + LZ_ASSERT(i < n); + LZ_ASSERT(!found[i]); + found[i] = true; + } + } + + for (input_idx_t r = 0; r < n - 1; r++) { + + input_idx_t i1 = SA[r]; + input_idx_t i2 = SA[r + 1]; + + input_idx_t n1 = n - i1; + input_idx_t n2 = n - i2; + + LZ_ASSERT(memcmp(&T[i1], &T[i2], min(n1, n2)) <= 0); + } + LZ_DEBUG("Verified SA (len %u)", n); +#endif /* ENABLE_LZ_DEBUG */ + + /* Compute ISA (Inverse Suffix Array) */ + for (input_idx_t r = 0; r < n; r++) + ISA[SA[r]] = r; + + /* Compute LCP (longest common prefix) array. + * + * Algorithm adapted from Kasai et al. 2001: "Linear-Time + * Longest-Common-Prefix Computation in Suffix Arrays and Its + * Applications". */ + { + input_idx_t h = 0; + for (input_idx_t i = 0; i < n; i++) { + input_idx_t r = ISA[i]; + if (r > 0) { + input_idx_t j = SA[r - 1]; + + input_idx_t lim = min(n - i, n - j); + + while (h < lim && T[i + h] == T[j + h]) + h++; + LCP[r] = h; + if (h > 0) + h--; + } + } + } + +#ifdef ENABLE_LZ_DEBUG + /* Verify LCP array. */ + for (input_idx_t r = 0; r < n - 1; r++) { + LZ_ASSERT(ISA[SA[r]] == r); + LZ_ASSERT(ISA[SA[r + 1]] == r + 1); + + input_idx_t i1 = SA[r]; + input_idx_t i2 = SA[r + 1]; + input_idx_t lcp = LCP[r + 1]; + + input_idx_t n1 = n - i1; + input_idx_t n2 = n - i2; + + LZ_ASSERT(lcp <= min(n1, n2)); + + LZ_ASSERT(memcmp(&T[i1], &T[i2], lcp) == 0); + if (lcp < min(n1, n2)) + LZ_ASSERT(T[i1 + lcp] != T[i2 + lcp]); + } +#endif /* ENABLE_LZ_DEBUG */ + + /* Compute salink.next and salink.lcpnext. + * + * Algorithm adapted from Crochemore et al. 2009: + * "LPF computation revisited". + * + * Note: we cap lcpnext to the maximum match length so that the + * match-finder need not worry about it later. */ + link[n - 1].next = ~(input_idx_t)0; + link[n - 1].prev = ~(input_idx_t)0; + link[n - 1].lcpnext = 0; + link[n - 1].lcpprev = 0; + for (input_idx_t r = n - 2; r != ~(input_idx_t)0; r--) { + input_idx_t t = r + 1; + input_idx_t l = LCP[t]; + while (t != ~(input_idx_t)0 && SA[t] > SA[r]) { + l = min(l, link[t].lcpnext); + t = link[t].next; + } + link[r].next = t; + link[r].lcpnext = min(l, max_match_len); + LZ_ASSERT(t == ~(input_idx_t)0 || l <= n - SA[t]); + LZ_ASSERT(l <= n - SA[r]); + if (t == ~(input_idx_t)0) + LZ_ASSERT(l == 0); + else + LZ_ASSERT(memcmp(&T[SA[r]], &T[SA[t]], l) == 0); + } + + /* Compute salink.prev and salink.lcpprev. + * + * Algorithm adapted from Crochemore et al. 2009: + * "LPF computation revisited". + * + * Note: we cap lcpprev to the maximum match length so that the + * match-finder need not worry about it later. */ + link[0].prev = ~(input_idx_t)0; + link[0].next = ~(input_idx_t)0; + link[0].lcpprev = 0; + link[0].lcpnext = 0; + for (input_idx_t r = 1; r < n; r++) { + input_idx_t t = r - 1; + input_idx_t l = LCP[r]; + while (t != ~(input_idx_t)0 && SA[t] > SA[r]) { + l = min(l, link[t].lcpprev); + t = link[t].prev; + } + link[r].prev = t; + link[r].lcpprev = min(l, max_match_len); + LZ_ASSERT(t == ~(input_idx_t)0 || l <= n - SA[t]); + LZ_ASSERT(l <= n - SA[r]); + if (t == ~(input_idx_t)0) + LZ_ASSERT(l == 0); + else + LZ_ASSERT(memcmp(&T[SA[r]], &T[SA[t]], l) == 0); + } + + mf->cur_pos = 0; + mf->window_size = n; +} diff --git a/src/lzms-compress.c b/src/lzms-compress.c index af7f85b6..f7ef633e 100644 --- a/src/lzms-compress.c +++ b/src/lzms-compress.c @@ -39,6 +39,7 @@ #include "wimlib/compress_common.h" #include "wimlib/endianness.h" #include "wimlib/error.h" +#include "wimlib/lz_hash.h" #include "wimlib/lzms.h" #include "wimlib/util.h" diff --git a/src/lzx-compress.c b/src/lzx-compress.c index 6b520ae5..58806632 100644 --- a/src/lzx-compress.c +++ b/src/lzx-compress.c @@ -158,6 +158,8 @@ #include "wimlib/compress_common.h" #include "wimlib/endianness.h" #include "wimlib/error.h" +#include "wimlib/lz_hash.h" +#include "wimlib/lz_sarray.h" #include "wimlib/lzx.h" #include "wimlib/util.h" #include @@ -168,8 +170,6 @@ # include "wimlib/decompress_common.h" #endif -#include "divsufsort/divsufsort.h" - typedef u32 block_cost_t; #define INFINITE_BLOCK_COST ((block_cost_t)~0U) @@ -239,18 +239,6 @@ struct lzx_match { u32 data; }; -/* Raw LZ match/literal format: just a length and offset. - * - * The length is the number of bytes of the match, and the offset is the number - * of bytes back in the input the match is from the current position. - * - * If @len < LZX_MIN_MATCH_LEN, then it's really just a literal byte and @offset is - * meaningless. */ -struct raw_match { - u16 len; - input_idx_t offset; -}; - /* Specification for an LZX block. */ struct lzx_block_spec { @@ -321,33 +309,6 @@ struct lzx_optimal { struct lzx_lru_queue queue; }; -/* Suffix array link */ -struct salink { - /* Rank of highest ranked suffix that has rank lower than the suffix - * corresponding to this structure and either has a lower position - * (initially) or has a position lower than the highest position at - * which matches have been searched for so far, or -1 if there is no - * such suffix. */ - input_idx_t prev; - - /* Rank of lowest ranked suffix that has rank greater than the suffix - * corresponding to this structure and either has a lower position - * (intially) or has a position lower than the highest position at which - * matches have been searched for so far, or -1 if there is no such - * suffix. */ - input_idx_t next; - - /* Length of longest common prefix between the suffix corresponding to - * this structure and the suffix with rank @prev, or 0 if @prev is -1. - */ - input_idx_t lcpprev; - - /* Length of longest common prefix between the suffix corresponding to - * this structure and the suffix with rank @next, or 0 if @next is -1. - */ - input_idx_t lcpnext; -}; - /* State of the LZX compressor. */ struct lzx_compressor { @@ -407,29 +368,8 @@ struct lzx_compressor { /* Fast algorithm only: Array of hash table links. */ input_idx_t *prev_tab; - /* Suffix array for window. - * This is a mapping from suffix rank to suffix position. */ - input_idx_t *SA; - - /* Inverse suffix array for window. - * This is a mapping from suffix position to suffix rank. - * If 0 <= r < window_size, then ISA[SA[r]] == r. */ - input_idx_t *ISA; - - /* Longest common prefix array corresponding to the suffix array SA. - * LCP[i] is the length of the longest common prefix between the - * suffixes with positions SA[i - 1] and SA[i]. LCP[0] is undefined. - */ - input_idx_t *LCP; - - /* Suffix array links. - * - * During a linear scan of the input string to find matches, this array - * used to keep track of which rank suffixes in the suffix array appear - * before the current position. Instead of searching in the original - * suffix array, scans for matches at a given position traverse a linked - * list containing only suffixes that appear before that position. */ - struct salink *salink; + /* Slow algorithm only: Suffix array match-finder. */ + struct lz_sarray lz_sarray; /* Position in window of next match to return. */ input_idx_t match_window_pos; @@ -1178,16 +1118,17 @@ lzx_match_cost(unsigned length, unsigned offset, const struct lzx_costs *costs, /* Fast heuristic cost evaluation to use in the inner loop of the match-finder. * Unlike lzx_match_cost() which does a true cost evaluation, this simply * prioritize matches based on their offset. */ -static block_cost_t -lzx_match_cost_fast(unsigned offset, const struct lzx_lru_queue *queue) +static input_idx_t +lzx_match_cost_fast(input_idx_t length, input_idx_t offset, const void *_queue) { + const struct lzx_lru_queue *queue = _queue; + /* It seems well worth it to take the time to give priority to recently * used offsets. */ - for (unsigned i = 0; i < LZX_NUM_RECENT_OFFSETS; i++) + for (input_idx_t i = 0; i < LZX_NUM_RECENT_OFFSETS; i++) if (offset == queue->R[i]) return i; - BUILD_BUG_ON(LZX_MAX_WINDOW_SIZE >= (block_cost_t)~0U); return offset; } @@ -1228,219 +1169,6 @@ lzx_set_costs(struct lzx_compressor * ctx, const struct lzx_lens * lens) } } -/* Advance the suffix array match-finder to the next position. */ -static void -lzx_lz_update_salink(input_idx_t i, - const input_idx_t SA[restrict], - const input_idx_t ISA[restrict], - struct salink link[restrict]) -{ - /* r = Rank of the suffix at the current position. */ - const input_idx_t r = ISA[i]; - - /* next = rank of LOWEST ranked suffix that is ranked HIGHER than the - * current suffix AND has a LOWER position, or -1 if none exists. */ - const input_idx_t next = link[r].next; - - /* prev = rank of HIGHEST ranked suffix that is ranked LOWER than the - * current suffix AND has a LOWER position, or -1 if none exists. */ - const input_idx_t prev = link[r].prev; - - /* Link the suffix at the current position into the linked list that - * contains all suffixes in the suffix array that are appear at or - * before the current position, sorted by rank. - * - * Save the values of all fields we overwrite so that rollback is - * possible. */ - if (next != (input_idx_t)~0U) { - - link[next].prev = r; - link[next].lcpprev = link[r].lcpnext; - } - - if (prev != (input_idx_t)~0U) { - - link[prev].next = r; - link[prev].lcpnext = link[r].lcpprev; - } -} - -/* - * Use the suffix array match-finder to retrieve a list of LZ matches at the - * current position. - * - * [in] @i Current position in the window. - * [in] @SA Suffix array for the window. - * [in] @ISA Inverse suffix array for the window. - * [inout] @link Suffix array links used internally by the match-finder. - * [out] @matches The (length, offset) pairs of the resulting matches will - * be written here, sorted in decreasing order by - * length. All returned lengths will be unique. - * [in] @queue Recently used match offsets, used when evaluating the - * cost of matches. - * [in] @min_match_len Minimum match length to return. - * [in] @max_matches_to_consider Maximum number of matches to consider at - * the position. - * [in] @max_matches_to_return Maximum number of matches to return. - * - * The return value is the number of matches found and written to @matches. - */ -static unsigned -lzx_lz_get_matches(const input_idx_t i, - const input_idx_t SA[const restrict], - const input_idx_t ISA[const restrict], - struct salink link[const restrict], - struct raw_match matches[const restrict], - const struct lzx_lru_queue * const restrict queue, - const unsigned min_match_len, - const u32 max_matches_to_consider, - const u32 max_matches_to_return) -{ - /* r = Rank of the suffix at the current position. */ - const input_idx_t r = ISA[i]; - - /* Prepare for searching the current position. */ - lzx_lz_update_salink(i, SA, ISA, link); - - /* L = rank of next suffix to the left; - * R = rank of next suffix to the right; - * lenL = length of match between current position and the suffix with rank L; - * lenR = length of match between current position and the suffix with rank R. - * - * This is left and right relative to the rank of the current suffix. - * Since the suffixes in the suffix array are sorted, the longest - * matches are immediately to the left and right (using the linked list - * to ignore all suffixes that occur later in the window). The match - * length decreases the farther left and right we go. We shall keep the - * length on both sides in sync in order to choose the lowest-cost match - * of each length. - */ - input_idx_t L = link[r].prev; - input_idx_t R = link[r].next; - input_idx_t lenL = link[r].lcpprev; - input_idx_t lenR = link[r].lcpnext; - - /* nmatches = number of matches found so far. */ - unsigned nmatches = 0; - - /* best_cost = cost of lowest-cost match found so far. - * - * We keep track of this so that we can ignore shorter matches that do - * not have lower costs than a longer matches already found. - */ - block_cost_t best_cost = INFINITE_BLOCK_COST; - - /* count_remaining = maximum number of possible matches remaining to be - * considered. */ - u32 count_remaining = max_matches_to_consider; - - /* pending = match currently being considered for a specific length. */ - struct raw_match pending; - block_cost_t pending_cost; - - while (lenL >= min_match_len || lenR >= min_match_len) - { - pending.len = lenL; - pending_cost = INFINITE_BLOCK_COST; - block_cost_t cost; - - /* Extend left. */ - if (lenL >= min_match_len && lenL >= lenR) { - for (;;) { - - if (--count_remaining == 0) - goto out_save_pending; - - input_idx_t offset = i - SA[L]; - - /* Save match if it has smaller cost. */ - cost = lzx_match_cost_fast(offset, queue); - if (cost < pending_cost) { - pending.offset = offset; - pending_cost = cost; - } - - if (link[L].lcpprev < lenL) { - /* Match length decreased. */ - - lenL = link[L].lcpprev; - - /* Save the pending match unless the - * right side still may have matches of - * this length to be scanned, or if a - * previous (longer) match had lower - * cost. */ - if (pending.len > lenR) { - if (pending_cost < best_cost) { - best_cost = pending_cost; - matches[nmatches++] = pending; - if (nmatches == max_matches_to_return) - return nmatches; - } - pending.len = lenL; - pending_cost = INFINITE_BLOCK_COST; - } - if (lenL < min_match_len || lenL < lenR) - break; - } - L = link[L].prev; - } - } - - pending.len = lenR; - - /* Extend right. */ - if (lenR >= min_match_len && lenR > lenL) { - for (;;) { - - if (--count_remaining == 0) - goto out_save_pending; - - input_idx_t offset = i - SA[R]; - - /* Save match if it has smaller cost. */ - cost = lzx_match_cost_fast(offset, queue); - if (cost < pending_cost) { - pending.offset = offset; - pending_cost = cost; - } - - if (link[R].lcpnext < lenR) { - /* Match length decreased. */ - - lenR = link[R].lcpnext; - - /* Save the pending match unless a - * previous (longer) match had lower - * cost. */ - if (pending_cost < best_cost) { - matches[nmatches++] = pending; - best_cost = pending_cost; - if (nmatches == max_matches_to_return) - return nmatches; - } - - if (lenR < min_match_len || lenR <= lenL) - break; - - pending.len = lenR; - pending_cost = INFINITE_BLOCK_COST; - } - R = link[R].next; - } - } - } - goto out; - -out_save_pending: - if (pending_cost != INFINITE_BLOCK_COST) - matches[nmatches++] = pending; - -out: - return nmatches; -} - - /* Tell the match-finder to skip the specified number of bytes (@n) in the * input. */ static void @@ -1456,9 +1184,10 @@ lzx_lz_skip_bytes(struct lzx_compressor *ctx, unsigned n) } else { while (n--) { ctx->cached_matches[ctx->cached_matches_pos++].len = 0; - lzx_lz_update_salink(ctx->match_window_pos++, ctx->SA, - ctx->ISA, ctx->salink); + lz_sarray_skip_position(&ctx->lz_sarray); + ctx->match_window_pos++; } + LZX_ASSERT(lz_sarray_get_pos(&ctx->lz_sarray) == ctx->match_window_pos); } } @@ -1481,24 +1210,11 @@ lzx_lz_get_matches_caching(struct lzx_compressor *ctx, if (ctx->matches_cached) { num_matches = matches[-1].len; } else { - unsigned min_match_len = LZX_MIN_MATCH_LEN; - if (!ctx->params.alg_params.slow.use_len2_matches) - min_match_len = max(min_match_len, 3); - const u32 max_search_depth = ctx->params.alg_params.slow.max_search_depth; - const u32 max_matches_per_pos = ctx->params.alg_params.slow.max_matches_per_pos; - - if (unlikely(max_search_depth == 0 || max_matches_per_pos == 0)) - num_matches = 0; - else - num_matches = lzx_lz_get_matches(ctx->match_window_pos, - ctx->SA, - ctx->ISA, - ctx->salink, - matches, - queue, - min_match_len, - max_search_depth, - max_matches_per_pos); + LZX_ASSERT(lz_sarray_get_pos(&ctx->lz_sarray) == ctx->match_window_pos); + num_matches = lz_sarray_get_matches(&ctx->lz_sarray, + matches, + lzx_match_cost_fast, + queue); matches[-1].len = num_matches; } ctx->cached_matches_pos += num_matches + 1; @@ -1898,169 +1614,12 @@ lzx_optimize_blocks(struct lzx_compressor *ctx) lzx_optimize_block(ctx, &ctx->block_specs[i], num_passes); } -/* Initialize the suffix array match-finder for the specified input. */ -static void -lzx_lz_init_matchfinder(const u8 T[const restrict], - const input_idx_t n, - input_idx_t SA[const restrict], - input_idx_t ISA[const restrict], - input_idx_t LCP[const restrict], - struct salink link[const restrict], - const unsigned max_match_len) -{ - /* Compute SA (Suffix Array). */ - - { - /* ISA and link are used as temporary space. */ - BUILD_BUG_ON(LZX_MIN_WINDOW_SIZE * sizeof(ISA[0]) < 256 * sizeof(saidx_t)); - BUILD_BUG_ON(LZX_MIN_WINDOW_SIZE * 2 * sizeof(link[0]) < 256 * 256 * sizeof(saidx_t)); - - if (sizeof(input_idx_t) == sizeof(saidx_t)) { - divsufsort(T, SA, n, (saidx_t*)ISA, (saidx_t*)link); - } else { - saidx_t sa[n]; - divsufsort(T, sa, n, (saidx_t*)ISA, (saidx_t*)link); - for (input_idx_t i = 0; i < n; i++) - SA[i] = sa[i]; - } - } - -#ifdef ENABLE_LZX_DEBUG - - LZX_ASSERT(n > 0); - - /* Verify suffix array. */ - { - bool found[n]; - ZERO_ARRAY(found); - for (input_idx_t r = 0; r < n; r++) { - input_idx_t i = SA[r]; - LZX_ASSERT(i < n); - LZX_ASSERT(!found[i]); - found[i] = true; - } - } - - for (input_idx_t r = 0; r < n - 1; r++) { - - input_idx_t i1 = SA[r]; - input_idx_t i2 = SA[r + 1]; - - input_idx_t n1 = n - i1; - input_idx_t n2 = n - i2; - - LZX_ASSERT(memcmp(&T[i1], &T[i2], min(n1, n2)) <= 0); - } - LZX_DEBUG("Verified SA (len %u)", n); -#endif /* ENABLE_LZX_DEBUG */ - - /* Compute ISA (Inverse Suffix Array) */ - for (input_idx_t r = 0; r < n; r++) - ISA[SA[r]] = r; - - /* Compute LCP (longest common prefix) array. - * - * Algorithm adapted from Kasai et al. 2001: "Linear-Time - * Longest-Common-Prefix Computation in Suffix Arrays and Its - * Applications". */ - { - input_idx_t h = 0; - for (input_idx_t i = 0; i < n; i++) { - input_idx_t r = ISA[i]; - if (r > 0) { - input_idx_t j = SA[r - 1]; - - input_idx_t lim = min(n - i, n - j); - - while (h < lim && T[i + h] == T[j + h]) - h++; - LCP[r] = h; - if (h > 0) - h--; - } - } - } - -#ifdef ENABLE_LZX_DEBUG - /* Verify LCP array. */ - for (input_idx_t r = 0; r < n - 1; r++) { - LZX_ASSERT(ISA[SA[r]] == r); - LZX_ASSERT(ISA[SA[r + 1]] == r + 1); - - input_idx_t i1 = SA[r]; - input_idx_t i2 = SA[r + 1]; - input_idx_t lcp = LCP[r + 1]; - - input_idx_t n1 = n - i1; - input_idx_t n2 = n - i2; - - LZX_ASSERT(lcp <= min(n1, n2)); - - LZX_ASSERT(memcmp(&T[i1], &T[i2], lcp) == 0); - if (lcp < min(n1, n2)) - LZX_ASSERT(T[i1 + lcp] != T[i2 + lcp]); - } -#endif /* ENABLE_LZX_DEBUG */ - - /* Compute salink.next and salink.lcpnext. - * - * Algorithm adapted from Crochemore et al. 2009: - * "LPF computation revisited". - * - * Note: we cap lcpnext to the maximum match length so that the - * match-finder need not worry about it later. */ - link[n - 1].next = (input_idx_t)~0U; - link[n - 1].prev = (input_idx_t)~0U; - link[n - 1].lcpnext = 0; - link[n - 1].lcpprev = 0; - for (input_idx_t r = n - 2; r != (input_idx_t)~0U; r--) { - input_idx_t t = r + 1; - input_idx_t l = LCP[t]; - while (t != (input_idx_t)~0 && SA[t] > SA[r]) { - l = min(l, link[t].lcpnext); - t = link[t].next; - } - link[r].next = t; - link[r].lcpnext = min(l, max_match_len); - LZX_ASSERT(t == (input_idx_t)~0U || l <= n - SA[t]); - LZX_ASSERT(l <= n - SA[r]); - LZX_ASSERT(memcmp(&T[SA[r]], &T[SA[t]], l) == 0); - } - - /* Compute salink.prev and salink.lcpprev. - * - * Algorithm adapted from Crochemore et al. 2009: - * "LPF computation revisited". - * - * Note: we cap lcpprev to the maximum match length so that the - * match-finder need not worry about it later. */ - link[0].prev = (input_idx_t)~0; - link[0].next = (input_idx_t)~0; - link[0].lcpprev = 0; - link[0].lcpnext = 0; - for (input_idx_t r = 1; r < n; r++) { - input_idx_t t = r - 1; - input_idx_t l = LCP[r]; - while (t != (input_idx_t)~0 && SA[t] > SA[r]) { - l = min(l, link[t].lcpprev); - t = link[t].prev; - } - link[r].prev = t; - link[r].lcpprev = min(l, max_match_len); - LZX_ASSERT(t == (input_idx_t)~0 || l <= n - SA[t]); - LZX_ASSERT(l <= n - SA[r]); - LZX_ASSERT(memcmp(&T[SA[r]], &T[SA[t]], l) == 0); - } -} - /* Prepare the input window into one or more LZX blocks ready to be output. */ static void lzx_prepare_blocks(struct lzx_compressor * ctx) { /* Initialize the match-finder. */ - lzx_lz_init_matchfinder(ctx->window, ctx->window_size, - ctx->SA, ctx->ISA, ctx->LCP, ctx->salink, - LZX_MAX_MATCH_LEN); + lz_sarray_load_window(&ctx->lz_sarray, ctx->window, ctx->window_size); ctx->cached_matches_pos = 0; ctx->matches_cached = false; ctx->match_window_pos = 0; @@ -2341,8 +1900,7 @@ lzx_free_compressor(void *_ctx) FREE(ctx->chosen_matches); FREE(ctx->cached_matches); FREE(ctx->optimum); - FREE(ctx->salink); - FREE(ctx->SA); + lz_sarray_destroy(&ctx->lz_sarray); FREE(ctx->block_specs); FREE(ctx->prev_tab); FREE(ctx->window); @@ -2434,14 +1992,16 @@ lzx_create_compressor(size_t window_size, goto oom; if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) { - ctx->SA = MALLOC(3U * window_size * sizeof(ctx->SA[0])); - if (ctx->SA == NULL) - goto oom; - ctx->ISA = ctx->SA + window_size; - ctx->LCP = ctx->ISA + window_size; + unsigned min_match_len = LZX_MIN_MATCH_LEN; + if (!ctx->params.alg_params.slow.use_len2_matches) + min_match_len = max(min_match_len, 3); - ctx->salink = MALLOC(window_size * sizeof(ctx->salink[0])); - if (ctx->salink == NULL) + if (!lz_sarray_init(&ctx->lz_sarray, + window_size, + min_match_len, + LZX_MAX_MATCH_LEN, + params->alg_params.slow.max_search_depth, + params->alg_params.slow.max_matches_per_pos)) goto oom; } diff --git a/src/xpress-compress.c b/src/xpress-compress.c index 2637ce93..b1638877 100644 --- a/src/xpress-compress.c +++ b/src/xpress-compress.c @@ -34,6 +34,7 @@ #include "wimlib/compressor_ops.h" #include "wimlib/compress_common.h" #include "wimlib/error.h" +#include "wimlib/lz_hash.h" #include "wimlib/util.h" #include "wimlib/xpress.h" diff --git a/tests/test-imagex b/tests/test-imagex index 177cd2b3..4ae0540c 100755 --- a/tests/test-imagex +++ b/tests/test-imagex @@ -272,13 +272,13 @@ fi if ! diff -q -r tmp/dir tmp/myname || ! diff -q -r dir tmp/dir; then error "Recursive diff of applied WIM with original directory failed" fi -if test "`get_link_count tmp/dir/lz77.c`" != 1; then +if test "`get_link_count tmp/dir/write.c`" != 1; then error "Incorrect link count on extracted file" fi -if test "`get_link_count tmp/myname/lz77.c`" != 1; then +if test "`get_link_count tmp/myname/write.c`" != 1; then error "Incorrect link count on extracted file" fi -if test "`get_inode_number tmp/myname/lz77.c`" = "`get_inode_number tmp/dir/lz77.c`"; then +if test "`get_inode_number tmp/myname/write.c`" = "`get_inode_number tmp/dir/write.c`"; then error "Incorrect inode number" fi rm -rf tmp @@ -289,13 +289,13 @@ fi if ! diff -q -r tmp/dir tmp/myname || ! diff -q -r dir tmp/dir; then error "Recursive diff of applied WIM with original directory failed" fi -if test "`get_link_count tmp/dir/lz77.c`" != 2; then +if test "`get_link_count tmp/dir/write.c`" != 2; then error "Incorrect link count on extracted file" fi -if test "`get_link_count tmp/myname/lz77.c`" != 2; then +if test "`get_link_count tmp/myname/write.c`" != 2; then error "Incorrect link count on extracted file" fi -if test "`get_inode_number tmp/myname/lz77.c`" != "`get_inode_number tmp/dir/lz77.c`"; then +if test "`get_inode_number tmp/myname/write.c`" != "`get_inode_number tmp/dir/write.c`"; then error "Incorrect inode number" fi rm -rf tmp diff --git a/tests/test-imagex-mount b/tests/test-imagex-mount index d5bfbcad..49585e8a 100755 --- a/tests/test-imagex-mount +++ b/tests/test-imagex-mount @@ -75,24 +75,24 @@ for flag in "--compress=none" "--compress=maximum" "--compress=fast"; do "loaded, or you aren't a member of the FUSE group?" fi echo "Testing extracting file from mounted read-only WIM" - if ! cp tmp/lz77.c lz77.c; then + if ! cp tmp/write.c write.c; then error "Failed to extract file from read-only mounted WIM" fi - if ! diff -q dir/lz77.c lz77.c; then + if ! diff -q dir/write.c write.c; then error "Extracted file does not match copy in mounted WIM" fi - if ! diff -q tmp/lz77.c dir/lz77.c; then + if ! diff -q tmp/write.c dir/write.c; then error "Extractef file does not match original" fi - rm -f lz77.c + rm -f write.c echo "Testing modifying mounted read-only WIM (should fail)" - if rm tmp/lz77.c; then + if rm tmp/write.c; then error "Removing file from read-only mounted WIM didn't fail" fi if touch tmp/newfile; then error "Creating file on read-only mounted WIM didn't fail" fi - if echo 3 > tmp/lz77.c; then + if echo 3 > tmp/write.c; then error "Writing to file on read-only mounted WIM didn't fail" fi echo "Testing diff of mounted read-only WIM with original directory" @@ -137,10 +137,10 @@ echo "Testing removing file from mounted WIM" if ! imagex mountrw dir.wim dir tmp; then error "Failed to re-mount test WIM read-write" fi -if ! rm tmp/lz77.c; then +if ! rm tmp/write.c; then error "Failed to remove file from read-write mounted WIM" fi -if test -f tmp/lz77.c; then +if test -f tmp/write.c; then error "Removing file from read-write mounted WIM failed" fi echo "Testing making directory in mounted WIM" -- 2.43.0