X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=include%2Fwimlib%2Flz_sarray.h;h=5b63ae7c74e9dcafbb075d7ba2ab4559ac36284d;hp=76e6a08467f58c2996e3dec5497ea173d93ce1ef;hb=55d02ebb05358214d12cd98136fe55ca6019f33c;hpb=390e5c189dec754f8672d35947bd7405ddb29aac diff --git a/include/wimlib/lz_sarray.h b/include/wimlib/lz_sarray.h index 76e6a084..5b63ae7c 100644 --- a/include/wimlib/lz_sarray.h +++ b/include/wimlib/lz_sarray.h @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2013 Eric Biggers. All rights reserved. + * Copyright (c) 2013, 2014 Eric Biggers. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -35,12 +35,34 @@ #define _WIMLIB_LZ_SARRAY_H #include "wimlib/compiler.h" /* must define '_always_inline_attribute' */ -#include "wimlib/lz.h" /* must define 'struct raw_match', 'input_idx_t', - and LZ_ASSERT(). */ -#include "wimlib/types.h" /* must define 'bool', 'u8', 'u32' */ +#include "wimlib/lz.h" /* must define 'struct raw_match' and LZ_ASSERT() */ +#include "wimlib/types.h" /* must define 'bool', 'u8', 'u16, and 'u32' */ struct salink; +/* Position type --- must be an unsigned type large enough to hold the length of + * longest window for which the suffix array match-finder will be used. */ +typedef u32 lz_sarray_pos_t; + +/* Length type --- must be an unsigned type large enough to hold the maximum + * match length. */ +typedef u16 lz_sarray_len_t; + +/* Cost type, for the user-provided match cost evaluation function. */ +typedef lz_sarray_pos_t lz_sarray_cost_t; + +/* Type of distances in suffix array links. A larger type would allow skipping + * irrelevant suffixes more quickly, which is especially helpful towards the + * start of the window. However, even a single byte allows skipping 255 at a + * time, which where it matters is already a big improvement over the + * alternative of searching the suffixes consecutively. */ +typedef u8 lz_sarray_delta_t; + +#define LZ_SARRAY_LEN_MAX ((lz_sarray_len_t)~0UL) +#define LZ_SARRAY_POS_MAX ((lz_sarray_pos_t)~0UL) +#define LZ_SARRAY_DELTA_MAX ((lz_sarray_delta_t)~0UL) +#define LZ_SARRAY_INFINITE_COST ((lz_sarray_cost_t)~0UL) + /* State of the suffix array LZ (Lempel-Ziv) match-finder. * * This is defined here for benefit of the inlined code. It's not intended for @@ -54,13 +76,13 @@ struct lz_sarray { * sufficient to find matches. This number is the maximum length of * those arrays, or also the maximum window (block) size that can be * passed to lz_sarray_load_window(). */ - input_idx_t max_window_size; + lz_sarray_pos_t max_window_size; - /* Minimum match length to return. */ - input_idx_t min_match_len; + /* Minimum length of matches to return. */ + lz_sarray_len_t min_match_len; - /* Maximum match length to return. */ - input_idx_t max_match_len; + /* Maximum length of matches to return. */ + lz_sarray_len_t max_match_len; /* Maximum matches to consider at each position (max search depth). */ u32 max_matches_to_consider; @@ -69,19 +91,19 @@ struct lz_sarray { u32 max_matches_to_return; /* Current position in the window. */ - input_idx_t cur_pos; + lz_sarray_pos_t cur_pos; /* Current window size. */ - input_idx_t window_size; + lz_sarray_pos_t window_size; /* Suffix array for the current window. * This is a mapping from suffix rank to suffix position. */ - input_idx_t *SA; + lz_sarray_pos_t *SA; /* Inverse suffix array for the current window. * This is a mapping from suffix position to suffix rank. * If 0 <= r < window_size, then ISA[SA[r]] == r. */ - input_idx_t *ISA; + lz_sarray_pos_t *ISA; /* Suffix array links. * @@ -89,74 +111,109 @@ struct lz_sarray { * used to keep track of which rank suffixes in the suffix array appear * before the current position. Instead of searching in the original * suffix array, scans for matches at a given position traverse a linked - * list containing only suffixes that appear before that position. */ + * list containing (usually) only suffixes that appear before that + * position. */ struct salink *salink; }; -/* Suffix array link; one of these exists for each position in the suffix array. - */ +/* Suffix array link. An array of these structures, one per suffix rank, is + * used as a replacement for the raw LCP (Longest Common Prefix) array to allow + * skipping over suffixes that appear later in the window and hence cannot be + * used as LZ77 matches. */ struct salink { - /* Rank of highest ranked suffix that has rank lower than the suffix - * corresponding to this structure and either has a lower position - * (initially) or has a position lower than the highest position at - * which matches have been searched for so far, or ~(input_idx_t)0 if - * there is no such suffix. - * - * Think of this as a pointer to the closest position in the suffix - * array to the left that corresponds to a suffix that begins at a - * position in the current dictionary (i.e. before the current position - * in the window). */ - input_idx_t prev; - - /* Rank of lowest ranked suffix that has rank greater than the suffix - * corresponding to this structure and either has a lower position - * (intially) or has a position lower than the highest position at which - * matches have been searched for so far, or ~(input_idx_t)0 if there is - * no such suffix. - - * Think of this as a pointer to the closest position in the suffix - * array to the right that corresponds to a suffix that begins at a - * position in the current dictionary (i.e. before the current position - * in the window). */ - input_idx_t next; - - /* Length of longest common prefix between the suffix corresponding to - * this structure and the suffix with rank @prev, or 0 if @prev is - * ~(input_idx_t)0. */ - input_idx_t lcpprev; - - /* Length of longest common prefix between the suffix corresponding to - * this structure and the suffix with rank @next, or 0 if @next is - * ~(input_idx_t)0. */ - input_idx_t lcpnext; + union { + /* Temporary fields used while this structure is being + * initialized. + * + * Note: we want the entire `struct salink' to be only 6 bytes, + * even though this makes "next_initial" unaligned. */ + struct { + lz_sarray_pos_t next_initial; + lz_sarray_len_t lcpnext_initial; + } _packed_attribute; + + struct { + /* Intially, the length, in bytes, of the longest common + * prefix (LCP) between the suffix having this rank and + * the suffix with the the smallest larger rank that + * starts earlier in the window than the suffix having + * this rank. If no such suffix exists, this will be 0. + * + * Later, during match-finding, after the corresponding + * suffix has entered the LZ77 dictionary, this value + * may be updated by lz_sarray_update_salink() to refer + * instead to a lexicographically closer (but still + * larger) suffix that begins at a later position that + * has entered the LZ77 dictionary. */ + lz_sarray_len_t lcpnext; + + /* Initially, the length, in bytes, of the longest + * common prefix (LCP) between the suffix having this + * rank and the suffix with the the largest smaller rank + * that starts earlier in the window than the suffix + * having this rank. If no such suffix exists, this + * will be 0. + * + * Later, during match-finding, after the corresponding + * suffix has entered the LZ77 dictionary, this value + * may be updated by lz_sarray_update_salink() to refer + * instead to a lexicographically closer (but still + * smaller) suffix that begins at a later position that + * has entered the LZ77 dictionary. */ + lz_sarray_len_t lcpprev; + + /* Distance to the suffix referred to in the description + * of "lcpnext" above, but capped to a maximum value to + * save memory; or, 0 if no such suffix exists. If the + * true distance was truncated, this will give the + * distance to the rank of a suffix that is + * lexicographically closer to the current suffix than + * the desired suffix, but appears *later* in the window + * and hence cannot be used as the basis for a LZ77 + * match. */ + lz_sarray_delta_t dist_to_next; + + /* Distance to the suffix referred to in the description + * of "lcpprev" above, but capped to a maximum value to + * save memory; or, 0 if no such suffix exists. If the + * true distance was truncated, this will give the + * distance to the rank of a suffix that is + * lexicographically closer to the current suffix than + * the desired suffix, but appears *later* in the window + * and hence cannot be used as the basis for a LZ77 + * match. */ + lz_sarray_delta_t dist_to_prev; + }; + }; }; + /*-----------------------------------*/ /* Functions defined in lz_sarray.c */ /*-----------------------------------*/ extern bool lz_sarray_init(struct lz_sarray *mf, - input_idx_t max_window_size, - input_idx_t min_match_len, - input_idx_t max_match_len, + lz_sarray_pos_t max_window_size, + lz_sarray_len_t min_match_len, + lz_sarray_len_t max_match_len, u32 max_matches_to_consider, u32 max_matches_to_return); extern u64 -lz_sarray_get_needed_memory(input_idx_t max_window_size); +lz_sarray_get_needed_memory(lz_sarray_pos_t max_window_size); extern void lz_sarray_destroy(struct lz_sarray *mf); extern void -lz_sarray_load_window(struct lz_sarray *mf, const u8 T[], input_idx_t n); +lz_sarray_load_window(struct lz_sarray *mf, const u8 T[], lz_sarray_pos_t n); /*-------------------*/ /* Inline functions */ /*-------------------*/ -static _always_inline_attribute input_idx_t +static _always_inline_attribute lz_sarray_pos_t lz_sarray_get_pos(const struct lz_sarray *mf) { return mf->cur_pos; @@ -164,28 +221,18 @@ lz_sarray_get_pos(const struct lz_sarray *mf) /* Advance the suffix array match-finder to the next position. */ static _always_inline_attribute void -lz_sarray_update_salink(const input_idx_t r, struct salink link[]) +lz_sarray_update_salink(const lz_sarray_pos_t r, struct salink link[]) { - /* next = rank of LOWEST ranked suffix that is ranked HIGHER than the - * current suffix AND has a LOWER position, or ~(input_idx_t)0 if none - * exists. */ - const input_idx_t next = link[r].next; - - /* prev = rank of HIGHEST ranked suffix that is ranked LOWER than the - * current suffix AND has a LOWER position, or ~(input_idx_t)0 if none - * exists. */ - const input_idx_t prev = link[r].prev; - - /* Link the suffix at the current position into the linked list that - * contains all suffixes referenced by the suffix array that appear at - * or before the current position, sorted by rank. */ - if (next != ~(input_idx_t)0) { - link[next].prev = r; + const lz_sarray_pos_t next = r + link[r].dist_to_next; + const lz_sarray_pos_t prev = r - link[r].dist_to_prev; + + if (next != r && link[r].dist_to_next < link[next].dist_to_prev) { + link[next].dist_to_prev = link[r].dist_to_next; link[next].lcpprev = link[r].lcpnext; } - if (prev != ~(input_idx_t)0) { - link[prev].next = r; + if (prev != r && link[r].dist_to_prev < link[prev].dist_to_next) { + link[prev].dist_to_next = link[r].dist_to_prev; link[prev].lcpnext = link[r].lcpprev; } } @@ -198,9 +245,6 @@ lz_sarray_skip_position(struct lz_sarray *mf) lz_sarray_update_salink(mf->ISA[mf->cur_pos++], mf->salink); } -typedef input_idx_t lz_sarray_cost_t; -#define LZ_SARRAY_INFINITE_COST (~(lz_sarray_cost_t)0) - /* * Use the suffix array match-finder to retrieve a list of matches at the * current position. @@ -221,23 +265,22 @@ static _always_inline_attribute u32 lz_sarray_get_matches(struct lz_sarray *mf, struct raw_match matches[], lz_sarray_cost_t (*eval_match_cost) - (input_idx_t length, - input_idx_t offset, + (lz_sarray_pos_t length, + lz_sarray_pos_t offset, const void *ctx), const void *eval_match_cost_ctx) { LZ_ASSERT(mf->cur_pos < mf->window_size); - const input_idx_t i = mf->cur_pos++; + const lz_sarray_pos_t i = mf->cur_pos++; - const input_idx_t * const restrict SA = mf->SA; - const input_idx_t * const restrict ISA = mf->ISA; + const lz_sarray_pos_t * const restrict SA = mf->SA; + const lz_sarray_pos_t * const restrict ISA = mf->ISA; struct salink * const restrict link = mf->salink; - const input_idx_t min_match_len = mf->min_match_len; const u32 max_matches_to_consider = mf->max_matches_to_consider; const u32 max_matches_to_return = mf->max_matches_to_return; /* r = Rank of the suffix at the current position. */ - const input_idx_t r = ISA[i]; + const lz_sarray_pos_t r = ISA[i]; /* Prepare for searching the current position. */ lz_sarray_update_salink(r, link); @@ -255,131 +298,205 @@ lz_sarray_get_matches(struct lz_sarray *mf, * length on both sides in sync in order to choose the lowest-cost match * of each length. */ - input_idx_t L = link[r].prev; - input_idx_t R = link[r].next; - input_idx_t lenL = link[r].lcpprev; - input_idx_t lenR = link[r].lcpnext; + lz_sarray_pos_t L = r - link[r].dist_to_prev; + lz_sarray_pos_t R = r + link[r].dist_to_next; + lz_sarray_pos_t lenL = link[r].lcpprev; + lz_sarray_pos_t lenR = link[r].lcpnext; /* nmatches = number of matches found so far. */ u32 nmatches = 0; /* best_cost = cost of lowest-cost match found so far. * - * We keep track of this so that we can ignore shorter matches that do - * not have lower costs than longer matches already found. */ + * Shorter matches that do not have a lower cost than this are + * discarded, since presumably it would be cheaper to output the bytes + * from the longer match instead. */ lz_sarray_cost_t best_cost = LZ_SARRAY_INFINITE_COST; /* count_remaining = maximum number of possible matches remaining to be * considered. */ u32 count_remaining = max_matches_to_consider; - /* pending = match currently being considered for a specific length. */ - struct raw_match pending; - lz_sarray_cost_t pending_cost; - - while (lenL >= min_match_len || lenR >= min_match_len) - { - pending.len = lenL; - pending_cost = LZ_SARRAY_INFINITE_COST; + /* pending_offset = offset of lowest-cost match found for the current + * length, or 0 if none found yet. */ + lz_sarray_pos_t pending_offset = 0; + + /* Note: some 'goto' statements are used in the remainder of this + * function to remove unnecessary checks and create branches that the + * CPU may predict better. (This function is performance critical.) */ + + if (lenL != 0 && lenL >= lenR) + goto extend_left; + else if (lenR != 0) + goto extend_right; + else + return 0; + +extend_left: + /* Search suffixes on the left until the match length has decreased + * below the next match length on the right or to below the minimum + * match length. */ + for (;;) { + lz_sarray_pos_t offset; lz_sarray_cost_t cost; + lz_sarray_pos_t old_L; + lz_sarray_pos_t old_lenL; + + /* Check for hard cutoff on amount of work done. */ + if (count_remaining-- == 0) { + if (pending_offset != 0) { + /* Save pending match. */ + matches[nmatches++] = (struct raw_match){ + .len = lenL, + .offset = pending_offset, + }; + } + return nmatches; + } + + if (SA[L] < i) { + /* Suffix is in LZ77 dictionary. (Check was needed + * because the salink array caps distances to save + * memory.) */ + + offset = i - SA[L]; + + /* Save match offset if it results in lower cost. */ + cost = (*eval_match_cost)(lenL, offset, + eval_match_cost_ctx); + if (cost < best_cost) { + best_cost = cost; + pending_offset = offset; + } + } - /* Extend left. */ - if (lenL >= min_match_len && lenL >= lenR) { - for (;;) { + /* Advance left to previous suffix. */ - if (--count_remaining == 0) - goto out_save_pending; + old_L = L; + old_lenL = lenL; - input_idx_t offset = i - SA[L]; + L -= link[L].dist_to_prev; - /* Save match if it has smaller cost. */ - cost = (*eval_match_cost)(lenL, offset, - eval_match_cost_ctx); - if (cost < pending_cost) { - pending.offset = offset; - pending_cost = cost; + if (link[old_L].lcpprev < old_lenL) { + /* Match length decreased. */ + + lenL = link[old_L].lcpprev; + + if (old_lenL > lenR) { + /* Neither the right side nor the left size has + * any more matches of length @old_lenL. If a + * pending match exists, save it. */ + if (pending_offset != 0) { + matches[nmatches++] = (struct raw_match){ + .len = old_lenL, + .offset = pending_offset, + }; + if (nmatches == max_matches_to_return) + return nmatches; + + pending_offset = 0; } - if (link[L].lcpprev < lenL) { - /* Match length decreased. */ - - lenL = link[L].lcpprev; - - /* Save the pending match unless the - * right side still may have matches of - * this length to be scanned, or if a - * previous (longer) match had lower - * cost. */ - if (pending.len > lenR) { - if (pending_cost < best_cost) { - best_cost = pending_cost; - matches[nmatches++] = pending; - if (nmatches == max_matches_to_return) - return nmatches; - } - pending.len = lenL; - pending_cost = LZ_SARRAY_INFINITE_COST; - } - if (lenL < min_match_len || lenL < lenR) - break; + if (lenL >= lenR) { + /* New match length on left is still at + * least as large as the next match + * length on the right: Keep extending + * left, unless the minimum match length + * would be underrun. */ + if (lenL == 0) + return nmatches; + goto extend_left; } - L = link[L].prev; } + + /* Here we have lenL < lenR. Extend right. + * (No check for whether the minimum match length has + * been underrun is needed, provided that such lengths + * are marked as 0.) */ + goto extend_right; + } + } + +extend_right: + /* Search suffixes on the right until the match length has decreased to + * the next match length on the left or to below the minimum match + * length. */ + for (;;) { + lz_sarray_pos_t offset; + lz_sarray_cost_t cost; + lz_sarray_pos_t old_R; + lz_sarray_pos_t old_lenR; + + /* Check for hard cutoff on amount of work done. */ + if (count_remaining-- == 0) { + if (pending_offset != 0) { + /* Save pending match. */ + matches[nmatches++] = (struct raw_match){ + .len = lenR, + .offset = pending_offset, + }; + } + return nmatches; } - pending.len = lenR; + if (SA[R] < i) { + /* Suffix is in LZ77 dictionary. (Check was needed + * because the salink array caps distances to save + * memory.) */ - /* Extend right. */ - if (lenR >= min_match_len && lenR > lenL) { - for (;;) { + offset = i - SA[R]; - if (--count_remaining == 0) - goto out_save_pending; + /* Save match offset if it results in lower cost. */ + cost = (*eval_match_cost)(lenR, + offset, + eval_match_cost_ctx); + if (cost < best_cost) { + best_cost = cost; + pending_offset = offset; + } + } - input_idx_t offset = i - SA[R]; + /* Advance right to next suffix. */ - /* Save match if it has smaller cost. */ - cost = (*eval_match_cost)(lenR, - offset, - eval_match_cost_ctx); - if (cost < pending_cost) { - pending.offset = offset; - pending_cost = cost; - } + old_R = R; + old_lenR = lenR; - if (link[R].lcpnext < lenR) { - /* Match length decreased. */ + R += link[R].dist_to_next; - lenR = link[R].lcpnext; + if (link[old_R].lcpnext < lenR) { + /* Match length decreased. */ - /* Save the pending match unless a - * previous (longer) match had lower - * cost. */ - if (pending_cost < best_cost) { - matches[nmatches++] = pending; - best_cost = pending_cost; - if (nmatches == max_matches_to_return) - return nmatches; - } + lenR = link[old_R].lcpnext; - if (lenR < min_match_len || lenR <= lenL) - break; + /* Neither the right side nor the left size has any more + * matches of length @old_lenR. If a pending match + * exists, save it. */ + if (pending_offset != 0) { + matches[nmatches++] = (struct raw_match){ + .len = old_lenR, + .offset = pending_offset, + }; + if (nmatches == max_matches_to_return) + return nmatches; - pending.len = lenR; - pending_cost = LZ_SARRAY_INFINITE_COST; - } - R = link[R].next; + pending_offset = 0; } - } - } - goto out; -out_save_pending: - if (pending_cost != LZ_SARRAY_INFINITE_COST) - matches[nmatches++] = pending; + if (lenL >= lenR) { + /* lenL >= lenR: Extend left, unless the + * minimum match length would be underrun, in + * which case we are done. */ + if (lenL == 0) + return nmatches; -out: - return nmatches; + goto extend_left; + } + /* lenR > lenL: Keep extending right. + * (No check for whether the minimum match length has + * been underrun is needed, provided that such lengths + * are marked as 0.) */ + } + } } #endif /* _WIMLIB_LZ_SARRAY_H */