-static bool entropy_val_tab_inited = false;
-static double entropy_val_tab[LZX_MAX_WINDOW_SIZE];
-static pthread_mutex_t entropy_val_tab_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-static double entropy_val(unsigned count)
-{
- /*return count * log(count);*/
- return entropy_val_tab[count];
-}
-
-/* Split a LZX block into several if it is advantageous to do so.
- *
- * TODO: This doesn't work very well yet. Should optimal parsing be done
- * before or after splitting? */
-static void
-lzx_block_split(const u32 matches[restrict],
- const input_idx_t n,
- const double epsilon,
- const unsigned max_num_blocks,
- const unsigned min_block_len,
- struct lzx_block_spec block_specs[restrict],
- unsigned * const restrict num_blocks_ret)
-{
- const double block_overhead = 1500;
-
- if (!entropy_val_tab_inited) {
- pthread_mutex_lock(&entropy_val_tab_mutex);
- if (!entropy_val_tab_inited) {
- entropy_val_tab[0] = 0;
- for (input_idx_t i = 1; i < LZX_MAX_WINDOW_SIZE; i++)
- entropy_val_tab[i] = i * log2(i);
- entropy_val_tab_inited = true;
- }
- pthread_mutex_unlock(&entropy_val_tab_mutex);
- }
-
- u16 main_syms[n];
- u8 len_syms[n];
- u8 aligned_syms[n];
- input_idx_t orig_input_indices[n + 1];
-
- LZX_ASSERT(epsilon >= 0);
- LZX_ASSERT(max_num_blocks >= 1);
-
- /* For convenience, extract the main, length, and aligned symbols from
- * the matches. Every position will have a main symbol, but not every
- * position will have a length and aligned symbol. Special values
- * larger than the valid symbols are used to indicate the absense of a
- * symbol. */
- orig_input_indices[0] = 0;
- for (input_idx_t i = 0, orig_input_idx = 0; i < n; i++) {
- u32 match = matches[i];
- u16 main_sym;
- u8 len_sym = LZX_LENCODE_NUM_SYMBOLS;
- u8 aligned_sym = LZX_ALIGNEDCODE_NUM_SYMBOLS;
- if (match & 0x80000000) {
- unsigned match_len_minus_2 = match & 0xff;
- unsigned position_footer = (match >> 8) & 0x1ffff;
- unsigned position_slot = (match >> 25) & 0x3f;
- unsigned len_header;
-
- if (match_len_minus_2 < LZX_NUM_PRIMARY_LENS) {
- len_header = match_len_minus_2;
- } else {
- len_header = LZX_NUM_PRIMARY_LENS;
- len_sym = match_len_minus_2 - LZX_NUM_PRIMARY_LENS;
- }
- main_sym = ((position_slot << 3) | len_header) + LZX_NUM_CHARS;
- if (position_slot >= 8)
- aligned_sym = position_footer & 7;
- orig_input_idx += match_len_minus_2 + 2;
- } else {
- main_sym = match;
- orig_input_idx++;
- }
- main_syms[i] = main_sym;
- len_syms[i] = len_sym;
- aligned_syms[i] = aligned_sym;
- orig_input_indices[i + 1] = orig_input_idx;
- }
-
- /* Compute the number of sliding windows that will be used for the
- * entropy calculations. */
- int num_windows = 0;
- unsigned window_len;
- {
- double e = min_block_len;
- do {
- window_len = e;
- num_windows++;
- e *= epsilon + 1;
- } while (window_len < n);
- }
-
- /* Compute the length of each sliding window. */
- unsigned window_lens[num_windows];
- {
- double e = min_block_len;
- unsigned window_idx = 0;
- do {
- window_len = e;
- window_lens[window_idx++] = min(window_len, n);
- e *= epsilon + 1;
- } while (window_len < n);
- }
-
- /* Best estimated compression size, in bits, found so far for the input
- * matches up to each position. */
- unsigned shortest_paths[n + 1];
-
- /* Pointers to follow to get the sequence of blocks that represents the
- * shortest path (in terms of estimated compressed size) up to each
- * position in the input matches. */
- input_idx_t back_ptrs[n + 1];
-
- for (input_idx_t i = 0; i < n + 1; i++) {
- shortest_paths[i] = ~0U;
- back_ptrs[i] = 0;
- }
- shortest_paths[0] = 0;
-
- {
- /* Initialize the per-window symbol and entropy counters */
- input_idx_t mainsym_ctrs[num_windows][LZX_MAINCODE_NUM_SYMBOLS];
- input_idx_t lensym_ctrs[num_windows][LZX_LENCODE_NUM_SYMBOLS + 1];
- input_idx_t alignedsym_ctrs[num_windows][LZX_ALIGNEDCODE_NUM_SYMBOLS + 1];
- ZERO_ARRAY(mainsym_ctrs);
- ZERO_ARRAY(lensym_ctrs);
- ZERO_ARRAY(alignedsym_ctrs);
-
- {
- int start_win_idx = 0;
- for (input_idx_t i = 0; i < n; i++) {
- while (i >= window_lens[start_win_idx])
- start_win_idx++;
- for (int j = start_win_idx; j < num_windows; j++) {
- mainsym_ctrs[j][main_syms[i]]++;
- lensym_ctrs[j][len_syms[i]]++;
- alignedsym_ctrs[j][aligned_syms[i]]++;
- }
- }
- }
-
- double entropy_ctrs[num_windows];
- for (int i = 0; i < num_windows; i++) {
- entropy_ctrs[i] = 0;
- for (unsigned j = 0; j < LZX_MAINCODE_NUM_SYMBOLS; j++)
- entropy_ctrs[i] += entropy_val(mainsym_ctrs[i][j]);
- for (unsigned j = 0; j < LZX_LENCODE_NUM_SYMBOLS; j++)
- entropy_ctrs[i] += entropy_val(lensym_ctrs[i][j]);
- for (unsigned j = 0; j < LZX_ALIGNEDCODE_NUM_SYMBOLS; j++)
- entropy_ctrs[i] += entropy_val(alignedsym_ctrs[i][j]);
- }
-
- /* Slide the windows along the input and compute the shortest
- * path to each position in the matches. */
- int end_window_idx = (int)num_windows - 1;
- for (input_idx_t i = 0; i < n; i++) {
- for (int j = 0; j <= end_window_idx; j++) {
- if (shortest_paths[i] == ~0U)
- continue;
- unsigned num_mainsyms = window_lens[j];
- unsigned num_lensyms = window_lens[j] -
- lensym_ctrs[j][LZX_LENCODE_NUM_SYMBOLS];
- unsigned num_alignedsyms = window_lens[j] -
- alignedsym_ctrs[j][LZX_ALIGNEDCODE_NUM_SYMBOLS];
- unsigned entropy = entropy_val(num_mainsyms) +
- entropy_val(num_lensyms) +
- entropy_val(num_alignedsyms) -
- entropy_ctrs[j];
- unsigned est_csize = entropy + block_overhead;
-
- unsigned end_idx = i + window_lens[j];
- if (est_csize + shortest_paths[i] < shortest_paths[end_idx]) {
- shortest_paths[end_idx] = est_csize + shortest_paths[i];
- back_ptrs[end_idx] = i;
- }
- }
- /* Remove left symbol from windows */
- for (int j = 0; j <= end_window_idx; j++) {
- input_idx_t orig_maincnt = mainsym_ctrs[j][main_syms[i]]--;
- entropy_ctrs[j] -= entropy_val(orig_maincnt);
- entropy_ctrs[j] += entropy_val(orig_maincnt - 1);
-
- input_idx_t orig_lencnt =
- lensym_ctrs[j][len_syms[i]]--;
- if (len_syms[i] != LZX_LENCODE_NUM_SYMBOLS) {
- entropy_ctrs[j] -= entropy_val(orig_lencnt);
- entropy_ctrs[j] += entropy_val(orig_lencnt - 1);
- }
-
- input_idx_t orig_alignedcnt =
- alignedsym_ctrs[j][aligned_syms[i]]--;
- if (aligned_syms[i] != LZX_ALIGNEDCODE_NUM_SYMBOLS) {
- entropy_ctrs[j] -= entropy_val(orig_alignedcnt);
- entropy_ctrs[j] += entropy_val(orig_alignedcnt - 1);
- }
- }
-
- /* Calculate index of longest window remaining */
- while (end_window_idx >= 0 && window_lens[end_window_idx] >= n - i)
- end_window_idx--;
-
- /* Append right symbol to windows */
- for (int j = 0; j <= end_window_idx; j++) {
- input_idx_t orig_maincnt = mainsym_ctrs[j][
- main_syms[i + window_lens[j]]]++;
- entropy_ctrs[j] -= entropy_val(orig_maincnt);
- entropy_ctrs[j] += entropy_val(orig_maincnt + 1);
-
- input_idx_t orig_lencnt =
- lensym_ctrs[j][len_syms[i + window_lens[j]]]++;
- if (len_syms[i + window_lens[j]] != LZX_LENCODE_NUM_SYMBOLS) {
- entropy_ctrs[j] -= entropy_val(orig_lencnt);
- entropy_ctrs[j] += entropy_val(orig_lencnt + 1);
- }
-
- input_idx_t orig_alignedcnt =
- alignedsym_ctrs[j][aligned_syms[i + window_lens[j]]]++;
- if (aligned_syms[i + window_lens[j]] != LZX_ALIGNEDCODE_NUM_SYMBOLS) {
- entropy_ctrs[j] -= entropy_val(orig_alignedcnt);
- entropy_ctrs[j] += entropy_val(orig_alignedcnt + 1);
- }
- }
- }
- }
-
-#if 0
- /* If no cost was computed for the first block (due to it being shorter
- * than all the windows), merge it with the second block. */
- for (input_idx_t i = n; i != 0; i = back_ptrs[i])
- if (back_ptrs[i] != 0 && shortest_paths[back_ptrs[i]] == ~0U)
- back_ptrs[i] = 0;
-#endif
-
- /* Calculate number of blocks */
- input_idx_t num_blocks = 0;
- for (input_idx_t i = n; i != 0; i = back_ptrs[i])
- num_blocks++;
-
- while (num_blocks > max_num_blocks) {
- LZX_DEBUG("Joining blocks to bring total under max_num_blucks=%u",
- max_num_blocks);
- back_ptrs[n] = back_ptrs[back_ptrs[n]];
- num_blocks--;
- }
-
- LZX_ASSERT(num_blocks != 0);
-
- /* fill in the 'struct lzx_block_spec' for each block */
- for (input_idx_t i = n, j = num_blocks - 1; i != 0; i = back_ptrs[i], j--) {
-
- block_specs[j].chosen_matches_start_pos = back_ptrs[i];
- block_specs[j].num_chosen_matches = i - back_ptrs[i];
- block_specs[j].window_pos = orig_input_indices[back_ptrs[i]];
- block_specs[j].block_size = orig_input_indices[i] -
- orig_input_indices[back_ptrs[i]];
- /*block_specs[j].est_csize = (shortest_paths[i] -*/
- /*shortest_paths[back_ptrs[i]]) / 8;*/
-
- LZX_DEBUG("block match_indices [%u, %u) est_csize %u bits\n",
- back_ptrs[i], i,
- shortest_paths[i] - shortest_paths[back_ptrs[i]]);
-
- struct lzx_freqs freqs = {};
-
- for (input_idx_t k = back_ptrs[i]; k < i; k++) {
- freqs.main[main_syms[k]]++;
- if (len_syms[k] != LZX_LENCODE_NUM_SYMBOLS)
- freqs.len[len_syms[k]]++;
- if (aligned_syms[k] != LZX_LENCODE_NUM_SYMBOLS)
- freqs.aligned[aligned_syms[k]]++;
- }
- lzx_make_huffman_codes(&freqs, &block_specs[j].codes);
-
- block_specs[j].block_type = lzx_choose_verbatim_or_aligned(&freqs,
- &block_specs[j].codes);
- }
- *num_blocks_ret = num_blocks;
-}
-
-
-/* Initialize the suffix array match-finder for the specified input. */
-static void
-lzx_lz_init_matchfinder(const u8 T[const restrict],
- const input_idx_t n,
- input_idx_t SA[const restrict],
- input_idx_t ISA[const restrict],
- input_idx_t LCP[const restrict],
- struct salink link[const restrict],
- const unsigned max_match_len)
-{
- /* Compute SA (Suffix Array). */
-
- {
- saidx_t sa[n];
- /* ISA and link are used as temporary space. */
- BUILD_BUG_ON(LZX_MAX_WINDOW_SIZE * sizeof(ISA[0]) < 256 * sizeof(saidx_t));
- BUILD_BUG_ON(LZX_MAX_WINDOW_SIZE * 2 * sizeof(link[0]) < 256 * 256 * sizeof(saidx_t));
- divsufsort(T, sa, n, (saidx_t*)ISA, (saidx_t*)link);
- for (input_idx_t i = 0; i < n; i++)
- SA[i] = sa[i];
- }
-
-#ifdef ENABLE_LZX_DEBUG
-
- LZX_ASSERT(n > 0);
-
- /* Verify suffix array. */
- {
- bool found[n];
- ZERO_ARRAY(found);
- for (input_idx_t r = 0; r < n; r++) {
- input_idx_t i = SA[r];
- LZX_ASSERT(i < n);
- LZX_ASSERT(!found[i]);
- found[i] = true;
- }
- }
-
- for (input_idx_t r = 0; r < n - 1; r++) {
-
- input_idx_t i1 = SA[r];
- input_idx_t i2 = SA[r + 1];
-
- input_idx_t n1 = n - i1;
- input_idx_t n2 = n - i2;
-
- LZX_ASSERT(memcmp(&T[i1], &T[i2], min(n1, n2)) <= 0);
- }
- LZX_DEBUG("Verified SA (len %u)", n);
-#endif /* ENABLE_LZX_DEBUG */
-
- /* Compute ISA (Inverse Suffix Array) */
- for (input_idx_t r = 0; r < n; r++)
- ISA[SA[r]] = r;
-
- /* Compute LCP (longest common prefix) array.
- *
- * Algorithm adapted from Kasai et al. 2001: "Linear-Time
- * Longest-Common-Prefix Computation in Suffix Arrays and Its
- * Applications". */
- {
- input_idx_t h = 0;
- for (input_idx_t i = 0; i < n; i++) {
- input_idx_t r = ISA[i];
- if (r > 0) {
- input_idx_t j = SA[r - 1];
-
- input_idx_t lim = min(n - i, n - j);
-
- while (h < lim && T[i + h] == T[j + h])
- h++;
- LCP[r] = h;
- if (h > 0)
- h--;
- }
- }
- }
-
-#ifdef ENABLE_LZX_DEBUG
- /* Verify LCP array. */
- for (input_idx_t r = 0; r < n - 1; r++) {
- LZX_ASSERT(ISA[SA[r]] == r);
- LZX_ASSERT(ISA[SA[r + 1]] == r + 1);
-
- input_idx_t i1 = SA[r];
- input_idx_t i2 = SA[r + 1];
- input_idx_t lcp = LCP[r + 1];
-
- input_idx_t n1 = n - i1;
- input_idx_t n2 = n - i2;
-
- LZX_ASSERT(lcp <= min(n1, n2));
-
- LZX_ASSERT(memcmp(&T[i1], &T[i2], lcp) == 0);
- if (lcp < min(n1, n2))
- LZX_ASSERT(T[i1 + lcp] != T[i2 + lcp]);
- }
-#endif /* ENABLE_LZX_DEBUG */
-
- /* Compute salink.next and salink.lcpnext.
- *
- * Algorithm adapted from Crochemore et al. 2009:
- * "LPF computation revisited".
- *
- * Note: we cap lcpnext to the maximum match length so that the
- * match-finder need not worry about it later. */
- link[n - 1].next = (input_idx_t)~0U;
- link[n - 1].prev = (input_idx_t)~0U;
- link[n - 1].lcpnext = 0;
- link[n - 1].lcpprev = 0;
- for (input_idx_t r = n - 2; r != (input_idx_t)~0U; r--) {
- input_idx_t t = r + 1;
- input_idx_t l = LCP[t];
- while (t != (input_idx_t)~0 && SA[t] > SA[r]) {
- l = min(l, link[t].lcpnext);
- t = link[t].next;
- }
- link[r].next = t;
- link[r].lcpnext = min(l, max_match_len);
- LZX_ASSERT(t == (input_idx_t)~0 || l <= n - SA[t]);
- LZX_ASSERT(l <= n - SA[r]);
- LZX_ASSERT(memcmp(&T[SA[r]], &T[SA[t]], l) == 0);
- }
-
- /* Compute salink.prev and salink.lcpprev.
- *
- * Algorithm adapted from Crochemore et al. 2009:
- * "LPF computation revisited".
- *
- * Note: we cap lcpprev to the maximum match length so that the
- * match-finder need not worry about it later. */
- link[0].prev = (input_idx_t)~0;
- link[0].next = (input_idx_t)~0;
- link[0].lcpprev = 0;
- link[0].lcpnext = 0;
- for (input_idx_t r = 1; r < n; r++) {
- input_idx_t t = r - 1;
- input_idx_t l = LCP[r];
- while (t != (input_idx_t)~0 && SA[t] > SA[r]) {
- l = min(l, link[t].lcpprev);
- t = link[t].prev;
- }
- link[r].prev = t;
- link[r].lcpprev = min(l, max_match_len);
- LZX_ASSERT(t == (input_idx_t)~0 || l <= n - SA[t]);
- LZX_ASSERT(l <= n - SA[r]);
- LZX_ASSERT(memcmp(&T[SA[r]], &T[SA[t]], l) == 0);
- }
-}
-