X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flz77.c;fp=src%2Flz77.c;h=0000000000000000000000000000000000000000;hp=b5495da74d1599cd59e12cc8ef65c495550eba55;hb=e940fda88a92ff9e931ec88fb4c0e1ebd6fa2dfb;hpb=867f1968015a36ccd260002c39ba4def91da4980

diff --git a/src/lz77.c b/src/lz77.c
deleted file mode 100644
index b5495da7..00000000
--- a/src/lz77.c
+++ /dev/null
@@ -1,313 +0,0 @@
-/*
- * lz77.c
- *
- * This file provides the code to analyze a buffer of uncompressed data for
- * matches, as per the LZ77 algorithm.  It uses a hash table to accelerate the
- * process.  This is based on code from zlib (v. 1.2.5).
- */
-
-/*
- * Copyright (C) 2012, 2013 Eric Biggers
- * Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
- *
- * This file is part of wimlib, a library for working with WIM files.
- *
- * wimlib is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.
- *
- * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
- * A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with wimlib; if not, see http://www.gnu.org/licenses/.
- */
-
-#ifdef HAVE_CONFIG_H
-#  include <config.h>
-#endif
-
-#include "wimlib/compress_common.h"
-#include "wimlib/util.h"
-
-#include <string.h>
-
-#define HASH_BITS	15
-#define HASH_SIZE	(1 << HASH_BITS)
-#define HASH_MASK	(HASH_SIZE - 1)
-#define HASH_SHIFT	5
-
-/* Hash function, based on code from zlib.  This function will update and return
- * the hash value @hash for the string ending on the additional input character
- * @c.  This function must be called for each consecutive character, because it
- * uses a running hash value rather than computing it separately for each
- * 3-character string.
- *
- * The AND operation guarantees that only 3 characters will affect the hash
- * value, so every identical 3-character string will have the same hash value.
- */
-static inline unsigned
-update_hash(unsigned hash, u8 c)
-{
-	return ((hash << HASH_SHIFT) ^ c) & HASH_MASK;
-}
-
-
-/* Insert a 3-character string at position @str_pos in @window and with hash
- * code @hash into the hash table described by @hash_tab and @prev_tab.  Based
- * on code from zlib.
- *
- * The hash table uses chains (linked lists) for the hash buckets, but there are
- * no real pointers involved.  Indexing `hash_tab' by hash value gives the index
- * within the window of the last string in the hash bucket.  To find the index
- * of the previous string in the hash chain, the `prev_tab' array is indexed by
- * the string index.  `prev_tab' can be indexed repeatedly by the string index
- * to walk through the hash chain, until the special index `0' is reached,
- * indicating the end of the hash chain.
- */
-static inline unsigned
-insert_string(input_idx_t hash_tab[], input_idx_t prev_tab[],
-	      const u8 window[], unsigned str_pos,
-	      unsigned hash)
-{
-	hash = update_hash(hash, window[str_pos + 2]);
-	prev_tab[str_pos] = hash_tab[hash];
-	hash_tab[hash] = str_pos;
-	return hash;
-}
-
-
-/*
- * Returns the longest match for a given input position.
- *
- * @window:		The window of uncompressed data.
- * @bytes_remaining:	The number of bytes remaining in the window.
- * @strstart:		The index of the start of the string in the window that
- *				we are trying to find a match for.
- * @prev_tab:		The array of prev pointers for the hash table.
- * @cur_match:		The index of the head of the hash chain for matches
- *				having the hash value of the string beginning
- *				at index @strstart.
- * @prev_len:		The length of the match that was found for the string
- *				beginning at (@strstart - 1).
- * @match_start_ret:	A location into which the index of the start of the
- *				match will be returned.
- * @params:		Parameters that affect how long the search will proceed
- *				before going with the best that has been found
- *				so far.
- * @min_start_pos:	If the chain reaches a match starting before this
- *			position (including the end-of-chain 0), the search will
- *			be terminated.
- *
- * Returns the length of the match that was found.
- */
-static unsigned
-longest_match(const u8 window[], unsigned bytes_remaining,
-	      unsigned strstart, const input_idx_t prev_tab[],
-	      unsigned cur_match, unsigned prev_len,
-	      unsigned *match_start_ret,
-	      const struct lz_params *params,
-	      unsigned min_start_pos)
-{
-	unsigned chain_len = params->max_chain_len;
-
-	const u8 *scan = window + strstart;
-	const u8 *match;
-	unsigned len;
-	unsigned best_len = prev_len;
-	unsigned match_start = cur_match;
-
-	unsigned nice_match = min(params->nice_match, bytes_remaining);
-
-	const u8 *strend = scan + min(params->max_match, bytes_remaining);
-
-	u8 scan_end1 = scan[best_len - 1];
-	u8 scan_end = scan[best_len];
-
-
-	/* Do not waste too much time if we already have a good match: */
-	if (best_len >= params->good_match)
-		chain_len >>= 2;
-
-	do {
-		match = &window[cur_match];
-
-		/* Skip to next match if the match length cannot increase or if
-		 * the match length is less than 2.  Note that the checks below
-		 * for insufficient lookahead only occur occasionally for
-		 * performance reasons.  Therefore uninitialized memory will be
-		 * accessed, and conditional jumps will be made that depend on
-		 * those values.  However the length of the match is limited to
-		 * the lookahead, so the output of lz_analyze_block() is not
-		 * affected by the uninitialized values.  */
-
-		if (match[best_len] != scan_end
-		    || match[best_len - 1] != scan_end1
-		    || *match != *scan
-		    || *++match != scan[1])
-			continue;
-		scan++;
-
-	#if 0
-		do {
-		} while (scan < strend && *++match == *++scan);
-	#else
-
-		do {
-		} while (
-			 *++match == *++scan && *++match == *++scan &&
-			 *++match == *++scan && *++match == *++scan &&
-			 *++match == *++scan && *++match == *++scan &&
-			 *++match == *++scan && *++match == *++scan &&
-			 scan < strend);
-	#endif
-		len = match - &window[cur_match];
-
-		scan = &window[strstart];
-
-		if (len > best_len) {
-			match_start = cur_match;
-			best_len = len;
-			if (len >= nice_match)
-				break;
-			scan_end1  = scan[best_len - 1];
-			scan_end   = scan[best_len];
-		}
-	} while (--chain_len != 0 && (cur_match = prev_tab[cur_match]) >= min_start_pos);
-	*match_start_ret = match_start;
-	return min(min(best_len, bytes_remaining), params->max_match);
-}
-
-
-
-/*
- * Determines the sequence of matches and literals that a block of data will be
- * compressed to.
- *
- * @window:		The data that is to be compressed.
- * @window_size:	The length of @window, in bytes.
- * @record_match:	Consumer for matches.
- * @record_literal:	Consumer for literals.
- * @record_ctx:		Context passed to @record_match and @record_literal.
- * @params:		Structure that contains parameters that affect how the
- *				analysis proceeds (mainly how good the matches
- *				have to be).
- * @prev_tab:		Temporary space containing least @window_size elements.
- */
-void
-lz_analyze_block(const u8 window[restrict],
-		 input_idx_t window_size,
-		 lz_record_match_t record_match,
-		 lz_record_literal_t record_literal,
-		 void *record_ctx,
-		 const struct lz_params *params,
-		 input_idx_t prev_tab[restrict])
-{
-	unsigned cur_input_pos = 0;
-	unsigned hash          = 0;
-	unsigned hash_head     = 0;
-	unsigned prev_len      = params->min_match - 1;
-	unsigned prev_start;
-	unsigned match_len     = params->min_match - 1;
-	unsigned match_start   = 0;
-	bool match_available = false;
-	input_idx_t hash_tab[HASH_SIZE];
-	unsigned min_start_pos = 1;
-
-	ZERO_ARRAY(hash_tab);
-
-	do {
-		/* If there are at least 3 characters remaining in the input,
-		 * insert the 3-character string beginning at
-		 * window[cur_input_pos] into the hash table.
-		 *
-		 * hash_head is set to the index of the previous string in the
-		 * hash bucket, or 0 if there is no such string */
-		if (window_size - cur_input_pos >= params->min_match) {
-			hash = insert_string(hash_tab, prev_tab,
-					     window,
-					     cur_input_pos, hash);
-			hash_head = prev_tab[cur_input_pos];
-		} else {
-			hash_head = 0;
-		}
-
-
-		/* Find the longest match, discarding those <= prev_len. */
-		prev_len = match_len;
-		prev_start = match_start;
-		match_len = params->min_match - 1;
-
-		if (cur_input_pos > params->max_offset)
-			min_start_pos = cur_input_pos - params->max_offset;
-		else
-			min_start_pos = 1;
-
-		if (hash_head >= min_start_pos &&
-		    prev_len < params->max_lazy_match)
-		{
-			/* To simplify the code, we prevent matches with the
-			 * string of window index 0 (in particular we have to
-			 * avoid a match of the string with itself at the start
-			 * of the input file).  */
-			match_len = longest_match(window,
-						  window_size - cur_input_pos,
-						  cur_input_pos, prev_tab,
-						  hash_head, prev_len,
-						  &match_start, params,
-						  min_start_pos);
-
-			if (match_len == params->min_match &&
-			     cur_input_pos - match_start > params->too_far)
-				match_len = params->min_match - 1;
-		}
-
-		/* If there was a match at the previous step and the current
-		 * match is not better, output the previous match:
-		 */
-		if (prev_len >= params->min_match && match_len <= prev_len) {
-
-
-			(*record_match)(prev_len,
-					cur_input_pos - 1 - prev_start,
-					record_ctx);
-
-			/* Insert in hash table all strings up to the end of the
-			 * match.  strstart-1 and strstart are already inserted.
-			 * If there is not enough lookahead, the last two
-			 * strings are not inserted in the hash table.  */
-
-			/* Do not insert strings in hash table beyond this. */
-			unsigned max_insert = window_size - params->min_match;
-
-			prev_len -= 2;
-
-			do {
-				if (++cur_input_pos <= max_insert) {
-					hash = insert_string(hash_tab, prev_tab,
-							     window,
-							     cur_input_pos,
-							     hash);
-				}
-			} while (--prev_len != 0);
-			match_available = false;
-			match_len = params->min_match - 1;
-		} else if (match_available) {
-			/* If there was no match at the previous position,
-			 * output a single literal. If there was a match but the
-			 * current match is longer, truncate the previous match
-			 * to a single literal.  */
-			(*record_literal)(window[cur_input_pos - 1], record_ctx);
-		} else {
-			/* There is no previous match to compare with, wait for
-			 * the next step to decide.  */
-			match_available = true;
-		}
-	} while (++cur_input_pos < window_size);
-
-	if (match_available)
-		(*record_literal)(window[cur_input_pos - 1], record_ctx);
-}