X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Fxpress-compress.c;h=fdcc2fca49e61a10b935e4ed3decf65479da9514;hb=0a97ffa6074b6b69b9982d83b398145c5ac860ab;hp=f39d54758969f2884f37ee2e8d50b3164152b7c4;hpb=93110bb18090d4d2c00294a56f819c7edeef318f;p=wimlib

diff --git a/src/xpress-compress.c b/src/xpress-compress.c
index f39d5475..fdcc2fca 100644
--- a/src/xpress-compress.c
+++ b/src/xpress-compress.c
@@ -8,31 +8,31 @@
 /*
  * Copyright (C) 2012, 2013, 2014 Eric Biggers
  *
- * This file is part of wimlib, a library for working with WIM files.
+ * This file is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option) any
+ * later version.
  *
- * wimlib is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.
- *
- * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
- * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
  * details.
  *
- * You should have received a copy of the GNU General Public License
- * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this file; if not, see http://www.gnu.org/licenses/.
  */
 
 #ifdef HAVE_CONFIG_H
 #  include "config.h"
 #endif
 
+#include "wimlib/bitops.h"
 #include "wimlib/compress_common.h"
 #include "wimlib/compressor_ops.h"
 #include "wimlib/endianness.h"
 #include "wimlib/error.h"
 #include "wimlib/lz_mf.h"
+#include "wimlib/unaligned.h"
 #include "wimlib/util.h"
 #include "wimlib/xpress.h"
 
@@ -136,7 +136,7 @@ struct xpress_mc_pos_data {
 	 */
 	u32 mc_item_data;
 #define MC_OFFSET_SHIFT 16
-#define MC_LEN_MASK ((1 << MC_OFFSET_SHIFT) - 1)
+#define MC_LEN_MASK (((u32)1 << MC_OFFSET_SHIFT) - 1)
 };
 
 
@@ -218,7 +218,7 @@ xpress_write_bits(struct xpress_output_bitstream *os,
 	if (os->bitcount > 16) {
 		os->bitcount -= 16;
 		if (os->end - os->next_byte >= 2) {
-			*(le16 *)os->next_bits = cpu_to_le16(os->bitbuf >> os->bitcount);
+			put_unaligned_u16_le(os->bitbuf >> os->bitcount, os->next_bits);
 			os->next_bits = os->next_bits2;
 			os->next_bits2 = os->next_byte;
 			os->next_byte += 2;
@@ -246,8 +246,8 @@ xpress_flush_output(struct xpress_output_bitstream *os)
 	if (unlikely(os->end - os->next_byte < 2))
 		return 0;
 
-	*(le16 *)os->next_bits = cpu_to_le16(os->bitbuf << (16 - os->bitcount));
-	*(le16 *)os->next_bits2 = cpu_to_le16(0);
+	put_unaligned_u16_le(os->bitbuf << (16 - os->bitcount), os->next_bits);
+	put_unaligned_u16_le(0, os->next_bits2);
 
 	return os->next_byte - os->start;
 }
@@ -334,8 +334,8 @@ xpress_declare_match(struct xpress_compressor *c,
 {
 	unsigned adjusted_len = len - XPRESS_MIN_MATCH_LEN;
 	unsigned len_hdr = min(adjusted_len, 0xf);
-	unsigned offset_bsr = bsr32(offset);
-	unsigned sym = XPRESS_NUM_CHARS + ((offset_bsr << 4) | len_hdr);
+	unsigned offset_high_bit = fls32(offset);
+	unsigned sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
 
 	c->freqs[sym]++;
 
@@ -343,8 +343,8 @@ xpress_declare_match(struct xpress_compressor *c,
 		*(*next_chosen_item)++ = (struct xpress_item) {
 			.data = (u64)sym |
 				((u64)adjusted_len << 9) |
-				((u64)offset_bsr << 25) |
-				((u64)(offset ^ (1U << offset_bsr)) << 29),
+				((u64)offset_high_bit << 25) |
+				((u64)(offset ^ (1U << offset_high_bit)) << 29),
 		};
 	}
 }
@@ -589,7 +589,7 @@ xpress_set_costs(u8 costs[], const u8 lens[])
  * @matches must be sorted by strictly increasing length and strictly
  * increasing offset.  This is guaranteed by the match-finder.
  *
- * We consider each length from the minimum (2) to the longest
+ * We consider each length from the minimum (3) to the longest
  * (matches[num_matches - 1].len).  For each length, we consider only
  * the smallest offset for which that length is available.  Although
  * this is not guaranteed to be optimal due to the possibility of a
@@ -607,7 +607,7 @@ xpress_consider_matches(struct xpress_compressor *c,
 	u32 cost;
 	u32 position_cost;
 	unsigned offset;
-	unsigned offset_bsr;
+	unsigned offset_high_bit;
 	unsigned adjusted_len;
 	unsigned len_hdr;
 	unsigned sym;
@@ -616,11 +616,11 @@ xpress_consider_matches(struct xpress_compressor *c,
 		/* All lengths are small.  Optimize accordingly.  */
 		do {
 			offset = matches[i].offset;
-			offset_bsr = bsr32(offset);
+			offset_high_bit = fls32(offset);
 			len_hdr = len - XPRESS_MIN_MATCH_LEN;
-			sym = XPRESS_NUM_CHARS + ((offset_bsr << 4) | len_hdr);
+			sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
 
-			position_cost = cur_optimum_ptr->cost + offset_bsr;
+			position_cost = cur_optimum_ptr->cost + offset_high_bit;
 			do {
 				cost = position_cost + c->costs[sym];
 				if (cost < (cur_optimum_ptr + len)->cost) {
@@ -635,12 +635,12 @@ xpress_consider_matches(struct xpress_compressor *c,
 		/* Some lengths are big.  */
 		do {
 			offset = matches[i].offset;
-			offset_bsr = bsr32(offset);
-			position_cost = cur_optimum_ptr->cost + offset_bsr;
+			offset_high_bit = fls32(offset);
+			position_cost = cur_optimum_ptr->cost + offset_high_bit;
 			do {
 				adjusted_len = len - XPRESS_MIN_MATCH_LEN;
 				len_hdr = min(adjusted_len, 0xf);
-				sym = XPRESS_NUM_CHARS + ((offset_bsr << 4) | len_hdr);
+				sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
 
 				cost = position_cost + c->costs[sym];
 				if (adjusted_len >= 0xf) {
@@ -793,8 +793,8 @@ begin:
 		 *	XPRESS_OPTIM_ARRAY_LENGTH is high enough that on most
 		 *	inputs this limit is never reached.
 		 *
-		 * Note: no check for end-of-block is needed because
-		 * end-of-block will trigger condition (1).
+		 * Note: no check for end-of-window is needed because
+		 * end-of-window will trigger condition (1).
 		 */
 		if (cur_optimum_ptr == end_optimum_ptr ||
 		    cur_optimum_ptr == &c->optimum[XPRESS_OPTIM_ARRAY_LENGTH])
@@ -1208,14 +1208,16 @@ xpress_compress(const void *uncompressed_data, size_t uncompressed_size,
 	c->cur_window_size = uncompressed_size;
 	lz_mf_load_window(c->mf, c->cur_window, c->cur_window_size);
 	memset(c->freqs, 0, sizeof(c->freqs));
+
 	num_chosen_items = xpress_choose_items(c);
+
 	c->freqs[XPRESS_END_OF_DATA]++;
 	xpress_make_huffman_code(c);
 
 	/* Output the Huffman code as a series of 512 4-bit lengths.  */
 	cptr = compressed_data;
 	for (unsigned i = 0; i < XPRESS_NUM_SYMBOLS; i += 2)
-		*cptr++ = (c->lens[i] & 0xf) | (c->lens[i + 1] << 4);
+		*cptr++ = (c->lens[i + 1] << 4) | c->lens[i];
 
 	/* Output the encoded matches/literals.  */
 	xpress_init_output(&os, cptr,
@@ -1239,8 +1241,8 @@ xpress_free_compressor(void *_c)
 
 	if (c) {
 		lz_mf_free(c->mf);
-		FREE(c->cached_matches);
 		FREE(c->optimum);
+		FREE(c->cached_matches);
 		FREE(c->chosen_items);
 		FREE(c);
 	}