From 605df71262af15c229fa3b5f0f6f7611abc011d1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 30 Jul 2022 19:03:42 -0700 Subject: [PATCH] matchfinder_common: sync with libdeflate --- Makefile.am | 3 +- include/wimlib/bt_matchfinder.h | 3 +- include/wimlib/compiler.h | 12 +-- include/wimlib/endianness.h | 2 +- include/wimlib/hc_matchfinder.h | 4 +- include/wimlib/lz_extend.h | 60 ------------- include/wimlib/lz_hash.h | 46 ---------- include/wimlib/matchfinder_common.h | 130 ++++++++++++++++++++++++++++ include/wimlib/unaligned.h | 38 -------- src/lzms_compress.c | 3 +- src/lzms_decompress.c | 2 +- src/lzx_compress.c | 1 - 12 files changed, 142 insertions(+), 162 deletions(-) delete mode 100644 include/wimlib/lz_extend.h delete mode 100644 include/wimlib/lz_hash.h create mode 100644 include/wimlib/matchfinder_common.h diff --git a/Makefile.am b/Makefile.am index 1fc12a0c..7e43e3bc 100644 --- a/Makefile.am +++ b/Makefile.am @@ -122,12 +122,11 @@ libwim_la_SOURCES = \ include/wimlib/integrity.h \ include/wimlib/lcpit_matchfinder.h \ include/wimlib/list.h \ - include/wimlib/lz_extend.h \ - include/wimlib/lz_hash.h \ include/wimlib/lzms_common.h \ include/wimlib/lzms_constants.h \ include/wimlib/lzx_common.h \ include/wimlib/lzx_constants.h \ + include/wimlib/matchfinder_common.h \ include/wimlib/metadata.h \ include/wimlib/object_id.h \ include/wimlib/pathlist.h \ diff --git a/include/wimlib/bt_matchfinder.h b/include/wimlib/bt_matchfinder.h index 07e0b456..14e9c5ca 100644 --- a/include/wimlib/bt_matchfinder.h +++ b/include/wimlib/bt_matchfinder.h @@ -65,8 +65,7 @@ #include -#include "wimlib/lz_extend.h" -#include "wimlib/lz_hash.h" +#include "wimlib/matchfinder_common.h" #define BT_MATCHFINDER_HASH3_ORDER 15 #define BT_MATCHFINDER_HASH3_WAYS 2 diff --git a/include/wimlib/compiler.h b/include/wimlib/compiler.h index 294108c3..1d596e20 100644 --- a/include/wimlib/compiler.h +++ b/include/wimlib/compiler.h @@ -117,21 +117,21 @@ * the case if the function contains only static assertions. */ #define _unused_attribute __attribute__((unused)) -/* Endianness definitions. Either CPU_IS_BIG_ENDIAN or CPU_IS_LITTLE_ENDIAN is - * set to 1. The other is set to 0. Note that newer gcc supports +/* Endianness definitions. Either CPU_IS_BIG_ENDIAN() or CPU_IS_LITTLE_ENDIAN() + * evaluates to 1. The other evaluates to 0. Note that newer gcc supports * __BYTE_ORDER__ for easily determining the endianness; older gcc doesn't. In * the latter case we fall back to a configure-time check. */ #ifdef __BYTE_ORDER__ -# define CPU_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define CPU_IS_BIG_ENDIAN() (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) #elif defined(HAVE_CONFIG_H) # include "config.h" # ifdef WORDS_BIGENDIAN -# define CPU_IS_BIG_ENDIAN 1 +# define CPU_IS_BIG_ENDIAN() 1 # else -# define CPU_IS_BIG_ENDIAN 0 +# define CPU_IS_BIG_ENDIAN() 0 # endif #endif -#define CPU_IS_LITTLE_ENDIAN (!CPU_IS_BIG_ENDIAN) +#define CPU_IS_LITTLE_ENDIAN() (!CPU_IS_BIG_ENDIAN()) /* UNALIGNED_ACCESS_IS_FAST should be defined to 1 if unaligned memory accesses * can be performed efficiently on the target platform. */ diff --git a/include/wimlib/endianness.h b/include/wimlib/endianness.h index fa1f9465..c8a603c9 100644 --- a/include/wimlib/endianness.h +++ b/include/wimlib/endianness.h @@ -93,7 +93,7 @@ static forceinline u64 do_bswap64(u64 n) #define bswap32(n) (__builtin_constant_p(n) ? bswap32_const(n) : do_bswap32(n)) #define bswap64(n) (__builtin_constant_p(n) ? bswap64_const(n) : do_bswap64(n)) -#if CPU_IS_BIG_ENDIAN +#if CPU_IS_BIG_ENDIAN() # define cpu_to_le16(n) ((_force_attr le16)bswap16(n)) # define cpu_to_le32(n) ((_force_attr le32)bswap32(n)) # define cpu_to_le64(n) ((_force_attr le64)bswap64(n)) diff --git a/include/wimlib/hc_matchfinder.h b/include/wimlib/hc_matchfinder.h index 2c28a646..a96aa651 100644 --- a/include/wimlib/hc_matchfinder.h +++ b/include/wimlib/hc_matchfinder.h @@ -121,9 +121,7 @@ #include -#include "wimlib/lz_extend.h" -#include "wimlib/lz_hash.h" -#include "wimlib/unaligned.h" +#include "wimlib/matchfinder_common.h" #define HC_MATCHFINDER_HASH3_ORDER 15 #define HC_MATCHFINDER_HASH4_ORDER 16 diff --git a/include/wimlib/lz_extend.h b/include/wimlib/lz_extend.h deleted file mode 100644 index e095fd4e..00000000 --- a/include/wimlib/lz_extend.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * lz_extend.h - fast match extension for Lempel-Ziv matchfinding - * - * Copyright 2022 Eric Biggers - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef _WIMLIB_LZ_EXTEND_H -#define _WIMLIB_LZ_EXTEND_H - -#include "wimlib/bitops.h" -#include "wimlib/unaligned.h" - -/* - * Return the number of bytes at @matchptr that match the bytes at @strptr, up - * to a maximum of @max_len. Initially, @len bytes are matched. - */ -static forceinline u32 -lz_extend(const u8 * const strptr, const u8 * const matchptr, - u32 len, const u32 max_len) -{ - while (UNALIGNED_ACCESS_IS_FAST && len + WORDBYTES <= max_len) { - machine_word_t v = load_word_unaligned(matchptr + len) ^ - load_word_unaligned(strptr + len); - if (v != 0) { - if (CPU_IS_LITTLE_ENDIAN) - len += bsfw(v) >> 3; - else - len += (WORDBITS - 1 - bsrw(v)) >> 3; - return len; - } - len += WORDBYTES; - } - - while (len < max_len && matchptr[len] == strptr[len]) - len++; - return len; -} - -#endif /* _WIMLIB_LZ_EXTEND_H */ diff --git a/include/wimlib/lz_hash.h b/include/wimlib/lz_hash.h deleted file mode 100644 index ae2264ed..00000000 --- a/include/wimlib/lz_hash.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * lz_hash.h - hashing for Lempel-Ziv matchfinding - * - * Copyright 2022 Eric Biggers - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef _LZ_HASH_H -#define _LZ_HASH_H - -#include "wimlib/types.h" - -/* - * The hash function: given a sequence prefix held in the low-order bits of a - * 32-bit value, multiply by a carefully-chosen large constant. Discard any - * bits of the product that don't fit in a 32-bit value, but take the - * next-highest @num_bits bits of the product as the hash value, as those have - * the most randomness. - */ -static forceinline u32 -lz_hash(u32 seq, unsigned num_bits) -{ - return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits); -} - -#endif /* _LZ_HASH_H */ diff --git a/include/wimlib/matchfinder_common.h b/include/wimlib/matchfinder_common.h new file mode 100644 index 00000000..bbb61f64 --- /dev/null +++ b/include/wimlib/matchfinder_common.h @@ -0,0 +1,130 @@ +/* + * matchfinder_common.h - common code for Lempel-Ziv matchfinding + * + * Copyright 2022 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _WIMLIB_MATCHFINDER_COMMON_H +#define _WIMLIB_MATCHFINDER_COMMON_H + +#include "wimlib/bitops.h" +#include "wimlib/unaligned.h" + +/* + * Given a 32-bit value that was loaded with the platform's native endianness, + * return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24 + * bits contain the first 3 bytes, arranged in octets in a platform-dependent + * order, at the memory location from which the input 32-bit value was loaded. + */ +static forceinline u32 +loaded_u32_to_u24(u32 v) +{ + if (CPU_IS_LITTLE_ENDIAN()) + return v & 0xFFFFFF; + else + return v >> 8; +} + +/* + * Load the next 3 bytes from @p into the 24 low-order bits of a 32-bit value. + * The order in which the 3 bytes will be arranged as octets in the 24 bits is + * platform-dependent. At least 4 bytes (not 3) must be available at @p. + */ +static forceinline u32 +load_u24_unaligned(const u8 *p) +{ +#if UNALIGNED_ACCESS_IS_FAST + return loaded_u32_to_u24(load_u32_unaligned(p)); +#else + if (CPU_IS_LITTLE_ENDIAN()) + return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16); + else + return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16); +#endif +} + +/* + * The hash function: given a sequence prefix held in the low-order bits of a + * 32-bit value, multiply by a carefully-chosen large constant. Discard any + * bits of the product that don't fit in a 32-bit value, but take the + * next-highest @num_bits bits of the product as the hash value, as those have + * the most randomness. + */ +static forceinline u32 +lz_hash(u32 seq, unsigned num_bits) +{ + return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits); +} + +/* + * Return the number of bytes at @matchptr that match the bytes at @strptr, up + * to a maximum of @max_len. Initially, @start_len bytes are matched. + */ +static forceinline unsigned +lz_extend(const u8 * const strptr, const u8 * const matchptr, + const unsigned start_len, const unsigned max_len) +{ + unsigned len = start_len; + machine_word_t v_word; + + if (UNALIGNED_ACCESS_IS_FAST) { + + if (likely(max_len - len >= 4 * WORDBYTES)) { + + #define COMPARE_WORD_STEP \ + v_word = load_word_unaligned(&matchptr[len]) ^ \ + load_word_unaligned(&strptr[len]); \ + if (v_word != 0) \ + goto word_differs; \ + len += WORDBYTES; \ + + COMPARE_WORD_STEP + COMPARE_WORD_STEP + COMPARE_WORD_STEP + COMPARE_WORD_STEP + #undef COMPARE_WORD_STEP + } + + while (len + WORDBYTES <= max_len) { + v_word = load_word_unaligned(&matchptr[len]) ^ + load_word_unaligned(&strptr[len]); + if (v_word != 0) + goto word_differs; + len += WORDBYTES; + } + } + + while (len < max_len && matchptr[len] == strptr[len]) + len++; + return len; + +word_differs: + if (CPU_IS_LITTLE_ENDIAN()) + len += (bsfw(v_word) >> 3); + else + len += (WORDBITS - 1 - bsrw(v_word)) >> 3; + return len; +} + +#endif /* _WIMLIB_MATCHFINDER_COMMON_H */ diff --git a/include/wimlib/unaligned.h b/include/wimlib/unaligned.h index b8a3481b..7db293da 100644 --- a/include/wimlib/unaligned.h +++ b/include/wimlib/unaligned.h @@ -107,42 +107,4 @@ put_unaligned_le32(u32 v, u8 *p) } } -/* - * Given a 32-bit value that was loaded with the platform's native endianness, - * return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24 - * bits contain the first 3 bytes, arranged in octets in a platform-dependent - * order, at the memory location from which the input 32-bit value was loaded. - */ -static forceinline u32 -loaded_u32_to_u24(u32 v) -{ - if (CPU_IS_LITTLE_ENDIAN) - return v & 0xFFFFFF; - else - return v >> 8; -} - -/* - * Load the next 3 bytes from the memory location @p into the 24 low-order bits - * of a 32-bit value. The order in which the 3 bytes will be arranged as octets - * in the 24 bits is platform-dependent. At least LOAD_U24_REQUIRED_NBYTES - * bytes must be available at @p; note that this may be more than 3. - */ -static forceinline u32 -load_u24_unaligned(const u8 *p) -{ -#if UNALIGNED_ACCESS_IS_FAST -# define LOAD_U24_REQUIRED_NBYTES 4 - return loaded_u32_to_u24(load_u32_unaligned(p)); -#else -# define LOAD_U24_REQUIRED_NBYTES 3 -# if CPU_IS_BIG_ENDIAN - return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16); -# else - return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16); -# endif -#endif -} - - #endif /* _WIMLIB_UNALIGNED_H */ diff --git a/src/lzms_compress.c b/src/lzms_compress.c index 09999957..6843859b 100644 --- a/src/lzms_compress.c +++ b/src/lzms_compress.c @@ -32,9 +32,8 @@ #include "wimlib/compressor_ops.h" #include "wimlib/error.h" #include "wimlib/lcpit_matchfinder.h" -#include "wimlib/lz_extend.h" -#include "wimlib/lz_hash.h" #include "wimlib/lzms_common.h" +#include "wimlib/matchfinder_common.h" #include "wimlib/unaligned.h" #include "wimlib/util.h" diff --git a/src/lzms_decompress.c b/src/lzms_decompress.c index 4dd36627..c81a2964 100644 --- a/src/lzms_decompress.c +++ b/src/lzms_decompress.c @@ -386,7 +386,7 @@ lzms_ensure_bits(struct lzms_input_bitstream *is, unsigned num_bits) avail = BITBUF_NBITS - is->bitsleft; - if (UNALIGNED_ACCESS_IS_FAST && CPU_IS_LITTLE_ENDIAN && + if (UNALIGNED_ACCESS_IS_FAST && CPU_IS_LITTLE_ENDIAN() && WORDBYTES == 8 && likely(is->next - is->begin >= 8)) { is->next -= (avail & ~15) >> 3; diff --git a/src/lzx_compress.c b/src/lzx_compress.c index 6e1deca8..453f97d4 100644 --- a/src/lzx_compress.c +++ b/src/lzx_compress.c @@ -182,7 +182,6 @@ #include "wimlib/compress_common.h" #include "wimlib/compressor_ops.h" #include "wimlib/error.h" -#include "wimlib/lz_extend.h" #include "wimlib/lzx_common.h" #include "wimlib/unaligned.h" #include "wimlib/util.h" -- 2.43.0