From 32158cb5b4df58eb71a1986762e5aaf12bce9d30 Mon Sep 17 00:00:00 2001
From: Eric Biggers
Date: Tue, 21 Jun 2016 20:01:59 -0500
Subject: [PATCH] Rename WORDSIZE to WORDBYTES and introduce WORDBITS

---
 include/wimlib/bitops.h            |  8 +++----
 include/wimlib/decompress_common.h | 36 ++++++++++++++----------------
 include/wimlib/lz_extend.h         |  6 ++---
 include/wimlib/types.h             |  3 ++-
 src/decompress_common.c            |  6 ++---
 src/lzms_decompress.c              |  2 +-
 src/lzx_compress.c                 |  6 ++---
 7 files changed, 33 insertions(+), 34 deletions(-)

diff --git a/include/wimlib/bitops.h b/include/wimlib/bitops.h
index 70e6c611..ed8b16cd 100644
--- a/include/wimlib/bitops.h
+++ b/include/wimlib/bitops.h
@@ -55,8 +55,8 @@ fls64(u64 v)
 static inline unsigned
 flsw(machine_word_t v)
 {
-	STATIC_ASSERT(WORDSIZE == 4 || WORDSIZE == 8);
-	if (WORDSIZE == 4)
+	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
+	if (WORDBITS == 32)
 		return fls32(v);
 	else
 		return fls64(v);
@@ -93,8 +93,8 @@ ffs64(u64 v)
 static inline unsigned
 ffsw(machine_word_t v)
 {
-	STATIC_ASSERT(WORDSIZE == 4 || WORDSIZE == 8);
-	if (WORDSIZE == 4)
+	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
+	if (WORDBITS == 32)
 		return ffs32(v);
 	else
 		return ffs64(v);
diff --git a/include/wimlib/decompress_common.h b/include/wimlib/decompress_common.h
index f3f1dee5..a06ede14 100644
--- a/include/wimlib/decompress_common.h
+++ b/include/wimlib/decompress_common.h
@@ -274,12 +274,12 @@ repeat_byte(u8 b)
 {
 	machine_word_t v;
 
-	STATIC_ASSERT(WORDSIZE == 4 || WORDSIZE == 8);
+	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
 
 	v = b;
 	v |= v << 8;
 	v |= v << 16;
-	v |= v << ((WORDSIZE == 8) ? 32 : 0);
+	v |= v << ((WORDBITS == 64) ? 32 : 0);
 	return v;
 }
 
@@ -310,13 +310,11 @@ lz_copy(u8 *dst, u32 length, u32 offset, const u8 *winend, u32 min_length)
 	 * example, if a word is 8 bytes and the match is of length 5, then
 	 * we'll simply copy 8 bytes.  This is okay as long as we don't write
 	 * beyond the end of the output buffer, hence the check for (winend -
-	 * end >= WORDSIZE - 1).
+	 * end >= WORDBYTES - 1).
 	 */
-	if (UNALIGNED_ACCESS_IS_FAST &&
-	    likely(winend - end >= WORDSIZE - 1))
-	{
+	if (UNALIGNED_ACCESS_IS_FAST && likely(winend - end >= WORDBYTES - 1)) {
 
-		if (offset >= WORDSIZE) {
+		if (offset >= WORDBYTES) {
 			/* The source and destination words don't overlap.  */
 
 			/* To improve branch prediction, one iteration of this
@@ -326,14 +324,14 @@ lz_copy(u8 *dst, u32 length, u32 offset, const u8 *winend, u32 min_length)
 			 * and we'll need to continue copying.  */
 
 			copy_word_unaligned(src, dst);
-			src += WORDSIZE;
-			dst += WORDSIZE;
+			src += WORDBYTES;
+			dst += WORDBYTES;
 
 			if (dst < end) {
 				do {
 					copy_word_unaligned(src, dst);
-					src += WORDSIZE;
-					dst += WORDSIZE;
+					src += WORDBYTES;
+					dst += WORDBYTES;
 				} while (dst < end);
 			}
 			return;
@@ -346,19 +344,19 @@ lz_copy(u8 *dst, u32 length, u32 offset, const u8 *winend, u32 min_length)
 			machine_word_t v = repeat_byte(*(dst - 1));
 			do {
 				store_word_unaligned(v, dst);
-				src += WORDSIZE;
-				dst += WORDSIZE;
+				src += WORDBYTES;
+				dst += WORDBYTES;
 			} while (dst < end);
 			return;
 		}
 		/*
 		 * We don't bother with special cases for other 'offset <
-		 * WORDSIZE', which are usually rarer than 'offset == 1'.  Extra
-		 * checks will just slow things down.  Actually, it's possible
-		 * to handle all the 'offset < WORDSIZE' cases using the same
-		 * code, but it still becomes more complicated doesn't seem any
-		 * faster overall; it definitely slows down the more common
-		 * 'offset == 1' case.
+		 * WORDBYTES', which are usually rarer than 'offset == 1'.
+		 * Extra checks will just slow things down.  Actually, it's
+		 * possible to handle all the 'offset < WORDBYTES' cases using
+		 * the same code, but it still becomes more complicated and
+		 * doesn't seem any faster overall; it definitely slows down
+		 * the more common 'offset == 1' case.
 		 */
 	}
 
diff --git a/include/wimlib/lz_extend.h b/include/wimlib/lz_extend.h
index 2fb76bc9..c4547e2b 100644
--- a/include/wimlib/lz_extend.h
+++ b/include/wimlib/lz_extend.h
@@ -32,17 +32,17 @@ static inline u32
 lz_extend(const u8 * const strptr, const u8 * const matchptr,
 	  u32 len, const u32 max_len)
 {
-	while (UNALIGNED_ACCESS_IS_FAST && len + WORDSIZE <= max_len) {
+	while (UNALIGNED_ACCESS_IS_FAST && len + WORDBYTES <= max_len) {
 		machine_word_t v = load_word_unaligned(matchptr + len) ^
 				   load_word_unaligned(strptr + len);
 		if (v != 0) {
 			if (CPU_IS_LITTLE_ENDIAN)
 				len += ffsw(v) >> 3;
 			else
-				len += (8 * WORDSIZE - 1 - flsw(v)) >> 3;
+				len += (WORDBITS - 1 - flsw(v)) >> 3;
 			return len;
 		}
-		len += WORDSIZE;
+		len += WORDBYTES;
 	}
 
 	while (len < max_len && matchptr[len] == strptr[len])
diff --git a/include/wimlib/types.h b/include/wimlib/types.h
index 87f46042..4c85311e 100644
--- a/include/wimlib/types.h
+++ b/include/wimlib/types.h
@@ -47,6 +47,7 @@ typedef struct WIMStruct WIMStruct;
  */
 typedef size_t machine_word_t;
 
-#define WORDSIZE	sizeof(machine_word_t)
+#define WORDBYTES	sizeof(machine_word_t)
+#define WORDBITS	(8 * WORDBYTES)
 
 #endif /* _WIMLIB_TYPES_H */
diff --git a/src/decompress_common.c b/src/decompress_common.c
index c927502c..973f467c 100644
--- a/src/decompress_common.c
+++ b/src/decompress_common.c
@@ -157,7 +157,7 @@ make_huffman_decode_table(u16 decode_table[const],
 	unsigned decode_table_pos;
 
 #ifdef USE_WORD_FILL
-	const unsigned entries_per_word = WORDSIZE / sizeof(decode_table[0]);
+	const unsigned entries_per_word = WORDBYTES / sizeof(decode_table[0]);
 #endif
 
 #ifdef USE_SSE2_FILL
@@ -291,11 +291,11 @@ make_huffman_decode_table(u16 decode_table[const],
 			aliased_word_t *p;
 			unsigned n;
 
-			STATIC_ASSERT(WORDSIZE == 4 || WORDSIZE == 8);
+			STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
 
 			v = MAKE_DIRECT_ENTRY(sorted_syms[sym_idx], codeword_len);
 			v |= v << 16;
-			v |= v << (WORDSIZE == 8 ? 32 : 0);
+			v |= v << (WORDBITS == 64 ? 32 : 0);
 
 			p = (aliased_word_t *)decode_table_ptr;
 			n = stores_per_loop;
diff --git a/src/lzms_decompress.c b/src/lzms_decompress.c
index e14ba590..1ea0ac6d 100644
--- a/src/lzms_decompress.c
+++ b/src/lzms_decompress.c
@@ -387,7 +387,7 @@ lzms_ensure_bits(struct lzms_input_bitstream *is, unsigned num_bits)
 	avail = BITBUF_NBITS - is->bitsleft;
 
 	if (UNALIGNED_ACCESS_IS_FAST && CPU_IS_LITTLE_ENDIAN &&
-	    WORDSIZE == 8 && likely(is->next - is->begin >= 8))
+	    WORDBYTES == 8 && likely(is->next - is->begin >= 8))
 	{
 		is->next -= (avail & ~15) >> 3;
 		is->bitbuf |= load_u64_unaligned(is->next) << (avail & 15);
diff --git a/src/lzx_compress.c b/src/lzx_compress.c
index 19e2daa2..588b81d8 100644
--- a/src/lzx_compress.c
+++ b/src/lzx_compress.c
@@ -542,7 +542,7 @@ struct lzx_output_bitstream {
 
 /* Can the specified number of bits always be added to 'bitbuf' after any
  * pending 16-bit coding units have been flushed?  */
-#define CAN_BUFFER(n)	((n) <= (8 * sizeof(machine_word_t)) - 15)
+#define CAN_BUFFER(n)	((n) <= WORDBITS - 15)
 
 /*
  * Initialize the output bitstream.
@@ -892,7 +892,7 @@ lzx_write_sequences(struct lzx_output_bitstream *os, int block_type,
 		if (litrunlen) {	/* Is the literal run nonempty?  */
 
 			/* Verify optimization is enabled on 64-bit  */
-			STATIC_ASSERT(sizeof(machine_word_t) < 8 ||
+			STATIC_ASSERT(WORDBITS < 64 ||
 				      CAN_BUFFER(4 * MAIN_CODEWORD_LIMIT));
 
 			if (CAN_BUFFER(4 * MAIN_CODEWORD_LIMIT)) {
@@ -968,7 +968,7 @@ lzx_write_sequences(struct lzx_output_bitstream *os, int block_type,
 			 14 + ALIGNED_CODEWORD_LIMIT)
 
 	/* Verify optimization is enabled on 64-bit  */
-	STATIC_ASSERT(sizeof(machine_word_t) < 8 || CAN_BUFFER(MAX_MATCH_BITS));
+	STATIC_ASSERT(WORDBITS < 64 || CAN_BUFFER(MAX_MATCH_BITS));
 
 	/* Output the main symbol for the match.  */
-- 
2.43.0
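
A note on the intent of the split, for anyone skimming the rename: byte counts
and pointer strides now compare against WORDBYTES (sizeof(machine_word_t)),
while shift amounts and bit indices compare against WORDBITS (8 * WORDBYTES),
which is what lets '8 * WORDSIZE - 1 - flsw(v)' become 'WORDBITS - 1 - flsw(v)'.
The lz_extend() hunk uses both macros, so here is a minimal standalone sketch
of that loop.  This is not wimlib's actual code: it assumes a little-endian
target and substitutes memcpy() and GCC/Clang's __builtin_ctzll() for wimlib's
load_word_unaligned() and ffsw() helpers.

/* Sketch of the lz_extend() word-at-a-time matching loop. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef size_t machine_word_t;
#define WORDBYTES sizeof(machine_word_t)	/* bytes per machine word */
#define WORDBITS  (8 * WORDBYTES)		/* bits per machine word  */

/* Unaligned load via memcpy(); wimlib uses load_word_unaligned(). */
static machine_word_t
load_word(const uint8_t *p)
{
	machine_word_t v;

	memcpy(&v, p, sizeof(v));
	return v;
}

/* Extend a match already known to be 'len' bytes long, up to 'max_len'. */
static uint32_t
lz_extend_sketch(const uint8_t *strptr, const uint8_t *matchptr,
		 uint32_t len, uint32_t max_len)
{
	while (len + WORDBYTES <= max_len) {
		machine_word_t v = load_word(matchptr + len) ^
				   load_word(strptr + len);
		if (v != 0) {
			/* Little-endian only: the lowest set bit of the XOR
			 * lies in the first mismatching byte, so dividing the
			 * trailing-zero count by 8 gives the number of extra
			 * matching bytes within this word. */
			len += (uint32_t)__builtin_ctzll(v) >> 3;
			return len;
		}
		len += WORDBYTES;
	}
	while (len < max_len && matchptr[len] == strptr[len])
		len++;
	return len;
}

int
main(void)
{
	const uint8_t a[17] = "abcdefghijklmnop";
	const uint8_t b[17] = "abcdefghijkLMNOP";

	/* 'l' vs 'L' is the first mismatch, at index 11. */
	printf("match length = %u\n", lz_extend_sketch(a, b, 0, 16));
	return 0;
}

The big-endian branch in the patch computes the same byte count from the top
end of the word instead, via (WORDBITS - 1 - flsw(v)) >> 3.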
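
Likewise, the 'offset == 1' path in lz_copy() depends on the word layout in
two places: repeat_byte() broadcasts a byte across the word in three shift-or
steps (the last shift is 32 only when WORDBITS == 64), and the store loop may
overrun the copy end by up to WORDBYTES - 1 bytes, which is why the caller
first checks 'winend - end >= WORDBYTES - 1'.  A small self-contained
illustration, again a sketch rather than wimlib's code: a runtime assert()
stands in for STATIC_ASSERT(), and memcpy() for store_word_unaligned().

/* Sketch of repeat_byte() and lz_copy()'s 'offset == 1' store loop. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef size_t machine_word_t;
#define WORDBYTES sizeof(machine_word_t)
#define WORDBITS  (8 * WORDBYTES)

/* Broadcast byte 'b' into every byte of a machine word. */
static machine_word_t
repeat_byte(uint8_t b)
{
	machine_word_t v = b;

	assert(WORDBITS == 32 || WORDBITS == 64);

	v |= v << 8;				/* 2 copies of 'b' */
	v |= v << 16;				/* 4 copies of 'b' */
	v |= v << ((WORDBITS == 64) ? 32 : 0);	/* 8 copies on 64-bit */
	return v;
}

int
main(void)
{
	uint8_t buf[32];			/* room for the overshoot */
	machine_word_t v = repeat_byte(0x41);	/* 'A' in every byte */
	uint8_t *dst = buf;
	uint8_t *end = buf + 20;

	/* Whole-word stores; like lz_copy(), this may write up to
	 * WORDBYTES - 1 bytes past 'end', which 'buf' has room for. */
	do {
		memcpy(dst, &v, sizeof(v));	/* store_word_unaligned() */
		dst += WORDBYTES;
	} while (dst < end);

	printf("%.20s\n", buf);			/* prints twenty 'A's */
	return 0;
}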