From: Eric Biggers Date: Sat, 9 Jul 2016 15:01:26 +0000 (-0500) Subject: Stop force-inlining everything marked 'inline' X-Git-Tag: v1.10.0~22 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=4a20aae0dd8469a352517a0b107416ffa99ccc55 Stop force-inlining everything marked 'inline' Instead, replace 'inline' with 'forceinline' in selected places. --- diff --git a/include/wimlib/bitops.h b/include/wimlib/bitops.h index 2c905057..1fc30f6c 100644 --- a/include/wimlib/bitops.h +++ b/include/wimlib/bitops.h @@ -30,7 +30,7 @@ * input value must be nonzero! */ -static inline unsigned +static forceinline unsigned bsr32(u32 v) { #ifdef compiler_bsr32 @@ -43,7 +43,7 @@ bsr32(u32 v) #endif } -static inline unsigned +static forceinline unsigned bsr64(u64 v) { #ifdef compiler_bsr64 @@ -56,7 +56,7 @@ bsr64(u64 v) #endif } -static inline unsigned +static forceinline unsigned bsrw(machine_word_t v) { STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); @@ -72,7 +72,7 @@ bsrw(machine_word_t v) * input value must be nonzero! */ -static inline unsigned +static forceinline unsigned bsf32(u32 v) { #ifdef compiler_bsf32 @@ -85,7 +85,7 @@ bsf32(u32 v) #endif } -static inline unsigned +static forceinline unsigned bsf64(u64 v) { #ifdef compiler_bsf64 @@ -98,7 +98,7 @@ bsf64(u64 v) #endif } -static inline unsigned +static forceinline unsigned bsfw(machine_word_t v) { STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); @@ -109,7 +109,7 @@ bsfw(machine_word_t v) } /* Return the log base 2 of 'n', rounded up to the nearest integer. */ -static inline unsigned +static forceinline unsigned ilog2_ceil(size_t n) { if (n <= 1) @@ -118,7 +118,7 @@ ilog2_ceil(size_t n) } /* Round 'n' up to the nearest power of 2 */ -static inline size_t +static forceinline size_t roundup_pow_of_2(size_t n) { return (size_t)1 << ilog2_ceil(n); diff --git a/include/wimlib/bt_matchfinder.h b/include/wimlib/bt_matchfinder.h index 39a2778b..05bd07d9 100644 --- a/include/wimlib/bt_matchfinder.h +++ b/include/wimlib/bt_matchfinder.h @@ -108,7 +108,7 @@ struct TEMPLATED(bt_matchfinder) { /* Return the number of bytes that must be allocated for a 'bt_matchfinder' that * can work with buffers up to the specified size. */ -static inline size_t +static forceinline size_t TEMPLATED(bt_matchfinder_size)(size_t max_bufsize) { return sizeof(struct TEMPLATED(bt_matchfinder)) + @@ -116,19 +116,19 @@ TEMPLATED(bt_matchfinder_size)(size_t max_bufsize) } /* Prepare the matchfinder for a new input buffer. */ -static inline void +static forceinline void TEMPLATED(bt_matchfinder_init)(struct TEMPLATED(bt_matchfinder) *mf) { memset(mf, 0, sizeof(*mf)); } -static inline mf_pos_t * +static forceinline mf_pos_t * TEMPLATED(bt_left_child)(struct TEMPLATED(bt_matchfinder) *mf, u32 node) { return &mf->child_tab[(node << 1) + 0]; } -static inline mf_pos_t * +static forceinline mf_pos_t * TEMPLATED(bt_right_child)(struct TEMPLATED(bt_matchfinder) *mf, u32 node) { return &mf->child_tab[(node << 1) + 1]; @@ -141,7 +141,7 @@ TEMPLATED(bt_right_child)(struct TEMPLATED(bt_matchfinder) *mf, u32 node) /* Advance the binary tree matchfinder by one byte, optionally recording * matches. @record_matches should be a compile-time constant. */ -static inline struct lz_match * +static forceinline struct lz_match * TEMPLATED(bt_matchfinder_advance_one_byte)(struct TEMPLATED(bt_matchfinder) * const restrict mf, const u8 * const restrict in_begin, const ptrdiff_t cur_pos, @@ -323,7 +323,7 @@ TEMPLATED(bt_matchfinder_advance_one_byte)(struct TEMPLATED(bt_matchfinder) * co * The return value is a pointer to the next available slot in the @lz_matchptr * array. (If no matches were found, this will be the same as @lz_matchptr.) */ -static inline struct lz_match * +static forceinline struct lz_match * TEMPLATED(bt_matchfinder_get_matches)(struct TEMPLATED(bt_matchfinder) *mf, const u8 *in_begin, ptrdiff_t cur_pos, @@ -352,7 +352,7 @@ TEMPLATED(bt_matchfinder_get_matches)(struct TEMPLATED(bt_matchfinder) *mf, * This is very similar to bt_matchfinder_get_matches() because both functions * must do hashing and tree re-rooting. */ -static inline void +static forceinline void TEMPLATED(bt_matchfinder_skip_position)(struct TEMPLATED(bt_matchfinder) *mf, const u8 *in_begin, ptrdiff_t cur_pos, diff --git a/include/wimlib/compiler.h b/include/wimlib/compiler.h index 6bce5d01..2b1923c2 100644 --- a/include/wimlib/compiler.h +++ b/include/wimlib/compiler.h @@ -50,10 +50,9 @@ # define WIMLIBAPI __attribute__((visibility("default"))) #endif -/* Declare that the annotated function should be inlined. Currently, we force - * the compiler to honor this because we use 'inline' in highly tuned code, e.g. - * compression codecs. */ -#define inline inline __attribute__((always_inline)) +/* Declare that the annotated function should always be inlined. This might be + * desirable in highly tuned code, e.g. compression codecs. */ +#define forceinline inline __attribute__((always_inline)) /* Declare that the annotated function should *not* be inlined. */ #define noinline __attribute__((noinline)) diff --git a/include/wimlib/decompress_common.h b/include/wimlib/decompress_common.h index d20085db..65eb2e4b 100644 --- a/include/wimlib/decompress_common.h +++ b/include/wimlib/decompress_common.h @@ -54,7 +54,7 @@ struct input_bitstream { }; /* Initialize a bitstream to read from the specified input buffer. */ -static inline void +static forceinline void init_input_bitstream(struct input_bitstream *is, const void *buffer, u32 size) { is->bitbuf = 0; @@ -73,7 +73,7 @@ init_input_bitstream(struct input_bitstream *is, const void *buffer, u32 size) /* Ensure the bit buffer variable for the bitstream contains at least @num_bits * bits. Following this, bitstream_peek_bits() and/or bitstream_remove_bits() * may be called on the bitstream to peek or remove up to @num_bits bits. */ -static inline void +static forceinline void bitstream_ensure_bits(struct input_bitstream *is, const unsigned num_bits) { /* This currently works for at most 17 bits. */ @@ -106,7 +106,7 @@ overflow: /* Return the next @num_bits bits from the bitstream, without removing them. * There must be at least @num_bits remaining in the buffer variable, from a * previous call to bitstream_ensure_bits(). */ -static inline u32 +static forceinline u32 bitstream_peek_bits(const struct input_bitstream *is, const unsigned num_bits) { return (is->bitbuf >> 1) >> (sizeof(is->bitbuf) * 8 - num_bits - 1); @@ -115,7 +115,7 @@ bitstream_peek_bits(const struct input_bitstream *is, const unsigned num_bits) /* Remove @num_bits from the bitstream. There must be at least @num_bits * remaining in the buffer variable, from a previous call to * bitstream_ensure_bits(). */ -static inline void +static forceinline void bitstream_remove_bits(struct input_bitstream *is, unsigned num_bits) { is->bitbuf <<= num_bits; @@ -125,7 +125,7 @@ bitstream_remove_bits(struct input_bitstream *is, unsigned num_bits) /* Remove and return @num_bits bits from the bitstream. There must be at least * @num_bits remaining in the buffer variable, from a previous call to * bitstream_ensure_bits(). */ -static inline u32 +static forceinline u32 bitstream_pop_bits(struct input_bitstream *is, unsigned num_bits) { u32 bits = bitstream_peek_bits(is, num_bits); @@ -134,7 +134,7 @@ bitstream_pop_bits(struct input_bitstream *is, unsigned num_bits) } /* Read and return the next @num_bits bits from the bitstream. */ -static inline u32 +static forceinline u32 bitstream_read_bits(struct input_bitstream *is, unsigned num_bits) { bitstream_ensure_bits(is, num_bits); @@ -142,7 +142,7 @@ bitstream_read_bits(struct input_bitstream *is, unsigned num_bits) } /* Read and return the next literal byte embedded in the bitstream. */ -static inline u8 +static forceinline u8 bitstream_read_byte(struct input_bitstream *is) { if (unlikely(is->end == is->next)) @@ -151,7 +151,7 @@ bitstream_read_byte(struct input_bitstream *is) } /* Read and return the next 16-bit integer embedded in the bitstream. */ -static inline u16 +static forceinline u16 bitstream_read_u16(struct input_bitstream *is) { u16 v; @@ -164,7 +164,7 @@ bitstream_read_u16(struct input_bitstream *is) } /* Read and return the next 32-bit integer embedded in the bitstream. */ -static inline u32 +static forceinline u32 bitstream_read_u32(struct input_bitstream *is) { u32 v; @@ -178,7 +178,7 @@ bitstream_read_u32(struct input_bitstream *is) /* Read into @dst_buffer an array of literal bytes embedded in the bitstream. * Return 0 if there were enough bytes remaining in the input, otherwise -1. */ -static inline int +static forceinline int bitstream_read_bytes(struct input_bitstream *is, void *dst_buffer, size_t count) { if (unlikely(is->end - is->next < count)) @@ -189,7 +189,7 @@ bitstream_read_bytes(struct input_bitstream *is, void *dst_buffer, size_t count) } /* Align the input bitstream on a coding-unit boundary. */ -static inline void +static forceinline void bitstream_align(struct input_bitstream *is) { is->bitsleft = 0; @@ -242,7 +242,7 @@ bitstream_align(struct input_bitstream *is) * XXX: This is mostly duplicated in lzms_decode_huffman_symbol() in * lzms_decompress.c; keep them in sync! */ -static inline unsigned +static forceinline unsigned read_huffsym(struct input_bitstream *is, const u16 decode_table[], unsigned table_bits, unsigned max_codeword_len) { @@ -414,13 +414,13 @@ make_huffman_decode_table(u16 decode_table[], unsigned num_syms, /* LZ match copying */ /*----------------------------------------------------------------------------*/ -static inline void +static forceinline void copy_word_unaligned(const void *src, void *dst) { store_word_unaligned(load_word_unaligned(src), dst); } -static inline machine_word_t +static forceinline machine_word_t repeat_u16(u16 b) { machine_word_t v = b; @@ -431,7 +431,7 @@ repeat_u16(u16 b) return v; } -static inline machine_word_t +static forceinline machine_word_t repeat_byte(u8 b) { return repeat_u16(((u16)b << 8) | b); @@ -450,7 +450,7 @@ repeat_byte(u8 b) * 'min_length' is a hint which specifies the minimum possible match length. * This should be a compile-time constant. */ -static inline int +static forceinline int lz_copy(u32 length, u32 offset, u8 *out_begin, u8 *out_next, u8 *out_end, u32 min_length) { diff --git a/include/wimlib/endianness.h b/include/wimlib/endianness.h index 9cea963b..ed0b7ec4 100644 --- a/include/wimlib/endianness.h +++ b/include/wimlib/endianness.h @@ -47,7 +47,7 @@ (((u64)(n) & 0x00FF000000000000) >> 40) | \ (((u64)(n) & 0xFF00000000000000) >> 56)) -static inline u16 do_bswap16(u16 n) +static forceinline u16 do_bswap16(u16 n) { #ifdef compiler_bswap16 return compiler_bswap16(n); @@ -56,7 +56,7 @@ static inline u16 do_bswap16(u16 n) #endif } -static inline u32 do_bswap32(u32 n) +static forceinline u32 do_bswap32(u32 n) { #ifdef compiler_bswap32 return compiler_bswap32(n); @@ -65,7 +65,7 @@ static inline u32 do_bswap32(u32 n) #endif } -static inline u64 do_bswap64(u64 n) +static forceinline u64 do_bswap64(u64 n) { #ifdef compiler_bswap64 return compiler_bswap64(n); diff --git a/include/wimlib/hc_matchfinder.h b/include/wimlib/hc_matchfinder.h index 1f552db2..aa2e4542 100644 --- a/include/wimlib/hc_matchfinder.h +++ b/include/wimlib/hc_matchfinder.h @@ -141,7 +141,7 @@ struct TEMPLATED(hc_matchfinder) { /* Return the number of bytes that must be allocated for a 'hc_matchfinder' that * can work with buffers up to the specified size. */ -static inline size_t +static forceinline size_t TEMPLATED(hc_matchfinder_size)(size_t max_bufsize) { return sizeof(struct TEMPLATED(hc_matchfinder)) + @@ -149,7 +149,7 @@ TEMPLATED(hc_matchfinder_size)(size_t max_bufsize) } /* Prepare the matchfinder for a new input buffer. */ -static inline void +static forceinline void TEMPLATED(hc_matchfinder_init)(struct TEMPLATED(hc_matchfinder) *mf) { memset(mf, 0, sizeof(*mf)); @@ -184,7 +184,7 @@ TEMPLATED(hc_matchfinder_init)(struct TEMPLATED(hc_matchfinder) *mf) * Return the length of the match found, or 'best_len' if no match longer than * 'best_len' was found. */ -static inline u32 +static forceinline u32 TEMPLATED(hc_matchfinder_longest_match)(struct TEMPLATED(hc_matchfinder) * const restrict mf, const u8 * const restrict in_begin, const ptrdiff_t cur_pos, @@ -353,7 +353,7 @@ out: * * Returns @in_next + @count. */ -static inline const u8 * +static forceinline const u8 * TEMPLATED(hc_matchfinder_skip_positions)(struct TEMPLATED(hc_matchfinder) * const restrict mf, const u8 * const restrict in_begin, const ptrdiff_t cur_pos, diff --git a/include/wimlib/lz_extend.h b/include/wimlib/lz_extend.h index cbbe88fd..26f0ce5c 100644 --- a/include/wimlib/lz_extend.h +++ b/include/wimlib/lz_extend.h @@ -28,7 +28,7 @@ * Return the number of bytes at @matchptr that match the bytes at @strptr, up * to a maximum of @max_len. Initially, @len bytes are matched. */ -static inline u32 +static forceinline u32 lz_extend(const u8 * const strptr, const u8 * const matchptr, u32 len, const u32 max_len) { diff --git a/include/wimlib/lz_hash.h b/include/wimlib/lz_hash.h index 7416585a..f7618152 100644 --- a/include/wimlib/lz_hash.h +++ b/include/wimlib/lz_hash.h @@ -30,7 +30,7 @@ * next-highest @num_bits bits of the product as the hash value, as those have * the most randomness. */ -static inline u32 +static forceinline u32 lz_hash(u32 seq, unsigned num_bits) { return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits); diff --git a/include/wimlib/lzms_common.h b/include/wimlib/lzms_common.h index 6db35b77..b5071469 100644 --- a/include/wimlib/lzms_common.h +++ b/include/wimlib/lzms_common.h @@ -23,14 +23,14 @@ extern unsigned lzms_get_slot(u32 value, const u32 slot_base_tab[], unsigned num_slots); /* Return the offset slot for the specified offset */ -static inline unsigned +static forceinline unsigned lzms_get_offset_slot(u32 offset) { return lzms_get_slot(offset, lzms_offset_slot_base, LZMS_MAX_NUM_OFFSET_SYMS); } /* Return the length slot for the specified length */ -static inline unsigned +static forceinline unsigned lzms_get_length_slot(u32 length) { return lzms_get_slot(length, lzms_length_slot_base, LZMS_NUM_LENGTH_SYMS); @@ -71,7 +71,7 @@ extern void lzms_init_probabilities(struct lzms_probabilites *probs); /* Given a decoded or encoded bit, update the probability entry. */ -static inline void +static forceinline void lzms_update_probability_entry(struct lzms_probability_entry *entry, int bit) { STATIC_ASSERT(LZMS_PROBABILITY_DENOMINATOR == sizeof(entry->recent_bits) * 8); @@ -108,7 +108,7 @@ lzms_update_probability_entry(struct lzms_probability_entry *entry, int bit) /* Given a probability entry, return the chance out of * LZMS_PROBABILITY_DENOMINATOR that the next decoded bit will be a 0. */ -static inline u32 +static forceinline u32 lzms_get_probability(const struct lzms_probability_entry *prob_entry) { u32 prob = prob_entry->num_recent_zero_bits; diff --git a/include/wimlib/unaligned.h b/include/wimlib/unaligned.h index cc9f27f1..ead46295 100644 --- a/include/wimlib/unaligned.h +++ b/include/wimlib/unaligned.h @@ -30,13 +30,13 @@ struct type##_unaligned { \ type v; \ } _packed_attribute; \ \ -static inline type \ +static forceinline type \ load_##type##_unaligned(const void *p) \ { \ return ((const struct type##_unaligned *)p)->v; \ } \ \ -static inline void \ +static forceinline void \ store_##type##_unaligned(type val, void *p) \ { \ ((struct type##_unaligned *)p)->v = val; \ @@ -57,7 +57,7 @@ DEFINE_UNALIGNED_TYPE(machine_word_t); #define load_word_unaligned load_machine_word_t_unaligned #define store_word_unaligned store_machine_word_t_unaligned -static inline u16 +static forceinline u16 get_unaligned_le16(const u8 *p) { if (UNALIGNED_ACCESS_IS_FAST) @@ -66,7 +66,7 @@ get_unaligned_le16(const u8 *p) return ((u16)p[1] << 8) | p[0]; } -static inline u32 +static forceinline u32 get_unaligned_le32(const u8 *p) { if (UNALIGNED_ACCESS_IS_FAST) @@ -76,7 +76,7 @@ get_unaligned_le32(const u8 *p) ((u32)p[1] << 8) | p[0]; } -static inline void +static forceinline void put_unaligned_le16(u16 v, u8 *p) { if (UNALIGNED_ACCESS_IS_FAST) { @@ -87,7 +87,7 @@ put_unaligned_le16(u16 v, u8 *p) } } -static inline void +static forceinline void put_unaligned_le32(u32 v, u8 *p) { if (UNALIGNED_ACCESS_IS_FAST) { @@ -106,7 +106,7 @@ put_unaligned_le32(u32 v, u8 *p) * bits contain the first 3 bytes, arranged in octets in a platform-dependent * order, at the memory location from which the input 32-bit value was loaded. */ -static inline u32 +static forceinline u32 loaded_u32_to_u24(u32 v) { if (CPU_IS_LITTLE_ENDIAN) @@ -121,7 +121,7 @@ loaded_u32_to_u24(u32 v) * in the 24 bits is platform-dependent. At least LOAD_U24_REQUIRED_NBYTES * bytes must be available at @p; note that this may be more than 3. */ -static inline u32 +static forceinline u32 load_u24_unaligned(const u8 *p) { #if UNALIGNED_ACCESS_IS_FAST diff --git a/src/divsufsort.c b/src/divsufsort.c index 67536956..c80412f5 100644 --- a/src/divsufsort.c +++ b/src/divsufsort.c @@ -111,7 +111,7 @@ static const int lg_table[256]= { #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) -static inline +static forceinline int ss_ilg(int n) { #if SS_BLOCKSIZE == 0 @@ -154,7 +154,7 @@ static const int sqq_table[256] = { 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 }; -static inline +static forceinline int ss_isqrt(int x) { int y, e; @@ -187,7 +187,7 @@ ss_isqrt(int x) { /*---------------------------------------------------------------------------*/ /* Compares two suffixes. */ -static inline +static forceinline int ss_compare(const unsigned char *T, const int *p1, const int *p2, @@ -238,7 +238,7 @@ ss_insertionsort(const unsigned char *T, const int *PA, #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) -static inline +static forceinline void ss_fixdown(const unsigned char *Td, const int *PA, int *SA, int i, int size) { @@ -280,7 +280,7 @@ ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) { /*---------------------------------------------------------------------------*/ /* Returns the median of three elements. */ -static inline +static forceinline int * ss_median3(const unsigned char *Td, const int *PA, int *v1, int *v2, int *v3) { @@ -293,7 +293,7 @@ ss_median3(const unsigned char *Td, const int *PA, } /* Returns the median of five elements. */ -static inline +static forceinline int * ss_median5(const unsigned char *Td, const int *PA, int *v1, int *v2, int *v3, int *v4, int *v5) { @@ -307,7 +307,7 @@ ss_median5(const unsigned char *Td, const int *PA, } /* Returns the pivot element. */ -static inline +static forceinline int * ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) { int *middle; @@ -335,7 +335,7 @@ ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) { /*---------------------------------------------------------------------------*/ /* Binary partition for substrings. */ -static inline +static forceinline int * ss_partition(const int *PA, int *first, int *last, int depth) { @@ -496,7 +496,7 @@ ss_mintrosort(const unsigned char *T, const int *PA, #if SS_BLOCKSIZE != 0 -static inline +static forceinline void ss_blockswap(int *a, int *b, int n) { int t; @@ -505,7 +505,7 @@ ss_blockswap(int *a, int *b, int n) { } } -static inline +static forceinline void ss_rotate(int *first, int *middle, int *last) { int *a, *b, t; @@ -865,7 +865,7 @@ sssort(const unsigned char *T, const int *PA, /*---------------------------------------------------------------------------*/ -static inline +static forceinline int tr_ilg(int n) { return (n & 0xffff0000) ? @@ -900,7 +900,7 @@ tr_insertionsort(const int *ISAd, int *first, int *last) { /*---------------------------------------------------------------------------*/ -static inline +static forceinline void tr_fixdown(const int *ISAd, int *SA, int i, int size) { int j, k; @@ -941,7 +941,7 @@ tr_heapsort(const int *ISAd, int *SA, int size) { /*---------------------------------------------------------------------------*/ /* Returns the median of three elements. */ -static inline +static forceinline int * tr_median3(const int *ISAd, int *v1, int *v2, int *v3) { if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } @@ -953,7 +953,7 @@ tr_median3(const int *ISAd, int *v1, int *v2, int *v3) { } /* Returns the median of five elements. */ -static inline +static forceinline int * tr_median5(const int *ISAd, int *v1, int *v2, int *v3, int *v4, int *v5) { @@ -967,7 +967,7 @@ tr_median5(const int *ISAd, } /* Returns the pivot element. */ -static inline +static forceinline int * tr_pivot(const int *ISAd, int *first, int *last) { int *middle; @@ -1002,14 +1002,14 @@ struct _trbudget_t { int count; }; -static inline +static forceinline void trbudget_init(trbudget_t *budget, int chance, int incval) { budget->chance = chance; budget->remain = budget->incval = incval; } -static inline +static forceinline int trbudget_check(trbudget_t *budget, int size) { if(size <= budget->remain) { budget->remain -= size; return 1; } @@ -1022,7 +1022,7 @@ trbudget_check(trbudget_t *budget, int size) { /*---------------------------------------------------------------------------*/ -static inline +static forceinline void tr_partition(const int *ISAd, int *first, int *middle, int *last, diff --git a/src/encoding.c b/src/encoding.c index 6d40605b..9337c9a1 100644 --- a/src/encoding.c +++ b/src/encoding.c @@ -77,7 +77,7 @@ typedef unsigned (*decode_codepoint_fn)(const u8 *in, size_t remaining, /* Encode the Unicode codepoint @c and return the number of bytes used. */ typedef unsigned (*encode_codepoint_fn)(u32 c, u8 *out); -static inline unsigned +static forceinline unsigned utf8_decode_codepoint(const u8 *in, size_t remaining, bool validate, u32 *c_ret) { if (likely(in[0] < 0x80)) { /* U+0...U+7F */ @@ -124,7 +124,7 @@ invalid: return 1; } -static inline unsigned +static forceinline unsigned utf8_encode_codepoint(u32 c, u8 *out) { if (likely(c < 0x80)) { @@ -152,7 +152,7 @@ utf8_encode_codepoint(u32 c, u8 *out) return 4; } -static inline unsigned +static forceinline unsigned utf16le_decode_codepoint(const u8 *in, size_t remaining, bool validate, u32 *c_ret) { @@ -188,7 +188,7 @@ invalid: return min(remaining, 2); } -static inline unsigned +static forceinline unsigned utf16le_encode_codepoint(u32 c, u8 *out) { if (likely(c < 0x10000)) { @@ -213,7 +213,7 @@ utf16le_encode_codepoint(u32 c, u8 *out) * If the input string is malformed, return @ilseq_err with errno set to EILSEQ. * If out of memory, return WIMLIB_ERR_NOMEM with errno set to ENOMEM. */ -static inline int +static forceinline int convert_string(const u8 * const in, const size_t in_nbytes, u8 **out_ret, size_t *out_nbytes_ret, int ilseq_err, diff --git a/src/lcpit_matchfinder.c b/src/lcpit_matchfinder.c index 2562bfb2..8b9ffd9d 100644 --- a/src/lcpit_matchfinder.c +++ b/src/lcpit_matchfinder.c @@ -284,7 +284,7 @@ build_LCPIT(u32 intervals[restrict], u32 pos_data[restrict], const u32 n) * around by just continuing until we get to a link that actually takes us * higher in the tree. This can be described as a lazy-update scheme. */ -static inline u32 +static forceinline u32 lcpit_advance_one_byte(const u32 cur_pos, u32 pos_data[restrict], u32 intervals[restrict], @@ -486,7 +486,7 @@ build_LCPIT_huge(u64 intervals64[restrict], u32 pos_data[restrict], const u32 n) /* Like lcpit_advance_one_byte(), but for buffers larger than * MAX_NORMAL_BUFSIZE. */ -static inline u32 +static forceinline u32 lcpit_advance_one_byte_huge(const u32 cur_pos, u32 pos_data[restrict], u64 intervals64[restrict], @@ -538,14 +538,14 @@ lcpit_advance_one_byte_huge(const u32 cur_pos, return matchptr - matches; } -static inline u64 +static forceinline u64 get_pos_data_size(size_t max_bufsize) { return (u64)max((u64)max_bufsize + PREFETCH_SAFETY, DIVSUFSORT_TMP_LEN) * sizeof(u32); } -static inline u64 +static forceinline u64 get_intervals_size(size_t max_bufsize) { return ((u64)max_bufsize + PREFETCH_SAFETY) * diff --git a/src/lzms_common.c b/src/lzms_common.c index 57c17d4d..380b7431 100644 --- a/src/lzms_common.c +++ b/src/lzms_common.c @@ -379,7 +379,7 @@ lzms_dilute_symbol_frequencies(u32 freqs[], unsigned num_syms) #ifdef __x86_64__ -static inline u8 * +static forceinline u8 * find_next_opcode_sse4_2(u8 *p) { const __v16qi potential_opcodes = (__v16qi) {0x48, 0x4C, 0xE8, 0xE9, 0xF0, 0xFF}; @@ -401,7 +401,7 @@ find_next_opcode_sse4_2(u8 *p) } #endif /* __x86_64__ */ -static inline u8 * +static forceinline u8 * find_next_opcode_default(u8 *p) { /* @@ -433,7 +433,7 @@ find_next_opcode_default(u8 *p) return p; } -static inline u8 * +static forceinline u8 * translate_if_needed(u8 *data, u8 *p, s32 *last_x86_pos, s32 last_target_usages[], bool undo) { diff --git a/src/lzms_compress.c b/src/lzms_compress.c index 8ee9e818..09999957 100644 --- a/src/lzms_compress.c +++ b/src/lzms_compress.c @@ -427,7 +427,7 @@ lzms_init_offset_slot_tabs(struct lzms_compressor *c) * Return the length slot for the specified match length, using the compressor's * acceleration table if the length is small enough. */ -static inline unsigned +static forceinline unsigned lzms_comp_get_length_slot(const struct lzms_compressor *c, u32 length) { if (likely(length <= MAX_FAST_LENGTH)) @@ -439,7 +439,7 @@ lzms_comp_get_length_slot(const struct lzms_compressor *c, u32 length) * Return the offset slot for the specified match offset, using the compressor's * acceleration tables to speed up the mapping. */ -static inline unsigned +static forceinline unsigned lzms_comp_get_offset_slot(const struct lzms_compressor *c, u32 offset) { if (offset < 0xe4a5) @@ -529,7 +529,7 @@ lzms_range_encoder_flush(struct lzms_range_encoder *rc) * @prob is the probability out of LZMS_PROBABILITY_DENOMINATOR that the next * bit is 0 rather than 1. */ -static inline void +static forceinline void lzms_range_encode_bit(struct lzms_range_encoder *rc, int bit, u32 prob) { /* Normalize if needed. */ @@ -551,7 +551,7 @@ lzms_range_encode_bit(struct lzms_range_encoder *rc, int bit, u32 prob) * Encode a bit. This wraps around lzms_range_encode_bit() to handle using and * updating the state and its corresponding probability entry. */ -static inline void +static forceinline void lzms_encode_bit(int bit, unsigned *state_p, unsigned num_states, struct lzms_probability_entry *probs, struct lzms_range_encoder *rc) @@ -644,7 +644,7 @@ lzms_output_bitstream_init(struct lzms_output_bitstream *os, * @max_num_bits is a compile-time constant that specifies the maximum number of * bits that can ever be written at this call site. */ -static inline void +static forceinline void lzms_write_bits(struct lzms_output_bitstream *os, const u32 bits, const unsigned num_bits, const unsigned max_num_bits) { @@ -725,7 +725,7 @@ lzms_rebuild_huffman_code(struct lzms_huffman_rebuild_info *rebuild_info) * Encode a symbol using the specified Huffman code. Then, if the Huffman code * needs to be rebuilt, rebuild it and return true; otherwise return false. */ -static inline bool +static forceinline bool lzms_huffman_encode_symbol(unsigned sym, const u32 *codewords, const u8 *lens, u32 *freqs, struct lzms_output_bitstream *os, @@ -936,7 +936,7 @@ lzms_encode_nonempty_item_list(struct lzms_compressor *c, } while (cur_node != end_node); } -static inline void +static forceinline void lzms_encode_item_list(struct lzms_compressor *c, struct lzms_optimum_node *end_node) { @@ -1003,14 +1003,14 @@ lzms_compute_bit_costs(void) #endif /* Return the cost to encode a 0 bit in the specified context. */ -static inline u32 +static forceinline u32 lzms_bit_0_cost(unsigned state, const struct lzms_probability_entry *probs) { return lzms_bit_costs[probs[state].num_recent_zero_bits]; } /* Return the cost to encode a 1 bit in the specified context. */ -static inline u32 +static forceinline u32 lzms_bit_1_cost(unsigned state, const struct lzms_probability_entry *probs) { return lzms_bit_costs[LZMS_PROBABILITY_DENOMINATOR - @@ -1018,7 +1018,7 @@ lzms_bit_1_cost(unsigned state, const struct lzms_probability_entry *probs) } /* Return the cost to encode a literal, including the main bit. */ -static inline u32 +static forceinline u32 lzms_literal_cost(struct lzms_compressor *c, unsigned main_state, unsigned literal) { return lzms_bit_0_cost(main_state, c->probs.main) + @@ -1043,14 +1043,14 @@ lzms_update_fast_length_costs(struct lzms_compressor *c) /* Return the cost to encode the specified match length, which must not exceed * MAX_FAST_LENGTH. */ -static inline u32 +static forceinline u32 lzms_fast_length_cost(const struct lzms_compressor *c, u32 length) { return c->fast_length_cost_tab[length]; } /* Return the cost to encode the specified LZ match offset. */ -static inline u32 +static forceinline u32 lzms_lz_offset_cost(const struct lzms_compressor *c, u32 offset) { unsigned slot = lzms_comp_get_offset_slot(c, offset); @@ -1059,7 +1059,7 @@ lzms_lz_offset_cost(const struct lzms_compressor *c, u32 offset) } /* Return the cost to encode the specified delta power and raw offset. */ -static inline u32 +static forceinline u32 lzms_delta_source_cost(const struct lzms_compressor *c, u32 power, u32 raw_offset) { unsigned slot = lzms_comp_get_offset_slot(c, raw_offset); @@ -1122,31 +1122,31 @@ lzms_update_lru_queues(struct lzms_adaptive_state *state) state->prev_delta_pair = state->upcoming_delta_pair; } -static inline void +static forceinline void lzms_update_state(u8 *state_p, int bit, unsigned num_states) { *state_p = ((*state_p << 1) | bit) & (num_states - 1); } -static inline void +static forceinline void lzms_update_main_state(struct lzms_adaptive_state *state, int is_match) { lzms_update_state(&state->main_state, is_match, LZMS_NUM_MAIN_PROBS); } -static inline void +static forceinline void lzms_update_match_state(struct lzms_adaptive_state *state, int is_delta) { lzms_update_state(&state->match_state, is_delta, LZMS_NUM_MATCH_PROBS); } -static inline void +static forceinline void lzms_update_lz_state(struct lzms_adaptive_state *state, int is_rep) { lzms_update_state(&state->lz_state, is_rep, LZMS_NUM_LZ_PROBS); } -static inline void +static forceinline void lzms_update_lz_rep_states(struct lzms_adaptive_state *state, int rep_idx) { for (int i = 0; i < rep_idx; i++) @@ -1156,13 +1156,13 @@ lzms_update_lz_rep_states(struct lzms_adaptive_state *state, int rep_idx) lzms_update_state(&state->lz_rep_states[rep_idx], 0, LZMS_NUM_LZ_REP_PROBS); } -static inline void +static forceinline void lzms_update_delta_state(struct lzms_adaptive_state *state, int is_rep) { lzms_update_state(&state->delta_state, is_rep, LZMS_NUM_DELTA_PROBS); } -static inline void +static forceinline void lzms_update_delta_rep_states(struct lzms_adaptive_state *state, int rep_idx) { for (int i = 0; i < rep_idx; i++) @@ -1199,7 +1199,7 @@ lzms_init_delta_matchfinder(struct lzms_compressor *c) * NBYTES_HASHED_FOR_DELTA bytes of the sequence beginning at @p when taken in a * delta context with the specified @span. */ -static inline u32 +static forceinline u32 lzms_delta_hash(const u8 *p, const u32 pos, u32 span) { /* A delta match has a certain span and an offset that is a multiple of @@ -1222,7 +1222,7 @@ lzms_delta_hash(const u8 *p, const u32 pos, u32 span) * specified @span and having the initial @len, extend the match as far as * possible, up to a limit of @max_len. */ -static inline u32 +static forceinline u32 lzms_extend_delta_match(const u8 *in_next, const u8 *matchptr, u32 len, u32 max_len, u32 span) { diff --git a/src/lzms_decompress.c b/src/lzms_decompress.c index 2ef2debd..4dd36627 100644 --- a/src/lzms_decompress.c +++ b/src/lzms_decompress.c @@ -376,7 +376,7 @@ lzms_input_bitstream_init(struct lzms_input_bitstream *is, /* Ensure that at least @num_bits bits are in the bitbuffer variable. * @num_bits cannot be more than 32. */ -static inline void +static forceinline void lzms_ensure_bits(struct lzms_input_bitstream *is, unsigned num_bits) { unsigned avail; @@ -408,14 +408,14 @@ lzms_ensure_bits(struct lzms_input_bitstream *is, unsigned num_bits) } /* Get @num_bits bits from the bitbuffer variable. */ -static inline bitbuf_t +static forceinline bitbuf_t lzms_peek_bits(struct lzms_input_bitstream *is, unsigned num_bits) { return (is->bitbuf >> 1) >> (BITBUF_NBITS - num_bits - 1); } /* Remove @num_bits bits from the bitbuffer variable. */ -static inline void +static forceinline void lzms_remove_bits(struct lzms_input_bitstream *is, unsigned num_bits) { is->bitbuf <<= num_bits; @@ -423,7 +423,7 @@ lzms_remove_bits(struct lzms_input_bitstream *is, unsigned num_bits) } /* Remove and return @num_bits bits from the bitbuffer variable. */ -static inline bitbuf_t +static forceinline bitbuf_t lzms_pop_bits(struct lzms_input_bitstream *is, unsigned num_bits) { bitbuf_t bits = lzms_peek_bits(is, num_bits); @@ -432,7 +432,7 @@ lzms_pop_bits(struct lzms_input_bitstream *is, unsigned num_bits) } /* Read @num_bits bits from the input bitstream. */ -static inline bitbuf_t +static forceinline bitbuf_t lzms_read_bits(struct lzms_input_bitstream *is, unsigned num_bits) { lzms_ensure_bits(is, num_bits); @@ -457,7 +457,7 @@ lzms_range_decoder_init(struct lzms_range_decoder *rd, * probability entry to use. The state and probability entry will be updated * based on the decoded bit. */ -static inline int +static forceinline int lzms_decode_bit(struct lzms_range_decoder *rd, u32 *state_p, u32 num_states, struct lzms_probability_entry *probs) { @@ -597,7 +597,7 @@ lzms_rebuild_huffman_code(struct lzms_huffman_rebuild_info *rebuild_info) /* XXX: mostly copied from read_huffsym() in decompress_common.h because LZMS * needs its own bitstream */ -static inline unsigned +static forceinline unsigned lzms_decode_huffman_symbol(struct lzms_input_bitstream *is, u16 decode_table[], unsigned table_bits, u32 freqs[], struct lzms_huffman_rebuild_info *rebuild_info) @@ -627,7 +627,7 @@ lzms_decode_huffman_symbol(struct lzms_input_bitstream *is, u16 decode_table[], return symbol; } -static inline unsigned +static forceinline unsigned lzms_decode_literal(struct lzms_decompressor *d, struct lzms_input_bitstream *is) { @@ -638,7 +638,7 @@ lzms_decode_literal(struct lzms_decompressor *d, &d->literal_rebuild_info); } -static inline u32 +static forceinline u32 lzms_decode_lz_offset(struct lzms_decompressor *d, struct lzms_input_bitstream *is) { @@ -651,7 +651,7 @@ lzms_decode_lz_offset(struct lzms_decompressor *d, lzms_read_bits(is, lzms_extra_offset_bits[slot]); } -static inline u32 +static forceinline u32 lzms_decode_length(struct lzms_decompressor *d, struct lzms_input_bitstream *is) { @@ -668,7 +668,7 @@ lzms_decode_length(struct lzms_decompressor *d, return length; } -static inline u32 +static forceinline u32 lzms_decode_delta_offset(struct lzms_decompressor *d, struct lzms_input_bitstream *is) { @@ -681,7 +681,7 @@ lzms_decode_delta_offset(struct lzms_decompressor *d, lzms_read_bits(is, lzms_extra_offset_bits[slot]); } -static inline unsigned +static forceinline unsigned lzms_decode_delta_power(struct lzms_decompressor *d, struct lzms_input_bitstream *is) { diff --git a/src/lzx_compress.c b/src/lzx_compress.c index 10b51902..21636d49 100644 --- a/src/lzx_compress.c +++ b/src/lzx_compress.c @@ -488,7 +488,7 @@ struct lzx_compressor { * This requires that the limit be no more than the length of offset_slot_tab_1 * (currently 32768). */ -static inline bool +static forceinline bool lzx_is_16_bit(size_t max_bufsize) { STATIC_ASSERT(ARRAY_LEN(((struct lzx_compressor *)0)->offset_slot_tab_1) == 32768); @@ -498,7 +498,7 @@ lzx_is_16_bit(size_t max_bufsize) /* * Return the offset slot for the specified adjusted match offset. */ -static inline unsigned +static forceinline unsigned lzx_get_offset_slot(struct lzx_compressor *c, u32 adjusted_offset, bool is_16_bit) { @@ -574,7 +574,7 @@ lzx_init_output(struct lzx_output_bitstream *os, void *buffer, size_t size) * Add some bits to the bitbuffer variable of the output bitstream. The caller * must make sure there is enough room. */ -static inline void +static forceinline void lzx_add_bits(struct lzx_output_bitstream *os, u32 bits, unsigned num_bits) { os->bitbuf = (os->bitbuf << num_bits) | bits; @@ -586,7 +586,7 @@ lzx_add_bits(struct lzx_output_bitstream *os, u32 bits, unsigned num_bits) * specifies the maximum number of bits that may have been added since the last * flush. */ -static inline void +static forceinline void lzx_flush_bits(struct lzx_output_bitstream *os, unsigned max_num_bits) { /* Masking the number of bits to shift is only needed to avoid undefined @@ -609,7 +609,7 @@ lzx_flush_bits(struct lzx_output_bitstream *os, unsigned max_num_bits) } /* Add at most 16 bits to the bitbuffer and flush it. */ -static inline void +static forceinline void lzx_write_bits(struct lzx_output_bitstream *os, u32 bits, unsigned num_bits) { lzx_add_bits(os, bits, num_bits); @@ -1218,7 +1218,7 @@ lzx_init_block_split_stats(struct lzx_block_split_stats *stats) /* Literal observation. Heuristic: use the top 2 bits and low 1 bits of the * literal, for 8 possible literal observation types. */ -static inline void +static forceinline void lzx_observe_literal(struct lzx_block_split_stats *stats, u8 lit) { stats->new_observations[((lit >> 5) & 0x6) | (lit & 1)]++; @@ -1227,7 +1227,7 @@ lzx_observe_literal(struct lzx_block_split_stats *stats, u8 lit) /* Match observation. Heuristic: use one observation type for "short match" and * one observation type for "long match". */ -static inline void +static forceinline void lzx_observe_match(struct lzx_block_split_stats *stats, unsigned length) { stats->new_observations[NUM_LITERAL_OBSERVATION_TYPES + (length >= 5)]++; @@ -1298,26 +1298,26 @@ struct lzx_lru_queue { ((u64)1 << LZX_QUEUE_R1_SHIFT) | \ ((u64)1 << LZX_QUEUE_R2_SHIFT) } -static inline u64 +static forceinline u64 lzx_lru_queue_R0(struct lzx_lru_queue queue) { return (queue.R >> LZX_QUEUE_R0_SHIFT) & LZX_QUEUE_OFFSET_MASK; } -static inline u64 +static forceinline u64 lzx_lru_queue_R1(struct lzx_lru_queue queue) { return (queue.R >> LZX_QUEUE_R1_SHIFT) & LZX_QUEUE_OFFSET_MASK; } -static inline u64 +static forceinline u64 lzx_lru_queue_R2(struct lzx_lru_queue queue) { return (queue.R >> LZX_QUEUE_R2_SHIFT) & LZX_QUEUE_OFFSET_MASK; } /* Push a match offset onto the front (most recently used) end of the queue. */ -static inline struct lzx_lru_queue +static forceinline struct lzx_lru_queue lzx_lru_queue_push(struct lzx_lru_queue queue, u32 offset) { return (struct lzx_lru_queue) { @@ -1326,7 +1326,7 @@ lzx_lru_queue_push(struct lzx_lru_queue queue, u32 offset) } /* Swap a match offset to the front of the queue. */ -static inline struct lzx_lru_queue +static forceinline struct lzx_lru_queue lzx_lru_queue_swap(struct lzx_lru_queue queue, unsigned idx) { unsigned shift = idx * 21; @@ -1340,7 +1340,7 @@ lzx_lru_queue_swap(struct lzx_lru_queue queue, unsigned idx) }; } -static inline u32 +static forceinline u32 lzx_walk_item_list(struct lzx_compressor *c, u32 block_size, bool is_16_bit, bool record) { @@ -1475,7 +1475,7 @@ lzx_walk_item_list(struct lzx_compressor *c, u32 block_size, bool is_16_bit, * beginning of the block), but this doesn't matter because this function only * computes frequencies. */ -static inline void +static forceinline void lzx_tally_item_list(struct lzx_compressor *c, u32 block_size, bool is_16_bit) { lzx_walk_item_list(c, block_size, is_16_bit, false); @@ -1490,7 +1490,7 @@ lzx_tally_item_list(struct lzx_compressor *c, u32 block_size, bool is_16_bit) * first-to-last order. The return value is the index in c->chosen_sequences at * which the lzx_sequences begin. */ -static inline u32 +static forceinline u32 lzx_record_item_list(struct lzx_compressor *c, u32 block_size, bool is_16_bit) { return lzx_walk_item_list(c, block_size, is_16_bit, true); @@ -1530,7 +1530,7 @@ lzx_record_item_list(struct lzx_compressor *c, u32 block_size, bool is_16_bit) * one step ahead, with the exception of special consideration for "gap * matches". */ -static inline struct lzx_lru_queue +static forceinline struct lzx_lru_queue lzx_find_min_cost_path(struct lzx_compressor * const restrict c, const u8 * const restrict block_begin, const u32 block_size, @@ -2095,7 +2095,7 @@ lzx_set_costs_from_codes(struct lzx_compressor *c) * for the block uses default costs; additional passes use costs derived from * the Huffman codes computed in the previous pass. */ -static inline struct lzx_lru_queue +static forceinline struct lzx_lru_queue lzx_optimize_and_flush_block(struct lzx_compressor * const restrict c, struct lzx_output_bitstream * const restrict os, const u8 * const restrict block_begin, @@ -2144,7 +2144,7 @@ lzx_optimize_and_flush_block(struct lzx_compressor * const restrict c, * time, but rather to produce a compression ratio significantly better than a * simpler "greedy" or "lazy" parse while still being relatively fast. */ -static inline void +static forceinline void lzx_compress_near_optimal(struct lzx_compressor * restrict c, const u8 * const restrict in_begin, size_t in_nbytes, struct lzx_output_bitstream * restrict os, @@ -2349,7 +2349,7 @@ lzx_compress_near_optimal_32(struct lzx_compressor *c, const u8 *in, * Huffman symbol for the literal, increments the current literal run length, * and "observes" the literal for the block split statistics. */ -static inline void +static forceinline void lzx_choose_literal(struct lzx_compressor *c, unsigned literal, u32 *litrunlen_p) { lzx_observe_literal(&c->split_stats, literal); @@ -2363,7 +2363,7 @@ lzx_choose_literal(struct lzx_compressor *c, unsigned literal, u32 *litrunlen_p) * literal run, updates the recent offsets queue, and "observes" the match for * the block split statistics. */ -static inline void +static forceinline void lzx_choose_match(struct lzx_compressor *c, unsigned length, u32 adjusted_offset, u32 recent_offsets[LZX_NUM_RECENT_OFFSETS], bool is_16_bit, u32 *litrunlen_p, struct lzx_sequence **next_seq_p) @@ -2425,7 +2425,7 @@ lzx_choose_match(struct lzx_compressor *c, unsigned length, u32 adjusted_offset, * which is just a literal run with no following match. This literal run might * be empty. */ -static inline void +static forceinline void lzx_finish_sequence(struct lzx_sequence *last_seq, u32 litrunlen) { last_seq->litrunlen = litrunlen; @@ -2492,7 +2492,7 @@ lzx_find_longest_repeat_offset_match(const u8 * const in_next, * offset matches, since those require fewer bits to encode. */ -static inline unsigned +static forceinline unsigned lzx_explicit_offset_match_score(unsigned len, u32 adjusted_offset) { unsigned score = len; @@ -2505,7 +2505,7 @@ lzx_explicit_offset_match_score(unsigned len, u32 adjusted_offset) return score; } -static inline unsigned +static forceinline unsigned lzx_repeat_offset_match_score(unsigned rep_len, unsigned rep_idx) { return rep_len + 3; @@ -2523,7 +2523,7 @@ lzx_repeat_offset_match_score(unsigned rep_len, unsigned rep_idx) * when we decide whether a match is "better" than another, we take the offset * into consideration as well as the length. */ -static inline void +static forceinline void lzx_compress_lazy(struct lzx_compressor * restrict c, const u8 * const restrict in_begin, size_t in_nbytes, struct lzx_output_bitstream * restrict os, bool is_16_bit) diff --git a/src/lzx_decompress.c b/src/lzx_decompress.c index cce98e32..299b5409 100644 --- a/src/lzx_decompress.c +++ b/src/lzx_decompress.c @@ -118,7 +118,7 @@ struct lzx_decompressor { } _aligned_attribute(DECODE_TABLE_ALIGNMENT); /* Read a Huffman-encoded symbol using the precode. */ -static inline unsigned +static forceinline unsigned read_presym(const struct lzx_decompressor *d, struct input_bitstream *is) { return read_huffsym(is, d->precode_decode_table, @@ -126,7 +126,7 @@ read_presym(const struct lzx_decompressor *d, struct input_bitstream *is) } /* Read a Huffman-encoded symbol using the main code. */ -static inline unsigned +static forceinline unsigned read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is) { return read_huffsym(is, d->maincode_decode_table, @@ -134,7 +134,7 @@ read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is) } /* Read a Huffman-encoded symbol using the length code. */ -static inline unsigned +static forceinline unsigned read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is) { return read_huffsym(is, d->lencode_decode_table, @@ -142,7 +142,7 @@ read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is) } /* Read a Huffman-encoded symbol using the aligned offset code. */ -static inline unsigned +static forceinline unsigned read_alignedsym(const struct lzx_decompressor *d, struct input_bitstream *is) { return read_huffsym(is, d->alignedcode_decode_table, diff --git a/src/xpress_compress.c b/src/xpress_compress.c index 99a4b46a..1b430912 100644 --- a/src/xpress_compress.c +++ b/src/xpress_compress.c @@ -279,7 +279,7 @@ xpress_init_output(struct xpress_output_bitstream *os, void *buffer, size_t size * If the output buffer space is exhausted, then the bits will be ignored, and * xpress_flush_output() will return 0 when it gets called. */ -static inline void +static forceinline void xpress_write_bits(struct xpress_output_bitstream *os, const u32 bits, const unsigned num_bits) { @@ -303,7 +303,7 @@ xpress_write_bits(struct xpress_output_bitstream *os, /* * Interweave a literal byte into the output bitstream. */ -static inline void +static forceinline void xpress_write_byte(struct xpress_output_bitstream *os, u8 byte) { if (os->next_byte < os->end) @@ -313,7 +313,7 @@ xpress_write_byte(struct xpress_output_bitstream *os, u8 byte) /* * Interweave two literal bytes into the output bitstream. */ -static inline void +static forceinline void xpress_write_u16(struct xpress_output_bitstream *os, u16 v) { if (os->end - os->next_byte >= 2) { @@ -338,7 +338,7 @@ xpress_flush_output(struct xpress_output_bitstream *os) return os->next_byte - os->start; } -static inline void +static forceinline void xpress_write_extra_length_bytes(struct xpress_output_bitstream *os, unsigned adjusted_len) { @@ -353,7 +353,7 @@ xpress_write_extra_length_bytes(struct xpress_output_bitstream *os, } /* Output a match or literal. */ -static inline void +static forceinline void xpress_write_item(struct xpress_item item, struct xpress_output_bitstream *os, const u32 codewords[], const u8 lens[]) { @@ -484,7 +484,7 @@ xpress_write(struct xpress_compressor *c, void *out, size_t out_nbytes_avail, /* Tally the Huffman symbol for a literal and return the intermediate * representation of that literal. */ -static inline struct xpress_item +static forceinline struct xpress_item xpress_record_literal(struct xpress_compressor *c, unsigned literal) { c->freqs[literal]++; @@ -496,7 +496,7 @@ xpress_record_literal(struct xpress_compressor *c, unsigned literal) /* Tally the Huffman symbol for a match and return the intermediate * representation of that match. */ -static inline struct xpress_item +static forceinline struct xpress_item xpress_record_match(struct xpress_compressor *c, unsigned length, unsigned offset) { unsigned adjusted_len = length - XPRESS_MIN_MATCH_LEN;