* This requires that the limit be no more than the length of offset_slot_tab_1
* (currently 32768).
*/
-static inline bool
+static forceinline bool
lzx_is_16_bit(size_t max_bufsize)
{
STATIC_ASSERT(ARRAY_LEN(((struct lzx_compressor *)0)->offset_slot_tab_1) == 32768);
/*
* Return the offset slot for the specified adjusted match offset.
*/
-static inline unsigned
+static forceinline unsigned
lzx_get_offset_slot(struct lzx_compressor *c, u32 adjusted_offset,
bool is_16_bit)
{
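/*
 * Conceptually (for an explicit-offset match; a sketch, not the actual
 * lookup), slot 's' covers the adjusted offsets in
 * [lzx_offset_slot_base[s] + LZX_OFFSET_ADJUSTMENT,
 *  lzx_offset_slot_base[s + 1] + LZX_OFFSET_ADJUSTMENT), and the position
 * within that range is later sent as the slot's extra bits:
 *
 *	unsigned slot = LZX_MAX_OFFSET_SLOTS - 1;
 *
 *	while (adjusted_offset <
 *	       lzx_offset_slot_base[slot] + LZX_OFFSET_ADJUSTMENT)
 *		slot--;
 *	return slot;
 *
 * The compressor instead precomputes this mapping, so the hot path is a
 * single lookup in offset_slot_tab_1 (which covers adjusted offsets below
 * 32768).
 */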
* Add some bits to the bitbuffer variable of the output bitstream. The caller
* must make sure there is enough room.
*/
-static inline void
+static forceinline void
lzx_add_bits(struct lzx_output_bitstream *os, u32 bits, unsigned num_bits)
{
os->bitbuf = (os->bitbuf << num_bits) | bits;
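/* For example, adding the 3-bit value 0b101 and then the 2-bit value 0b01
 * leaves 0b10101 in the low 5 bits of bitbuf: later additions occupy the
 * less significant positions. */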
* specifies the maximum number of bits that may have been added since the last
* flush.
*/
-static inline void
+static forceinline void
lzx_flush_bits(struct lzx_output_bitstream *os, unsigned max_num_bits)
{
/* Masking the number of bits to shift is only needed to avoid undefined
}
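/*
 * Typical usage, as in the match output code below: add a value, then flush
 * with the worst-case number of bits that may have been added since the
 * previous flush, e.g.
 *
 *	lzx_add_bits(os, extra_bits, num_extra_bits);
 *	if (!CAN_BUFFER(MAX_MATCH_BITS))
 *		lzx_flush_bits(os, LZX_MAX_NUM_EXTRA_BITS);
 *
 * On 64-bit builds CAN_BUFFER(MAX_MATCH_BITS) holds, so these intermediate
 * flushes are compiled out (see the !CAN_BUFFER(MAX_MATCH_BITS) checks
 * below).
 */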
/* Add at most 16 bits to the bitbuffer and flush it. */
-static inline void
+static forceinline void
lzx_write_bits(struct lzx_output_bitstream *os, u32 bits, unsigned num_bits)
{
lzx_add_bits(os, bits, num_bits);
const struct lzx_codes *codes)
{
const struct lzx_sequence *seq = sequences;
- u32 ones_if_aligned = 0 - (block_type == LZX_BLOCKTYPE_ALIGNED);
+ unsigned min_aligned_offset_slot;
+
+ if (block_type == LZX_BLOCKTYPE_ALIGNED)
+ min_aligned_offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT;
+ else
+ min_aligned_offset_slot = LZX_MAX_OFFSET_SLOTS;
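/*
 * An offset is encoded with the aligned offset code only when its slot
 * provides at least LZX_NUM_ALIGNED_OFFSET_BITS extra bits, i.e. when
 * offset_slot >= LZX_MIN_ALIGNED_OFFSET_SLOT.  Setting
 * min_aligned_offset_slot past the last valid slot for verbatim blocks
 * makes the aligned-code path below unreachable.
 */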
for (;;) {
/* Output the next sequence. */
extra_bits = adjusted_offset - (lzx_offset_slot_base[offset_slot] +
LZX_OFFSET_ADJUSTMENT);
- #define MAX_MATCH_BITS (MAIN_CODEWORD_LIMIT + LENGTH_CODEWORD_LIMIT + \
- 14 + ALIGNED_CODEWORD_LIMIT)
+ #define MAX_MATCH_BITS (MAIN_CODEWORD_LIMIT + \
+ LENGTH_CODEWORD_LIMIT + \
+ LZX_MAX_NUM_EXTRA_BITS - \
+ LZX_NUM_ALIGNED_OFFSET_BITS + \
+ ALIGNED_CODEWORD_LIMIT)
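/*
 * In the aligned case, the low LZX_NUM_ALIGNED_OFFSET_BITS of the extra
 * offset bits are sent as an aligned codeword rather than verbatim, so at
 * most LZX_MAX_NUM_EXTRA_BITS - LZX_NUM_ALIGNED_OFFSET_BITS (17 - 3 = 14)
 * extra bits are written directly, plus the aligned codeword itself.
 */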
/* Verify optimization is enabled on 64-bit */
STATIC_ASSERT(WORDBITS < 64 || CAN_BUFFER(MAX_MATCH_BITS));
* there are at least 3 extra offset bits required. All other
* extra offset bits are output verbatim. */
- if ((adjusted_offset & ones_if_aligned) >= 16) {
+ if (offset_slot >= min_aligned_offset_slot) {
lzx_add_bits(os, extra_bits >> LZX_NUM_ALIGNED_OFFSET_BITS,
num_extra_bits - LZX_NUM_ALIGNED_OFFSET_BITS);
if (!CAN_BUFFER(MAX_MATCH_BITS))
- lzx_flush_bits(os, 14);
+ lzx_flush_bits(os, LZX_MAX_NUM_EXTRA_BITS -
+ LZX_NUM_ALIGNED_OFFSET_BITS);
lzx_add_bits(os, codes->codewords.aligned[adjusted_offset &
LZX_ALIGNED_OFFSET_BITMASK],
if (!CAN_BUFFER(MAX_MATCH_BITS))
lzx_flush_bits(os, ALIGNED_CODEWORD_LIMIT);
} else {
- STATIC_ASSERT(CAN_BUFFER(17));
+ STATIC_ASSERT(CAN_BUFFER(LZX_MAX_NUM_EXTRA_BITS));
lzx_add_bits(os, extra_bits, num_extra_bits);
if (!CAN_BUFFER(MAX_MATCH_BITS))
- lzx_flush_bits(os, 17);
+ lzx_flush_bits(os, LZX_MAX_NUM_EXTRA_BITS);
}
if (CAN_BUFFER(MAX_MATCH_BITS))
/* Literal observation. Heuristic: use the top 2 bits and the low bit of the
* literal, for 8 possible literal observation types. */
-static inline void
+static forceinline void
lzx_observe_literal(struct lzx_block_split_stats *stats, u8 lit)
{
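/* For example, lit == 0x65 ('e' == 0b01100101) has top bits 01 and low bit
 * 1, giving observation type ((0x65 >> 5) & 0x6) | (0x65 & 1) == 0b011 == 3. */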
stats->new_observations[((lit >> 5) & 0x6) | (lit & 1)]++;
/* Match observation. Heuristic: use one observation type for "short match" and
* one observation type for "long match". */
-static inline void
+static forceinline void
lzx_observe_match(struct lzx_block_split_stats *stats, unsigned length)
{
stats->new_observations[NUM_LITERAL_OBSERVATION_TYPES + (length >= 5)]++;
((u64)1 << LZX_QUEUE_R1_SHIFT) | \
((u64)1 << LZX_QUEUE_R2_SHIFT) }
-static inline u64
+static forceinline u64
lzx_lru_queue_R0(struct lzx_lru_queue queue)
{
return (queue.R >> LZX_QUEUE_R0_SHIFT) & LZX_QUEUE_OFFSET_MASK;
}
-static inline u64
+static forceinline u64
lzx_lru_queue_R1(struct lzx_lru_queue queue)
{
return (queue.R >> LZX_QUEUE_R1_SHIFT) & LZX_QUEUE_OFFSET_MASK;
}
-static inline u64
+static forceinline u64
lzx_lru_queue_R2(struct lzx_lru_queue queue)
{
return (queue.R >> LZX_QUEUE_R2_SHIFT) & LZX_QUEUE_OFFSET_MASK;
}
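/*
 * The three recent offsets are packed into a single u64, one 21-bit field
 * each, with R0 (most recently used) in the least significant bits:
 *
 *	bits  0-20: R0,   bits 21-41: R1,   bits 42-62: R2
 *
 * (assuming LZX_QUEUE_R0/R1/R2_SHIFT are 0, 21 and 42, consistent with the
 * 'idx * 21' shift in lzx_lru_queue_swap() below).  Pushing offset X onto
 * the queue (R0, R1, R2) yields (X, R0, R1); swapping index 2 to the front
 * exchanges R0 and R2, yielding (R2, R1, R0).
 */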
/* Push a match offset onto the front (most recently used) end of the queue. */
-static inline struct lzx_lru_queue
+static forceinline struct lzx_lru_queue
lzx_lru_queue_push(struct lzx_lru_queue queue, u32 offset)
{
return (struct lzx_lru_queue) {
}
/* Swap a match offset to the front of the queue. */
-static inline struct lzx_lru_queue
+static forceinline struct lzx_lru_queue
lzx_lru_queue_swap(struct lzx_lru_queue queue, unsigned idx)
{
unsigned shift = idx * 21;
};
}
-static inline u32
+static forceinline u32
lzx_walk_item_list(struct lzx_compressor *c, u32 block_size, bool is_16_bit,
bool record)
{
/* Record a match. */
/* Tally the aligned offset symbol if needed. */
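/* (LZX_MIN_ALIGNED_OFFSET + LZX_OFFSET_ADJUSTMENT equals the literal 16 it
 * replaces: the smallest adjusted offset whose slot has aligned offset
 * bits.) */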
- if (adjusted_offset >= 16)
+ if (adjusted_offset >= LZX_MIN_ALIGNED_OFFSET + LZX_OFFSET_ADJUSTMENT)
c->freqs.aligned[adjusted_offset & LZX_ALIGNED_OFFSET_BITMASK]++;
/* Record the adjusted length. */
* beginning of the block), but this doesn't matter because this function only
* computes frequencies.
*/
-static inline void
+static forceinline void
lzx_tally_item_list(struct lzx_compressor *c, u32 block_size, bool is_16_bit)
{
lzx_walk_item_list(c, block_size, is_16_bit, false);
* first-to-last order. The return value is the index in c->chosen_sequences at
* which the lzx_sequences begin.
*/
-static inline u32
+static forceinline u32
lzx_record_item_list(struct lzx_compressor *c, u32 block_size, bool is_16_bit)
{
return lzx_walk_item_list(c, block_size, is_16_bit, true);
* one step ahead, with the exception of special consideration for "gap
* matches".
*/
-static inline struct lzx_lru_queue
+static forceinline struct lzx_lru_queue
lzx_find_min_cost_path(struct lzx_compressor * const restrict c,
const u8 * const restrict block_begin,
const u32 block_size,
u32 cost;
#if CONSIDER_ALIGNED_COSTS
- if (offset >= 16 - LZX_OFFSET_ADJUSTMENT)
+ if (offset >= LZX_MIN_ALIGNED_OFFSET)
base_cost += c->costs.aligned[adjusted_offset &
LZX_ALIGNED_OFFSET_BITMASK];
#endif
unsigned i;
#if CONSIDER_ALIGNED_COSTS
- if (offset_slot >= 8)
+ if (offset_slot >= LZX_MIN_ALIGNED_OFFSET_SLOT)
extra_cost -= LZX_NUM_ALIGNED_OFFSET_BITS * BIT_COST;
#endif
* for the block uses default costs; additional passes use costs derived from
* the Huffman codes computed in the previous pass.
*/
-static inline struct lzx_lru_queue
+static forceinline struct lzx_lru_queue
lzx_optimize_and_flush_block(struct lzx_compressor * const restrict c,
struct lzx_output_bitstream * const restrict os,
const u8 * const restrict block_begin,
* time, but rather to produce a compression ratio significantly better than a
* simpler "greedy" or "lazy" parse while still being relatively fast.
*/
-static inline void
+static forceinline void
lzx_compress_near_optimal(struct lzx_compressor * restrict c,
const u8 * const restrict in_begin, size_t in_nbytes,
struct lzx_output_bitstream * restrict os,
* Huffman symbol for the literal, increments the current literal run length,
* and "observes" the literal for the block split statistics.
*/
-static inline void
+static forceinline void
lzx_choose_literal(struct lzx_compressor *c, unsigned literal, u32 *litrunlen_p)
{
lzx_observe_literal(&c->split_stats, literal);
* literal run, updates the recent offsets queue, and "observes" the match for
* the block split statistics.
*/
-static inline void
+static forceinline void
lzx_choose_match(struct lzx_compressor *c, unsigned length, u32 adjusted_offset,
u32 recent_offsets[LZX_NUM_RECENT_OFFSETS], bool is_16_bit,
u32 *litrunlen_p, struct lzx_sequence **next_seq_p)
* which is just a literal run with no following match. This literal run might
* be empty.
*/
-static inline void
+static forceinline void
lzx_finish_sequence(struct lzx_sequence *last_seq, u32 litrunlen)
{
last_seq->litrunlen = litrunlen;
* offset matches, since those require fewer bits to encode.
*/
-static inline unsigned
+static forceinline unsigned
lzx_explicit_offset_match_score(unsigned len, u32 adjusted_offset)
{
unsigned score = len;
return score;
}
-static inline unsigned
+static forceinline unsigned
lzx_repeat_offset_match_score(unsigned rep_len, unsigned rep_idx)
{
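/* rep_idx is unused: every repeat offset gets the same fixed +3 bonus over
 * the match length, which biases match selection toward repeat offsets
 * since they need fewer bits to encode. */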
return rep_len + 3;
* when we decide whether a match is "better" than another, we take the offset
* into consideration as well as the length.
*/
-static inline void
+static forceinline void
lzx_compress_lazy(struct lzx_compressor * restrict c,
const u8 * const restrict in_begin, size_t in_nbytes,
struct lzx_output_bitstream * restrict os, bool is_16_bit)