X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flzms_compress.c;h=09999957278befa0018c86f670506fba0cdb3120;hp=ef67edb213f79fa2c1bf2b1057568a28e93d7fa5;hb=4a20aae0dd8469a352517a0b107416ffa99ccc55;hpb=de58d5f57732df8129fbfd71d46ae5968ac59646 diff --git a/src/lzms_compress.c b/src/lzms_compress.c index ef67edb2..09999957 100644 --- a/src/lzms_compress.c +++ b/src/lzms_compress.c @@ -83,13 +83,13 @@ struct lzms_output_bitstream { /* Number of bits currently held in @bitbuf */ unsigned bitcount; - /* Pointer to one past the next position in the output buffer at which - * to output a 16-bit coding unit */ - le16 *next; - /* Pointer to the beginning of the output buffer (this is the "end" when * writing backwards!) */ - le16 *begin; + u8 *begin; + + /* Pointer to just past the next position in the output buffer at which + * to output a 16-bit coding unit */ + u8 *next; }; /* This structure tracks the state of range encoding and its output, which @@ -112,14 +112,14 @@ struct lzms_range_encoder { u32 cache_size; /* Pointer to the beginning of the output buffer */ - le16 *begin; + u8 *begin; /* Pointer to the position in the output buffer at which the next coding * unit must be written */ - le16 *next; + u8 *next; /* Pointer to just past the end of the output buffer */ - le16 *end; + u8 *end; }; /* Bookkeeping information for an adaptive Huffman code */ @@ -170,7 +170,7 @@ struct lzms_item { #define DELTA_SOURCE_POWER_SHIFT 28 #define DELTA_SOURCE_RAW_OFFSET_MASK (((u32)1 << DELTA_SOURCE_POWER_SHIFT) - 1) -static inline void +static _unused_attribute void check_that_powers_fit_in_bitfield(void) { STATIC_ASSERT(LZMS_NUM_DELTA_POWER_SYMS <= (1 << (31 - DELTA_SOURCE_POWER_SHIFT))); @@ -249,7 +249,7 @@ struct lzms_optimum_node { * * Note: this adaptive state structure also does not include the * probability entries or current Huffman codewords. Those aren't - * maintained per-position and are only updated occassionally. + * maintained per-position and are only updated occasionally. */ struct lzms_adaptive_state state; } _aligned_attribute(64); @@ -427,7 +427,7 @@ lzms_init_offset_slot_tabs(struct lzms_compressor *c) * Return the length slot for the specified match length, using the compressor's * acceleration table if the length is small enough. */ -static inline unsigned +static forceinline unsigned lzms_comp_get_length_slot(const struct lzms_compressor *c, u32 length) { if (likely(length <= MAX_FAST_LENGTH)) @@ -439,7 +439,7 @@ lzms_comp_get_length_slot(const struct lzms_compressor *c, u32 length) * Return the offset slot for the specified match offset, using the compressor's * acceleration tables to speed up the mapping. */ -static inline unsigned +static forceinline unsigned lzms_comp_get_offset_slot(const struct lzms_compressor *c, u32 offset) { if (offset < 0xe4a5) @@ -456,18 +456,18 @@ lzms_comp_get_offset_slot(const struct lzms_compressor *c, u32 offset) /* * Initialize the range encoder @rc to write forwards to the specified buffer - * @out that is @count 16-bit integers long. + * @out that is @size bytes long. */ static void -lzms_range_encoder_init(struct lzms_range_encoder *rc, le16 *out, size_t count) +lzms_range_encoder_init(struct lzms_range_encoder *rc, u8 *out, size_t size) { rc->lower_bound = 0; rc->range_size = 0xffffffff; rc->cache = 0; rc->cache_size = 1; rc->begin = out; - rc->next = out - 1; - rc->end = out + count; + rc->next = out - sizeof(le16); + rc->end = out + (size & ~1); } /* @@ -498,12 +498,13 @@ lzms_range_encoder_shift_low(struct lzms_range_encoder *rc) do { if (likely(rc->next >= rc->begin)) { if (rc->next != rc->end) { - put_unaligned_u16_le(rc->cache + - (u16)(rc->lower_bound >> 32), - rc->next++); + put_unaligned_le16(rc->cache + + (u16)(rc->lower_bound >> 32), + rc->next); + rc->next += sizeof(le16); } } else { - rc->next++; + rc->next += sizeof(le16); } rc->cache = 0xffff; } while (--rc->cache_size != 0); @@ -528,7 +529,7 @@ lzms_range_encoder_flush(struct lzms_range_encoder *rc) * @prob is the probability out of LZMS_PROBABILITY_DENOMINATOR that the next * bit is 0 rather than 1. */ -static inline void +static forceinline void lzms_range_encode_bit(struct lzms_range_encoder *rc, int bit, u32 prob) { /* Normalize if needed. */ @@ -550,7 +551,7 @@ lzms_range_encode_bit(struct lzms_range_encoder *rc, int bit, u32 prob) * Encode a bit. This wraps around lzms_range_encode_bit() to handle using and * updating the state and its corresponding probability entry. */ -static inline void +static forceinline void lzms_encode_bit(int bit, unsigned *state_p, unsigned num_states, struct lzms_probability_entry *probs, struct lzms_range_encoder *rc) @@ -624,16 +625,16 @@ lzms_encode_delta_rep_bit(struct lzms_compressor *c, int bit, int idx) /* * Initialize the output bitstream @os to write backwards to the specified - * buffer @out that is @count 16-bit integers long. + * buffer @out that is @size bytes long. */ static void lzms_output_bitstream_init(struct lzms_output_bitstream *os, - le16 *out, size_t count) + u8 *out, size_t size) { os->bitbuf = 0; os->bitcount = 0; - os->next = out + count; os->begin = out; + os->next = out + (size & ~1); } /* @@ -643,7 +644,7 @@ lzms_output_bitstream_init(struct lzms_output_bitstream *os, * @max_num_bits is a compile-time constant that specifies the maximum number of * bits that can ever be written at this call site. */ -static inline void +static forceinline void lzms_write_bits(struct lzms_output_bitstream *os, const u32 bits, const unsigned num_bits, const unsigned max_num_bits) { @@ -657,8 +658,10 @@ lzms_write_bits(struct lzms_output_bitstream *os, const u32 bits, os->bitcount -= 16; /* Write a coding unit, unless it would underflow the buffer. */ - if (os->next != os->begin) - put_unaligned_u16_le(os->bitbuf >> os->bitcount, --os->next); + if (os->next != os->begin) { + os->next -= sizeof(le16); + put_unaligned_le16(os->bitbuf >> os->bitcount, os->next); + } /* Optimization for call sites that never write more than 16 * bits at once. */ @@ -678,8 +681,10 @@ lzms_output_bitstream_flush(struct lzms_output_bitstream *os) if (os->next == os->begin) return false; - if (os->bitcount != 0) - put_unaligned_u16_le(os->bitbuf << (16 - os->bitcount), --os->next); + if (os->bitcount != 0) { + os->next -= sizeof(le16); + put_unaligned_le16(os->bitbuf << (16 - os->bitcount), os->next); + } return true; } @@ -720,7 +725,7 @@ lzms_rebuild_huffman_code(struct lzms_huffman_rebuild_info *rebuild_info) * Encode a symbol using the specified Huffman code. Then, if the Huffman code * needs to be rebuilt, rebuild it and return true; otherwise return false. */ -static inline bool +static forceinline bool lzms_huffman_encode_symbol(unsigned sym, const u32 *codewords, const u8 *lens, u32 *freqs, struct lzms_output_bitstream *os, @@ -931,7 +936,7 @@ lzms_encode_nonempty_item_list(struct lzms_compressor *c, } while (cur_node != end_node); } -static inline void +static forceinline void lzms_encode_item_list(struct lzms_compressor *c, struct lzms_optimum_node *end_node) { @@ -971,7 +976,7 @@ static const u32 lzms_bit_costs[LZMS_PROBABILITY_DENOMINATOR + 1] = { 1 }; -static inline void +static _unused_attribute void check_cost_shift(void) { /* lzms_bit_costs is hard-coded to the current COST_SHIFT. */ @@ -998,14 +1003,14 @@ lzms_compute_bit_costs(void) #endif /* Return the cost to encode a 0 bit in the specified context. */ -static inline u32 +static forceinline u32 lzms_bit_0_cost(unsigned state, const struct lzms_probability_entry *probs) { return lzms_bit_costs[probs[state].num_recent_zero_bits]; } /* Return the cost to encode a 1 bit in the specified context. */ -static inline u32 +static forceinline u32 lzms_bit_1_cost(unsigned state, const struct lzms_probability_entry *probs) { return lzms_bit_costs[LZMS_PROBABILITY_DENOMINATOR - @@ -1013,7 +1018,7 @@ lzms_bit_1_cost(unsigned state, const struct lzms_probability_entry *probs) } /* Return the cost to encode a literal, including the main bit. */ -static inline u32 +static forceinline u32 lzms_literal_cost(struct lzms_compressor *c, unsigned main_state, unsigned literal) { return lzms_bit_0_cost(main_state, c->probs.main) + @@ -1038,14 +1043,14 @@ lzms_update_fast_length_costs(struct lzms_compressor *c) /* Return the cost to encode the specified match length, which must not exceed * MAX_FAST_LENGTH. */ -static inline u32 +static forceinline u32 lzms_fast_length_cost(const struct lzms_compressor *c, u32 length) { return c->fast_length_cost_tab[length]; } /* Return the cost to encode the specified LZ match offset. */ -static inline u32 +static forceinline u32 lzms_lz_offset_cost(const struct lzms_compressor *c, u32 offset) { unsigned slot = lzms_comp_get_offset_slot(c, offset); @@ -1054,7 +1059,7 @@ lzms_lz_offset_cost(const struct lzms_compressor *c, u32 offset) } /* Return the cost to encode the specified delta power and raw offset. */ -static inline u32 +static forceinline u32 lzms_delta_source_cost(const struct lzms_compressor *c, u32 power, u32 raw_offset) { unsigned slot = lzms_comp_get_offset_slot(c, raw_offset); @@ -1117,31 +1122,31 @@ lzms_update_lru_queues(struct lzms_adaptive_state *state) state->prev_delta_pair = state->upcoming_delta_pair; } -static inline void +static forceinline void lzms_update_state(u8 *state_p, int bit, unsigned num_states) { *state_p = ((*state_p << 1) | bit) & (num_states - 1); } -static inline void +static forceinline void lzms_update_main_state(struct lzms_adaptive_state *state, int is_match) { lzms_update_state(&state->main_state, is_match, LZMS_NUM_MAIN_PROBS); } -static inline void +static forceinline void lzms_update_match_state(struct lzms_adaptive_state *state, int is_delta) { lzms_update_state(&state->match_state, is_delta, LZMS_NUM_MATCH_PROBS); } -static inline void +static forceinline void lzms_update_lz_state(struct lzms_adaptive_state *state, int is_rep) { lzms_update_state(&state->lz_state, is_rep, LZMS_NUM_LZ_PROBS); } -static inline void +static forceinline void lzms_update_lz_rep_states(struct lzms_adaptive_state *state, int rep_idx) { for (int i = 0; i < rep_idx; i++) @@ -1151,13 +1156,13 @@ lzms_update_lz_rep_states(struct lzms_adaptive_state *state, int rep_idx) lzms_update_state(&state->lz_rep_states[rep_idx], 0, LZMS_NUM_LZ_REP_PROBS); } -static inline void +static forceinline void lzms_update_delta_state(struct lzms_adaptive_state *state, int is_rep) { lzms_update_state(&state->delta_state, is_rep, LZMS_NUM_DELTA_PROBS); } -static inline void +static forceinline void lzms_update_delta_rep_states(struct lzms_adaptive_state *state, int rep_idx) { for (int i = 0; i < rep_idx; i++) @@ -1194,7 +1199,7 @@ lzms_init_delta_matchfinder(struct lzms_compressor *c) * NBYTES_HASHED_FOR_DELTA bytes of the sequence beginning at @p when taken in a * delta context with the specified @span. */ -static inline u32 +static forceinline u32 lzms_delta_hash(const u8 *p, const u32 pos, u32 span) { /* A delta match has a certain span and an offset that is a multiple of @@ -1217,7 +1222,7 @@ lzms_delta_hash(const u8 *p, const u32 pos, u32 span) * specified @span and having the initial @len, extend the match as far as * possible, up to a limit of @max_len. */ -static inline u32 +static forceinline u32 lzms_extend_delta_match(const u8 *in_next, const u8 *matchptr, u32 len, u32 max_len, u32 span) { @@ -2068,8 +2073,8 @@ lzms_init_huffman_codes(struct lzms_compressor *c, unsigned num_offset_slots) static size_t lzms_finalize(struct lzms_compressor *c) { - size_t num_forwards_units; - size_t num_backwards_units; + size_t num_forwards_bytes; + size_t num_backwards_bytes; /* Flush both the forwards and backwards streams, and make sure they * didn't cross each other and start overwriting each other's data. */ @@ -2087,12 +2092,12 @@ lzms_finalize(struct lzms_compressor *c) * bitstream. Move the data output by the backwards bitstream to be * adjacent to the data output by the forward bitstream, and calculate * the compressed size that this results in. */ - num_forwards_units = c->rc.next - c->rc.begin; - num_backwards_units = c->rc.end - c->os.next; + num_forwards_bytes = c->rc.next - c->rc.begin; + num_backwards_bytes = c->rc.end - c->os.next; - memmove(c->rc.next, c->os.next, num_backwards_units * sizeof(le16)); + memmove(c->rc.next, c->os.next, num_backwards_bytes); - return (num_forwards_units + num_backwards_units) * sizeof(le16); + return num_forwards_bytes + num_backwards_bytes; } static u64 @@ -2190,8 +2195,8 @@ lzms_compress(const void *restrict in, size_t in_nbytes, lzms_init_delta_matchfinder(c); /* Initialize the encoder structures. */ - lzms_range_encoder_init(&c->rc, out, out_nbytes_avail / sizeof(le16)); - lzms_output_bitstream_init(&c->os, out, out_nbytes_avail / sizeof(le16)); + lzms_range_encoder_init(&c->rc, out, out_nbytes_avail); + lzms_output_bitstream_init(&c->os, out, out_nbytes_avail); lzms_init_states_and_probabilities(c); lzms_init_huffman_codes(c, lzms_get_num_offset_slots(c->in_nbytes));