wimlib.net Git - wimlib/blobdiff - src/lzx-compress.c
lzx-compress.c: Avoid unnecessary branch in match cost calculation
[wimlib] / src / lzx-compress.c
index 4c89b7d9768e1110b5e54f71580ba6ec0a592327..bc50a858bc9b62cf26d9d1b03d7b822591a1eb33 100644 (file)
@@ -659,9 +659,9 @@ lzx_make_huffman_codes(const struct lzx_freqs *freqs,
  *
  * @os:
  *     The bitstream to which to write the match.
  *
  * @os:
  *     The bitstream to which to write the match.
- * @block_type:
- *     The type of the LZX block (LZX_BLOCKTYPE_ALIGNED or
- *     LZX_BLOCKTYPE_VERBATIM)
+ * @ones_if_aligned:
+ *     A mask of all ones if the block is of type LZX_BLOCKTYPE_ALIGNED,
+ *     otherwise 0.
  * @match:
  *     The match data.
  * @codes:
  * @match:
  *     The match data.
  * @codes:
@@ -669,7 +669,7 @@ lzx_make_huffman_codes(const struct lzx_freqs *freqs,
  *     and aligned offset Huffman codes for the current LZX compressed block.
  */
 static void
  *     and aligned offset Huffman codes for the current LZX compressed block.
  */
 static void
-lzx_write_match(struct lzx_output_bitstream *os, int block_type,
+lzx_write_match(struct lzx_output_bitstream *os, unsigned ones_if_aligned,
                struct lzx_item match, const struct lzx_codes *codes)
 {
        unsigned match_len_minus_2 = match.data & 0xff;
                struct lzx_item match, const struct lzx_codes *codes)
 {
        unsigned match_len_minus_2 = match.data & 0xff;
@@ -719,7 +719,7 @@ lzx_write_match(struct lzx_output_bitstream *os, int block_type,
 
        num_extra_bits = lzx_get_num_extra_bits(position_slot);
 
 
        num_extra_bits = lzx_get_num_extra_bits(position_slot);
 
-       if ((block_type == LZX_BLOCKTYPE_ALIGNED) && (num_extra_bits >= 3)) {
+       if ((num_extra_bits & ones_if_aligned) >= 3) {
 
                /* Aligned offset blocks: The low 3 bits of the position footer
                 * are Huffman-encoded using the aligned offset code.  The
 
                /* Aligned offset blocks: The low 3 bits of the position footer
                 * are Huffman-encoded using the aligned offset code.  The
@@ -948,12 +948,14 @@ lzx_write_items(struct lzx_output_bitstream *os, int block_type,
                const struct lzx_item items[], u32 num_items,
                const struct lzx_codes *codes)
 {
                const struct lzx_item items[], u32 num_items,
                const struct lzx_codes *codes)
 {
+       unsigned ones_if_aligned = 0U - (block_type == LZX_BLOCKTYPE_ALIGNED);
+
        for (u32 i = 0; i < num_items; i++) {
                /* The high bit of the 32-bit intermediate representation
                 * indicates whether the item is an actual LZ-style match (1) or
                 * a literal byte (0).  */
                if (items[i].data & 0x80000000)
        for (u32 i = 0; i < num_items; i++) {
                /* The high bit of the 32-bit intermediate representation
                 * indicates whether the item is an actual LZ-style match (1) or
                 * a literal byte (0).  */
                if (items[i].data & 0x80000000)
-                       lzx_write_match(os, block_type, items[i], codes);
+                       lzx_write_match(os, ones_if_aligned, items[i], codes);
                else
                        lzx_write_literal(os, items[i].data, codes);
        }
                else
                        lzx_write_literal(os, items[i].data, codes);
        }
@@ -1461,7 +1463,7 @@ lzx_repsearch(const u8 * const strptr, const u32 bytes_remaining,
 }
 
 /*
 }
 
 /*
- * lzx_choose_near_optimal_match() -
+ * lzx_choose_near_optimal_item() -
  *
  * Choose an approximately optimal match or literal to use at the next position
  * in the string, or "window", being LZ-encoded.
  *
  * Choose an approximately optimal match or literal to use at the next position
  * in the string, or "window", being LZ-encoded.
@@ -1622,17 +1624,21 @@ lzx_choose_near_optimal_item(struct lzx_compressor *c)
                }
 
                do {
                }
 
                do {
+                       u32 cost;
                        unsigned len_header;
                        unsigned main_symbol;
                        unsigned len_header;
                        unsigned main_symbol;
-                       u32 cost;
 
                        cost = position_cost;
 
 
                        cost = position_cost;
 
-                       len_header = min(len - LZX_MIN_MATCH_LEN, LZX_NUM_PRIMARY_LENS);
+                       if (len - LZX_MIN_MATCH_LEN < LZX_NUM_PRIMARY_LENS) {
+                               len_header = len - LZX_MIN_MATCH_LEN;
+                       } else {
+                               len_header = LZX_NUM_PRIMARY_LENS;
+                               cost += c->costs.len[len - LZX_MIN_MATCH_LEN - LZX_NUM_PRIMARY_LENS];
+                       }
+
                        main_symbol = ((position_slot << 3) | len_header) + LZX_NUM_CHARS;
                        cost += c->costs.main[main_symbol];
                        main_symbol = ((position_slot << 3) | len_header) + LZX_NUM_CHARS;
                        cost += c->costs.main[main_symbol];
-                       if (len_header == LZX_NUM_PRIMARY_LENS)
-                               cost += c->costs.len[len - LZX_MIN_MATCH_LEN - LZX_NUM_PRIMARY_LENS];
 
                        optimum[len].queue = queue;
                        optimum[len].prev.link = 0;
 
                        optimum[len].queue = queue;
                        optimum[len].prev.link = 0;
@@ -1825,22 +1831,25 @@ lzx_choose_near_optimal_item(struct lzx_compressor *c)
                have_position_cost:
 
                        do {
                have_position_cost:
 
                        do {
+                               u32 cost;
                                unsigned len_header;
                                unsigned main_symbol;
                                unsigned len_header;
                                unsigned main_symbol;
-                               u32 cost;
 
                                cost = position_cost;
 
 
                                cost = position_cost;
 
-                               len_header = min(len - LZX_MIN_MATCH_LEN,
-                                                LZX_NUM_PRIMARY_LENS);
-                               main_symbol = ((position_slot << 3) | len_header) +
-                                               LZX_NUM_CHARS;
-                               cost += c->costs.main[main_symbol];
-                               if (len_header == LZX_NUM_PRIMARY_LENS) {
+                               if (len - LZX_MIN_MATCH_LEN < LZX_NUM_PRIMARY_LENS) {
+                                       len_header = len - LZX_MIN_MATCH_LEN;
+                               } else {
+                                       len_header = LZX_NUM_PRIMARY_LENS;
                                        cost += c->costs.len[len -
                                                        LZX_MIN_MATCH_LEN -
                                                        LZX_NUM_PRIMARY_LENS];
                                }
                                        cost += c->costs.len[len -
                                                        LZX_MIN_MATCH_LEN -
                                                        LZX_NUM_PRIMARY_LENS];
                                }
+
+                               main_symbol = ((position_slot << 3) | len_header) +
+                                               LZX_NUM_CHARS;
+                               cost += c->costs.main[main_symbol];
+
                                if (cost < optimum[cur_pos + len].cost) {
                                        if (position_slot < LZX_NUM_RECENT_OFFSETS) {
                                                optimum[cur_pos + len].queue = optimum[cur_pos].queue;
                                if (cost < optimum[cur_pos + len].cost) {
                                        if (position_slot < LZX_NUM_RECENT_OFFSETS) {
                                                optimum[cur_pos + len].queue = optimum[cur_pos].queue;