4 * Code shared between the compressor and decompressor for the LZMS compression
9 * Copyright (C) 2013 Eric Biggers
11 * This file is part of wimlib, a library for working with WIM files.
13 * wimlib is free software; you can redistribute it and/or modify it under the
14 * terms of the GNU General Public License as published by the Free
15 * Software Foundation; either version 3 of the License, or (at your option)
18 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
19 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
20 * A PARTICULAR PURPOSE. See the GNU General Public License for more
23 * You should have received a copy of the GNU General Public License
24 * along with wimlib; if not, see http://www.gnu.org/licenses/.
31 #include "wimlib/endianness.h"
32 #include "wimlib/error.h"
33 #include "wimlib/lzms.h"
34 #include "wimlib/util.h"
38 /* A table that maps position slots to their base values. These are constants
39 * computed at runtime by lzms_compute_slot_bases(). */
/* NOTE: sized with one extra entry: lzms_compute_slot_bases() stores an
 * upper-bound sentinel (0x7fffffff) at index LZMS_MAX_NUM_OFFSET_SYMS so the
 * slot_base_tab[slot + 1] lookahead in lzms_get_slot() always terminates. */
40 u32 lzms_position_slot_base[LZMS_MAX_NUM_OFFSET_SYMS + 1];
42 /* A table that maps length slots to their base values. These are constants
43 * computed at runtime by lzms_compute_slot_bases(). */
/* NOTE: sized with one extra entry: lzms_compute_slot_bases() stores an
 * upper-bound sentinel (0x400108ab) at index LZMS_NUM_LEN_SYMS so the
 * slot_base_tab[slot + 1] lookahead in lzms_get_slot() always terminates. */
44 u32 lzms_length_slot_base[LZMS_NUM_LEN_SYMS + 1];
46 /* Return the slot for the specified value. */
48 lzms_get_slot(u32 value, const u32 slot_base_tab[], unsigned num_slots)
50 /* TODO: Speed this up. */
53 while (slot_base_tab[slot + 1] <= value)
/* Expand a run-length-encoded sequence of slot-base deltas into @slot_bases.
 * Each entry of @delta_run_lens gives the number of consecutive slots that
 * share the same delta; per the note in lzms_compute_slot_bases(), the delta
 * presumably doubles from one run to the next (power-of-2 increases).
 * NOTE(review): this extracted view is missing the lines that declare and
 * advance `slot`, `base`, and the per-run delta, as well as the inner
 * per-run loop — consult the full source before modifying. */
61 lzms_decode_delta_rle_slot_bases(u32 slot_bases[],
62 				 const u8 delta_run_lens[], size_t num_run_lens)
67 	for (size_t i = 0; i < num_run_lens; i++) {
68 		u8 run_len = delta_run_lens[i];
71 		slot_bases[slot++] = base;
77 /* Initialize the global position and length slot tables. */
79 lzms_compute_slot_bases(void)
81 	/* If an explicit formula that maps LZMS position and length slots to
82 	 * slot bases exists, then it could be used here. But until one is
83 	 * found, the following code fills in the slots using the observation
84 	 * that the increase from one slot base to the next is an increasing
85 	 * power of 2. Therefore, run-length encoding of the delta of adjacent
86 	 * entries can be used. */
/* NOTE(review): both run-length tables below are truncated in this extracted
 * view (their trailing initializer lines and closing braces are missing) —
 * do not treat the visible entries as the complete tables. */
87 	static const u8 position_slot_delta_run_lens[] = {
88 		9, 0, 9, 7, 10, 15, 15, 20,
89 		20, 30, 33, 40, 42, 45, 60, 73,
93 	static const u8 length_slot_delta_run_lens[] = {
94 		27, 4, 6, 4, 5, 2, 1, 1,
95 		1, 1, 1, 0, 0, 0, 0, 0,
99 	lzms_decode_delta_rle_slot_bases(lzms_position_slot_base,
100 					 position_slot_delta_run_lens,
101 					 ARRAY_LEN(position_slot_delta_run_lens));
/* Sentinel: an upper bound larger than any real position slot base, so
 * lzms_get_slot()'s slot_base_tab[slot + 1] lookahead terminates. */
103 	lzms_position_slot_base[LZMS_MAX_NUM_OFFSET_SYMS] = 0x7fffffff;
105 	lzms_decode_delta_rle_slot_bases(lzms_length_slot_base,
106 					 length_slot_delta_run_lens,
107 					 ARRAY_LEN(length_slot_delta_run_lens));
/* Sentinel entry for the length slot table, same purpose as above. */
109 	lzms_length_slot_base[LZMS_NUM_LEN_SYMS] = 0x400108ab;
112 /* Initialize the global position length slot tables if not done so already. */
114 lzms_init_slot_bases(void)
116 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
117 static bool already_computed = false;
119 if (unlikely(!already_computed)) {
120 pthread_mutex_lock(&mutex);
121 if (!already_computed) {
122 lzms_compute_slot_bases();
123 already_computed = true;
125 pthread_mutex_unlock(&mutex);
/* Perform (or undo, when @undo) the LZMS x86 translation for the potential
 * instruction whose opcode begins at position @i in @data: the 32-bit operand
 * following the @num_op_bytes opcode bytes is converted between a relative
 * and an absolute value by adding/subtracting the position @i.
 * NOTE(review): this extracted view is missing several lines of this function
 * (the branch structure selecting the undo/do cases, the declaration of
 * `pos`, and the return statement) — the comments below cover only what is
 * visible. */
130 lzms_maybe_do_x86_translation(u8 data[restrict], s32 i, s32 num_op_bytes,
131 s32 * restrict closest_target_usage_p,
132 s32 last_target_usages[restrict],
133 s32 max_trans_offset, bool undo)
138 if (i - *closest_target_usage_p <= max_trans_offset) {
139 LZMS_DEBUG("Undid x86 translation at position %d "
140 "(opcode 0x%02x)", i, data[i]);
/* NOTE(review): these type-punned le32* accesses may be unaligned and rely
 * on wimlib's le32 helpers being safe on all targets — a memcpy-based access
 * would be strictly portable; confirm before changing. */
141 le32 *p32 = (le32*)&data[i + num_op_bytes];
142 u32 n = le32_to_cpu(*p32);
/* Undo: convert the stored absolute value back to position-relative. */
143 *p32 = cpu_to_le32(n - i);
/* pos selects one of the 65536 entries of last_target_usages[] (it is
 * derived from the low 16 bits of the operand plus the position). */
145 pos = i + le16_to_cpu(*(const le16*)&data[i + num_op_bytes]);
147 pos = i + le16_to_cpu(*(const le16*)&data[i + num_op_bytes]);
149 if (i - *closest_target_usage_p <= max_trans_offset) {
150 LZMS_DEBUG("Did x86 translation at position %d "
151 "(opcode 0x%02x)", i, data[i]);
152 le32 *p32 = (le32*)&data[i + num_op_bytes];
153 u32 n = le32_to_cpu(*p32);
/* Do: convert the position-relative value to an absolute one. */
154 *p32 = cpu_to_le32(n + i);
/* Skip past the opcode bytes and 32-bit operand (minus 1; presumably the
 * caller's loop performs the final increment — TODO confirm). */
158 i += num_op_bytes + sizeof(le32) - 1;
/* Remember this position as the closest "good" target usage only if the
 * matching entry was used recently enough. */
160 if (i - last_target_usages[pos] <= LZMS_X86_MAX_GOOD_TARGET_OFFSET)
161 *closest_target_usage_p = i;
163 last_target_usages[pos] = i;
/* Decide whether the bytes at @p look like the start of an x86/x86_64
 * instruction whose relative operand is subject to LZMS translation.  On a
 * match, *max_offset_ret is set to the maximum translation offset for that
 * opcode class (and presumably the opcode length is returned — the switch
 * skeleton and return statements are missing from this extracted view;
 * consult the full source). */
169 lzms_may_x86_translate(const u8 p[restrict],
170 s32 *restrict max_offset_ret)
172 /* Switch on first byte of the opcode, assuming it is really an x86
174 *max_offset_ret = LZMS_X86_MAX_TRANSLATION_OFFSET;
178 if (p[2] == 0x5 || p[2] == 0xd) {
179 /* Load relative (x86_64) */
182 } else if (p[1] == 0x8d) {
/* ModRM with mod=00, rm=101 selects RIP-relative addressing. */
183 if ((p[2] & 0x7) == 0x5) {
184 /* Load effective address relative (x86_64) */
192 if ((p[2] & 0x7) == 0x5) {
193 /* Load effective address relative (x86_64) */
/* This opcode class gets a tighter translation window. */
201 *max_offset_ret = LZMS_X86_MAX_TRANSLATION_OFFSET / 2;
210 if (p[1] == 0x83 && p[2] == 0x05) {
211 /* Lock add relative */
228  * Translate relative addresses embedded in x86 instructions into absolute
229  * addresses (@undo == %false), or undo this translation (@undo == %true).
231  * @last_target_usages is a temporary array of length >= 65536.
/* NOTE(review): this extracted view is missing part of the parameter list
 * (a `size` parameter is referenced below, and @undo is documented above but
 * not visible) as well as the tail of the scan loop — consult the full
 * source before modifying. */
234 lzms_x86_filter(u8 data[restrict],
236 s32 last_target_usages[restrict],
/* Start "closest target usage" far enough in the past that no position can
 * appear to be within the translation window initially. */
239 s32 closest_target_usage = -LZMS_X86_MAX_TRANSLATION_OFFSET - 1;
241 for (s32 i = 0; i < 65536; i++)
242 last_target_usages[i] = -LZMS_X86_MAX_GOOD_TARGET_OFFSET - 1;
/* Stop 11 bytes short of the end — presumably the longest opcode plus its
 * 32-bit operand that lzms_may_x86_translate() may examine; confirm. */
244 for (s32 i = 0; i < size - 11; ) {
245 s32 max_trans_offset;
248 n = lzms_may_x86_translate(data + i, &max_trans_offset);
249 if (max_trans_offset) {
250 i = lzms_maybe_do_x86_translation(data, i, n,
251 &closest_target_usage,
262 lzms_init_lz_lru_queues(struct lzms_lz_lru_queues *lz)
264 /* Recent offsets for LZ matches */
265 for (u32 i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++)
266 lz->recent_offsets[i] = i + 1;
269 lz->upcoming_offset = 0;
/* Initialize the LRU queues for delta matches: the recent-offsets slots are
 * seeded with the distinct nonzero values 1 .. LZMS_NUM_RECENT_OFFSETS + 1,
 * the corresponding powers are zeroed, and the delayed-insertion state
 * (prev_* / upcoming_*) is cleared. */
273 lzms_init_delta_lru_queues(struct lzms_delta_lru_queues *delta)
275 	/* Recent offsets and powers for LZ matches */
276 	for (u32 i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++) {
277 		delta->recent_offsets[i] = i + 1;
278 		delta->recent_powers[i] = 0;
280 	delta->prev_offset = 0;
281 	delta->prev_power = 0;
282 	delta->upcoming_offset = 0;
283 	delta->upcoming_power = 0;
288 lzms_init_lru_queues(struct lzms_lru_queues *lru)
290 lzms_init_lz_lru_queues(&lru->lz);
291 lzms_init_delta_lru_queues(&lru->delta);
295 lzms_update_lz_lru_queues(struct lzms_lz_lru_queues *lz)
297 if (lz->prev_offset != 0) {
298 for (int i = LZMS_NUM_RECENT_OFFSETS - 1; i >= 0; i--)
299 lz->recent_offsets[i + 1] = lz->recent_offsets[i];
300 lz->recent_offsets[0] = lz->prev_offset;
302 lz->prev_offset = lz->upcoming_offset;
306 lzms_update_delta_lru_queues(struct lzms_delta_lru_queues *delta)
308 if (delta->prev_offset != 0) {
309 for (int i = LZMS_NUM_RECENT_OFFSETS - 1; i >= 0; i--) {
310 delta->recent_offsets[i + 1] = delta->recent_offsets[i];
311 delta->recent_powers[i + 1] = delta->recent_powers[i];
313 delta->recent_offsets[0] = delta->prev_offset;
314 delta->recent_powers[0] = delta->prev_power;
317 delta->prev_offset = delta->upcoming_offset;
318 delta->prev_power = delta->upcoming_power;
322 lzms_update_lru_queues(struct lzms_lru_queues *lru)
324 lzms_update_lz_lru_queues(&lru->lz);
325 lzms_update_delta_lru_queues(&lru->delta);