4 * Code shared between the compressor and decompressor for the LZMS compression
9 * Copyright (C) 2013 Eric Biggers
11 * This file is part of wimlib, a library for working with WIM files.
13 * wimlib is free software; you can redistribute it and/or modify it under the
14 * terms of the GNU General Public License as published by the Free
15 * Software Foundation; either version 3 of the License, or (at your option)
18 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
19 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
20 * A PARTICULAR PURPOSE. See the GNU General Public License for more
23 * You should have received a copy of the GNU General Public License
24 * along with wimlib; if not, see http://www.gnu.org/licenses/.
31 #include "wimlib/endianness.h"
32 #include "wimlib/error.h"
33 #include "wimlib/lzms.h"
34 #include "wimlib/util.h"
38 /* A table that maps position slots to their base values. These are constants
39 * computed at runtime by lzms_compute_slot_bases(). */
40 u32 lzms_position_slot_base[LZMS_MAX_NUM_OFFSET_SYMS + 1];
42 /* A table that maps length slots to their base values. These are constants
43 * computed at runtime by lzms_compute_slot_bases(). */
44 u32 lzms_length_slot_base[LZMS_NUM_LEN_SYMS + 1];
46 /* Return the slot for the specified value. */
48 lzms_get_slot(u32 value, const u32 slot_base_tab[], unsigned num_slots)
52 while (slot_base_tab[slot + 1] <= value)
60 lzms_decode_delta_rle_slot_bases(u32 slot_bases[],
61 const u8 delta_run_lens[], size_t num_run_lens)
66 for (size_t i = 0; i < num_run_lens; i++) {
67 u8 run_len = delta_run_lens[i];
70 slot_bases[slot++] = base;
76 /* Initialize the global position and length slot tables. */
78 lzms_compute_slot_bases(void)
80 /* If an explicit formula that maps LZMS position and length slots to
81 * slot bases exists, then it could be used here. But until one is
82 * found, the following code fills in the slots using the observation
83 * that the increase from one slot base to the next is an increasing
84 * power of 2. Therefore, run-length encoding of the delta of adjacent
85 * entries can be used. */
86 static const u8 position_slot_delta_run_lens[] = {
87 9, 0, 9, 7, 10, 15, 15, 20,
88 20, 30, 33, 40, 42, 45, 60, 73,
92 static const u8 length_slot_delta_run_lens[] = {
93 27, 4, 6, 4, 5, 2, 1, 1,
94 1, 1, 1, 0, 0, 0, 0, 0,
98 lzms_decode_delta_rle_slot_bases(lzms_position_slot_base,
99 position_slot_delta_run_lens,
100 ARRAY_LEN(position_slot_delta_run_lens));
102 lzms_position_slot_base[LZMS_MAX_NUM_OFFSET_SYMS] = 0x7fffffff;
104 lzms_decode_delta_rle_slot_bases(lzms_length_slot_base,
105 length_slot_delta_run_lens,
106 ARRAY_LEN(length_slot_delta_run_lens));
108 lzms_length_slot_base[LZMS_NUM_LEN_SYMS] = 0x400108ab;
111 /* Initialize the global position length slot tables if not done so already. */
113 lzms_init_slot_bases(void)
115 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
116 static bool already_computed = false;
118 if (unlikely(!already_computed)) {
119 pthread_mutex_lock(&mutex);
120 if (!already_computed) {
121 lzms_compute_slot_bases();
122 already_computed = true;
124 pthread_mutex_unlock(&mutex);
129 lzms_maybe_do_x86_translation(u8 data[restrict], s32 i, s32 num_op_bytes,
130 s32 * restrict closest_target_usage_p,
131 s32 last_target_usages[restrict],
132 s32 max_trans_offset, bool undo)
137 if (i - *closest_target_usage_p <= max_trans_offset) {
138 LZMS_DEBUG("Undid x86 translation at position %d "
139 "(opcode 0x%02x)", i, data[i]);
140 le32 *p32 = (le32*)&data[i + num_op_bytes];
141 u32 n = le32_to_cpu(*p32);
142 *p32 = cpu_to_le32(n - i);
144 pos = i + le16_to_cpu(*(const le16*)&data[i + num_op_bytes]);
146 pos = i + le16_to_cpu(*(const le16*)&data[i + num_op_bytes]);
148 if (i - *closest_target_usage_p <= max_trans_offset) {
149 LZMS_DEBUG("Did x86 translation at position %d "
150 "(opcode 0x%02x)", i, data[i]);
151 le32 *p32 = (le32*)&data[i + num_op_bytes];
152 u32 n = le32_to_cpu(*p32);
153 *p32 = cpu_to_le32(n + i);
157 i += num_op_bytes + sizeof(le32) - 1;
159 if (i - last_target_usages[pos] <= LZMS_X86_MAX_GOOD_TARGET_OFFSET)
160 *closest_target_usage_p = i;
162 last_target_usages[pos] = i;
168 lzms_may_x86_translate(const u8 p[restrict],
169 s32 *restrict max_offset_ret)
171 /* Switch on first byte of the opcode, assuming it is really an x86
173 *max_offset_ret = LZMS_X86_MAX_TRANSLATION_OFFSET;
177 if (p[2] == 0x5 || p[2] == 0xd) {
178 /* Load relative (x86_64) */
181 } else if (p[1] == 0x8d) {
182 if ((p[2] & 0x7) == 0x5) {
183 /* Load effective address relative (x86_64) */
191 if ((p[2] & 0x7) == 0x5) {
192 /* Load effective address relative (x86_64) */
200 *max_offset_ret = LZMS_X86_MAX_TRANSLATION_OFFSET / 2;
209 if (p[1] == 0x83 && p[2] == 0x05) {
210 /* Lock add relative */
227 * Translate relative addresses embedded in x86 instructions into absolute
228 * addresses (@undo == %false), or undo this translation (@undo == %true).
230 * @last_target_usages is a temporary array of length >= 65536.
233 lzms_x86_filter(u8 data[restrict],
235 s32 last_target_usages[restrict],
238 s32 closest_target_usage = -LZMS_X86_MAX_TRANSLATION_OFFSET - 1;
240 for (s32 i = 0; i < 65536; i++)
241 last_target_usages[i] = -LZMS_X86_MAX_GOOD_TARGET_OFFSET - 1;
243 for (s32 i = 0; i < size - 11; ) {
244 s32 max_trans_offset;
247 n = lzms_may_x86_translate(data + i, &max_trans_offset);
248 if (max_trans_offset) {
249 i = lzms_maybe_do_x86_translation(data, i, n,
250 &closest_target_usage,