include/wimlib/textfile.h \
include/wimlib/timestamp.h \
include/wimlib/types.h \
+ include/wimlib/unaligned.h \
include/wimlib/unix_data.h \
include/wimlib/util.h \
include/wimlib/version.h \
Version 1.7.3-BETA:
Fix for very slow export from solid WIM / ESD files.
+ Fix for the LZX and LZMS code on architectures, such as ARM, that do
+ not reliably support unaligned memory accesses.
+
New progress message: WIMLIB_PROGRESS_MSG_HANDLE_ERROR. Applications
may use this to treat some types of errors as non-fatal.
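
A rough sketch of how an application could opt in (illustration only;
handle_progress() is a hypothetical callback, and the
handle_error.will_ignore field and WIMLIB_PROGRESS_STATUS_CONTINUE value
are assumed from the wimlib.h shipped with this release):

	#include <stdbool.h>
	#include <wimlib.h>

	static enum wimlib_progress_status
	handle_progress(enum wimlib_progress_msg msg,
			union wimlib_progress_info *info, void *ctx)
	{
		if (msg == WIMLIB_PROGRESS_MSG_HANDLE_ERROR) {
			/* Ask the library to continue past this error
			 * rather than fail the whole operation.  */
			info->handle_error.will_ignore = true;
		}
		return WIMLIB_PROGRESS_STATUS_CONTINUE;
	}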
mode), to ensure a fast and feature-rich implementation of each platform/mode.
wimlib is mainly used on x86 and x86_64 CPUs, but it should also work on a
-number of other GCC-supported 32-bit or 64-bit architectures. No assumptions
-are made about endianness, but some code assumes that unaligned memory accesses
-are supported and relatively efficient.
+number of other GCC-supported 32-bit or 64-bit architectures. It has been
+tested on the ARM architecture.
Currently, gcc and clang are the only supported compilers. A few nonstandard
extensions are used in the code.
--- /dev/null
+/*
+ * unaligned.h
+ *
+ * Inline functions for unaligned memory accesses.
+ *
+ * The author dedicates this file to the public domain.
+ * You can do whatever you want with this file.
+ */
+
+#ifndef _WIMLIB_UNALIGNED_H
+#define _WIMLIB_UNALIGNED_H
+
+#include "compiler.h"
+#include "endianness.h"
+#include "types.h"
+
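+/*
+ * DEFINE_UNALIGNED_TYPE(type) - generate load_<type>_unaligned() and
+ * store_<type>_unaligned() helpers.  The single-member wrapper struct is
+ * marked packed, so the compiler emits loads and stores that are safe on
+ * architectures where plain pointer dereferences require alignment.
+ */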
+#define DEFINE_UNALIGNED_TYPE(type) \
+struct type##_unaligned { \
+ type v; \
+} _packed_attribute; \
+ \
+static inline type \
+load_##type##_unaligned(const void *p) \
+{ \
+ return ((const struct type##_unaligned *)p)->v; \
+} \
+ \
+static inline void \
+store_##type##_unaligned(type val, void *p) \
+{ \
+ ((struct type##_unaligned *)p)->v = val; \
+}
+
+DEFINE_UNALIGNED_TYPE(le16);
+DEFINE_UNALIGNED_TYPE(le32);
+DEFINE_UNALIGNED_TYPE(le64);
+
+#endif /* _WIMLIB_UNALIGNED_H */
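At call sites, the generated helpers replace direct casts through pointer
types such as le32 *.  A minimal usage sketch (illustration only;
read_u32_at() is a hypothetical helper, not part of this change set):

	#include <stddef.h>

	#include "wimlib/endianness.h"
	#include "wimlib/types.h"
	#include "wimlib/unaligned.h"

	/* Read a little-endian 32-bit value from an arbitrary byte offset;
	 * safe even when buf + offset is not 4-byte aligned. */
	static inline u32
	read_u32_at(const u8 *buf, size_t offset)
	{
		return le32_to_cpu(load_le32_unaligned(&buf[offset]));
	}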
#include "wimlib/endianness.h"
#include "wimlib/lzms.h"
+#include "wimlib/unaligned.h"
#include "wimlib/util.h"
#include <pthread.h>
if (i - *closest_target_usage_p <= max_trans_offset) {
LZMS_DEBUG("Undid x86 translation at position %d "
"(opcode 0x%02x)", i, data[i]);
- le32 *p32 = (le32*)&data[i + num_op_bytes];
- u32 n = le32_to_cpu(*p32);
- *p32 = cpu_to_le32(n - i);
+ void *p32 = &data[i + num_op_bytes];
+ u32 n = le32_to_cpu(load_le32_unaligned(p32));
+ store_le32_unaligned(cpu_to_le32(n - i), p32);
}
- pos = i + le16_to_cpu(*(const le16*)&data[i + num_op_bytes]);
+ pos = i + le16_to_cpu(load_le16_unaligned(&data[i + num_op_bytes]));
} else {
- pos = i + le16_to_cpu(*(const le16*)&data[i + num_op_bytes]);
+ pos = i + le16_to_cpu(load_le16_unaligned(&data[i + num_op_bytes]));
if (i - *closest_target_usage_p <= max_trans_offset) {
LZMS_DEBUG("Did x86 translation at position %d "
"(opcode 0x%02x)", i, data[i]);
- le32 *p32 = (le32*)&data[i + num_op_bytes];
- u32 n = le32_to_cpu(*p32);
- *p32 = cpu_to_le32(n + i);
+ void *p32 = &data[i + num_op_bytes];
+ u32 n = le32_to_cpu(load_le32_unaligned(p32));
+ store_le32_unaligned(cpu_to_le32(n + i), p32);
}
}
#include "wimlib/endianness.h"
#include "wimlib/lzx.h"
+#include "wimlib/unaligned.h"
#include "wimlib/util.h"
#ifdef __SSE2__
}
static void
-do_translate_target(sle32 *target, s32 input_pos)
+do_translate_target(void *target, s32 input_pos)
{
s32 abs_offset, rel_offset;
- /* XXX: This assumes unaligned memory accesses are okay. */
- rel_offset = le32_to_cpu(*target);
+ rel_offset = le32_to_cpu(load_le32_unaligned(target));
if (rel_offset >= -input_pos && rel_offset < LZX_WIM_MAGIC_FILESIZE) {
if (rel_offset < LZX_WIM_MAGIC_FILESIZE - input_pos) {
/* "good translation" */
/* "compensating translation" */
abs_offset = rel_offset - LZX_WIM_MAGIC_FILESIZE;
}
- *target = cpu_to_le32(abs_offset);
+ store_le32_unaligned(cpu_to_le32(abs_offset), target);
}
}
static void
-undo_translate_target(sle32 *target, s32 input_pos)
+undo_translate_target(void *target, s32 input_pos)
{
s32 abs_offset, rel_offset;
- /* XXX: This assumes unaligned memory accesses are okay. */
- abs_offset = le32_to_cpu(*target);
+ abs_offset = le32_to_cpu(load_le32_unaligned(target));
if (abs_offset >= 0) {
if (abs_offset < LZX_WIM_MAGIC_FILESIZE) {
/* "good translation" */
rel_offset = abs_offset - input_pos;
- *target = cpu_to_le32(rel_offset);
+ store_le32_unaligned(cpu_to_le32(rel_offset), target);
}
} else {
if (abs_offset >= -input_pos) {
/* "compensating translation" */
rel_offset = abs_offset + LZX_WIM_MAGIC_FILESIZE;
- *target = cpu_to_le32(rel_offset);
+ store_le32_unaligned(cpu_to_le32(rel_offset), target);
}
}
}
SSE2 case, it bloats the binary more. */
#endif
void
-lzx_e8_filter(u8 *data, u32 size, void (*process_target)(sle32 *, s32))
+lzx_e8_filter(u8 *data, u32 size, void (*process_target)(void *, s32))
{
#ifdef __SSE2__
/* SSE2 vectorized implementation for x86_64. This speeds up LZX
/* Do (or undo) the e8 translation. */
u8 *p8 = (u8 *)p128 + bit;
- (*process_target)((sle32 *)(p8 + 1),
+ (*process_target)(p8 + 1,
p8 - data);
/* Don't start an e8 translation in the
u8 *p8_end = data + size - 10;
do {
if (*p8 == 0xe8) {
- (*process_target)((sle32 *)(p8 + 1), p8 - data);
+ (*process_target)(p8 + 1, p8 - data);
p8 += 5;
} else {
p8++;