/*
- * lzms-common.c
- *
- * Code shared between the compressor and decompressor for the LZMS compression
- * format.
+ * lzms-common.c - Common code for LZMS compression and decompression
*/
/*
- * Copyright (C) 2013 Eric Biggers
- *
- * This file is part of wimlib, a library for working with WIM files.
+ * Copyright (C) 2013, 2014 Eric Biggers
*
- * wimlib is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.
+ * This file is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option) any
+ * later version.
*
- * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
- * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
- * You should have received a copy of the GNU General Public License
- * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this file; if not, see http://www.gnu.org/licenses/.
*/
#ifdef HAVE_CONFIG_H
#include "wimlib/endianness.h"
#include "wimlib/lzms.h"
-#include "wimlib/util.h"
-
-#include <pthread.h>
-
-/***************************************************************
- * Constant tables initialized by lzms_compute_slots(): *
- ***************************************************************/
-
-/* Table: position slot => position slot base value */
-u32 lzms_position_slot_base[LZMS_MAX_NUM_OFFSET_SYMS + 1];
-
-/* Table: position slot => number of extra position bits */
-u8 lzms_extra_position_bits[LZMS_MAX_NUM_OFFSET_SYMS];
-
-/* Table: log2(position) => [lower bound, upper bound] on position slot */
-u16 lzms_order_to_position_slot_bounds[30][2];
+#include "wimlib/unaligned.h"
+
+/* Table: offset slot => offset slot base value */
+const u32 lzms_offset_slot_base[LZMS_MAX_NUM_OFFSET_SYMS + 1] = {
+ 0x00000001, 0x00000002, 0x00000003, 0x00000004,
+ 0x00000005, 0x00000006, 0x00000007, 0x00000008,
+ 0x00000009, 0x0000000d, 0x00000011, 0x00000015,
+ 0x00000019, 0x0000001d, 0x00000021, 0x00000025,
+ 0x00000029, 0x0000002d, 0x00000035, 0x0000003d,
+ 0x00000045, 0x0000004d, 0x00000055, 0x0000005d,
+ 0x00000065, 0x00000075, 0x00000085, 0x00000095,
+ 0x000000a5, 0x000000b5, 0x000000c5, 0x000000d5,
+ 0x000000e5, 0x000000f5, 0x00000105, 0x00000125,
+ 0x00000145, 0x00000165, 0x00000185, 0x000001a5,
+ 0x000001c5, 0x000001e5, 0x00000205, 0x00000225,
+ 0x00000245, 0x00000265, 0x00000285, 0x000002a5,
+ 0x000002c5, 0x000002e5, 0x00000325, 0x00000365,
+ 0x000003a5, 0x000003e5, 0x00000425, 0x00000465,
+ 0x000004a5, 0x000004e5, 0x00000525, 0x00000565,
+ 0x000005a5, 0x000005e5, 0x00000625, 0x00000665,
+ 0x000006a5, 0x00000725, 0x000007a5, 0x00000825,
+ 0x000008a5, 0x00000925, 0x000009a5, 0x00000a25,
+ 0x00000aa5, 0x00000b25, 0x00000ba5, 0x00000c25,
+ 0x00000ca5, 0x00000d25, 0x00000da5, 0x00000e25,
+ 0x00000ea5, 0x00000f25, 0x00000fa5, 0x00001025,
+ 0x000010a5, 0x000011a5, 0x000012a5, 0x000013a5,
+ 0x000014a5, 0x000015a5, 0x000016a5, 0x000017a5,
+ 0x000018a5, 0x000019a5, 0x00001aa5, 0x00001ba5,
+ 0x00001ca5, 0x00001da5, 0x00001ea5, 0x00001fa5,
+ 0x000020a5, 0x000021a5, 0x000022a5, 0x000023a5,
+ 0x000024a5, 0x000026a5, 0x000028a5, 0x00002aa5,
+ 0x00002ca5, 0x00002ea5, 0x000030a5, 0x000032a5,
+ 0x000034a5, 0x000036a5, 0x000038a5, 0x00003aa5,
+ 0x00003ca5, 0x00003ea5, 0x000040a5, 0x000042a5,
+ 0x000044a5, 0x000046a5, 0x000048a5, 0x00004aa5,
+ 0x00004ca5, 0x00004ea5, 0x000050a5, 0x000052a5,
+ 0x000054a5, 0x000056a5, 0x000058a5, 0x00005aa5,
+ 0x00005ca5, 0x00005ea5, 0x000060a5, 0x000064a5,
+ 0x000068a5, 0x00006ca5, 0x000070a5, 0x000074a5,
+ 0x000078a5, 0x00007ca5, 0x000080a5, 0x000084a5,
+ 0x000088a5, 0x00008ca5, 0x000090a5, 0x000094a5,
+ 0x000098a5, 0x00009ca5, 0x0000a0a5, 0x0000a4a5,
+ 0x0000a8a5, 0x0000aca5, 0x0000b0a5, 0x0000b4a5,
+ 0x0000b8a5, 0x0000bca5, 0x0000c0a5, 0x0000c4a5,
+ 0x0000c8a5, 0x0000cca5, 0x0000d0a5, 0x0000d4a5,
+ 0x0000d8a5, 0x0000dca5, 0x0000e0a5, 0x0000e4a5,
+ 0x0000eca5, 0x0000f4a5, 0x0000fca5, 0x000104a5,
+ 0x00010ca5, 0x000114a5, 0x00011ca5, 0x000124a5,
+ 0x00012ca5, 0x000134a5, 0x00013ca5, 0x000144a5,
+ 0x00014ca5, 0x000154a5, 0x00015ca5, 0x000164a5,
+ 0x00016ca5, 0x000174a5, 0x00017ca5, 0x000184a5,
+ 0x00018ca5, 0x000194a5, 0x00019ca5, 0x0001a4a5,
+ 0x0001aca5, 0x0001b4a5, 0x0001bca5, 0x0001c4a5,
+ 0x0001cca5, 0x0001d4a5, 0x0001dca5, 0x0001e4a5,
+ 0x0001eca5, 0x0001f4a5, 0x0001fca5, 0x000204a5,
+ 0x00020ca5, 0x000214a5, 0x00021ca5, 0x000224a5,
+ 0x000234a5, 0x000244a5, 0x000254a5, 0x000264a5,
+ 0x000274a5, 0x000284a5, 0x000294a5, 0x0002a4a5,
+ 0x0002b4a5, 0x0002c4a5, 0x0002d4a5, 0x0002e4a5,
+ 0x0002f4a5, 0x000304a5, 0x000314a5, 0x000324a5,
+ 0x000334a5, 0x000344a5, 0x000354a5, 0x000364a5,
+ 0x000374a5, 0x000384a5, 0x000394a5, 0x0003a4a5,
+ 0x0003b4a5, 0x0003c4a5, 0x0003d4a5, 0x0003e4a5,
+ 0x0003f4a5, 0x000404a5, 0x000414a5, 0x000424a5,
+ 0x000434a5, 0x000444a5, 0x000454a5, 0x000464a5,
+ 0x000474a5, 0x000484a5, 0x000494a5, 0x0004a4a5,
+ 0x0004b4a5, 0x0004c4a5, 0x0004e4a5, 0x000504a5,
+ 0x000524a5, 0x000544a5, 0x000564a5, 0x000584a5,
+ 0x0005a4a5, 0x0005c4a5, 0x0005e4a5, 0x000604a5,
+ 0x000624a5, 0x000644a5, 0x000664a5, 0x000684a5,
+ 0x0006a4a5, 0x0006c4a5, 0x0006e4a5, 0x000704a5,
+ 0x000724a5, 0x000744a5, 0x000764a5, 0x000784a5,
+ 0x0007a4a5, 0x0007c4a5, 0x0007e4a5, 0x000804a5,
+ 0x000824a5, 0x000844a5, 0x000864a5, 0x000884a5,
+ 0x0008a4a5, 0x0008c4a5, 0x0008e4a5, 0x000904a5,
+ 0x000924a5, 0x000944a5, 0x000964a5, 0x000984a5,
+ 0x0009a4a5, 0x0009c4a5, 0x0009e4a5, 0x000a04a5,
+ 0x000a24a5, 0x000a44a5, 0x000a64a5, 0x000aa4a5,
+ 0x000ae4a5, 0x000b24a5, 0x000b64a5, 0x000ba4a5,
+ 0x000be4a5, 0x000c24a5, 0x000c64a5, 0x000ca4a5,
+ 0x000ce4a5, 0x000d24a5, 0x000d64a5, 0x000da4a5,
+ 0x000de4a5, 0x000e24a5, 0x000e64a5, 0x000ea4a5,
+ 0x000ee4a5, 0x000f24a5, 0x000f64a5, 0x000fa4a5,
+ 0x000fe4a5, 0x001024a5, 0x001064a5, 0x0010a4a5,
+ 0x0010e4a5, 0x001124a5, 0x001164a5, 0x0011a4a5,
+ 0x0011e4a5, 0x001224a5, 0x001264a5, 0x0012a4a5,
+ 0x0012e4a5, 0x001324a5, 0x001364a5, 0x0013a4a5,
+ 0x0013e4a5, 0x001424a5, 0x001464a5, 0x0014a4a5,
+ 0x0014e4a5, 0x001524a5, 0x001564a5, 0x0015a4a5,
+ 0x0015e4a5, 0x001624a5, 0x001664a5, 0x0016a4a5,
+ 0x0016e4a5, 0x001724a5, 0x001764a5, 0x0017a4a5,
+ 0x0017e4a5, 0x001824a5, 0x001864a5, 0x0018a4a5,
+ 0x0018e4a5, 0x001924a5, 0x001964a5, 0x0019e4a5,
+ 0x001a64a5, 0x001ae4a5, 0x001b64a5, 0x001be4a5,
+ 0x001c64a5, 0x001ce4a5, 0x001d64a5, 0x001de4a5,
+ 0x001e64a5, 0x001ee4a5, 0x001f64a5, 0x001fe4a5,
+ 0x002064a5, 0x0020e4a5, 0x002164a5, 0x0021e4a5,
+ 0x002264a5, 0x0022e4a5, 0x002364a5, 0x0023e4a5,
+ 0x002464a5, 0x0024e4a5, 0x002564a5, 0x0025e4a5,
+ 0x002664a5, 0x0026e4a5, 0x002764a5, 0x0027e4a5,
+ 0x002864a5, 0x0028e4a5, 0x002964a5, 0x0029e4a5,
+ 0x002a64a5, 0x002ae4a5, 0x002b64a5, 0x002be4a5,
+ 0x002c64a5, 0x002ce4a5, 0x002d64a5, 0x002de4a5,
+ 0x002e64a5, 0x002ee4a5, 0x002f64a5, 0x002fe4a5,
+ 0x003064a5, 0x0030e4a5, 0x003164a5, 0x0031e4a5,
+ 0x003264a5, 0x0032e4a5, 0x003364a5, 0x0033e4a5,
+ 0x003464a5, 0x0034e4a5, 0x003564a5, 0x0035e4a5,
+ 0x003664a5, 0x0036e4a5, 0x003764a5, 0x0037e4a5,
+ 0x003864a5, 0x0038e4a5, 0x003964a5, 0x0039e4a5,
+ 0x003a64a5, 0x003ae4a5, 0x003b64a5, 0x003be4a5,
+ 0x003c64a5, 0x003ce4a5, 0x003d64a5, 0x003de4a5,
+ 0x003ee4a5, 0x003fe4a5, 0x0040e4a5, 0x0041e4a5,
+ 0x0042e4a5, 0x0043e4a5, 0x0044e4a5, 0x0045e4a5,
+ 0x0046e4a5, 0x0047e4a5, 0x0048e4a5, 0x0049e4a5,
+ 0x004ae4a5, 0x004be4a5, 0x004ce4a5, 0x004de4a5,
+ 0x004ee4a5, 0x004fe4a5, 0x0050e4a5, 0x0051e4a5,
+ 0x0052e4a5, 0x0053e4a5, 0x0054e4a5, 0x0055e4a5,
+ 0x0056e4a5, 0x0057e4a5, 0x0058e4a5, 0x0059e4a5,
+ 0x005ae4a5, 0x005be4a5, 0x005ce4a5, 0x005de4a5,
+ 0x005ee4a5, 0x005fe4a5, 0x0060e4a5, 0x0061e4a5,
+ 0x0062e4a5, 0x0063e4a5, 0x0064e4a5, 0x0065e4a5,
+ 0x0066e4a5, 0x0067e4a5, 0x0068e4a5, 0x0069e4a5,
+ 0x006ae4a5, 0x006be4a5, 0x006ce4a5, 0x006de4a5,
+ 0x006ee4a5, 0x006fe4a5, 0x0070e4a5, 0x0071e4a5,
+ 0x0072e4a5, 0x0073e4a5, 0x0074e4a5, 0x0075e4a5,
+ 0x0076e4a5, 0x0077e4a5, 0x0078e4a5, 0x0079e4a5,
+ 0x007ae4a5, 0x007be4a5, 0x007ce4a5, 0x007de4a5,
+ 0x007ee4a5, 0x007fe4a5, 0x0080e4a5, 0x0081e4a5,
+ 0x0082e4a5, 0x0083e4a5, 0x0084e4a5, 0x0085e4a5,
+ 0x0086e4a5, 0x0087e4a5, 0x0088e4a5, 0x0089e4a5,
+ 0x008ae4a5, 0x008be4a5, 0x008ce4a5, 0x008de4a5,
+ 0x008fe4a5, 0x0091e4a5, 0x0093e4a5, 0x0095e4a5,
+ 0x0097e4a5, 0x0099e4a5, 0x009be4a5, 0x009de4a5,
+ 0x009fe4a5, 0x00a1e4a5, 0x00a3e4a5, 0x00a5e4a5,
+ 0x00a7e4a5, 0x00a9e4a5, 0x00abe4a5, 0x00ade4a5,
+ 0x00afe4a5, 0x00b1e4a5, 0x00b3e4a5, 0x00b5e4a5,
+ 0x00b7e4a5, 0x00b9e4a5, 0x00bbe4a5, 0x00bde4a5,
+ 0x00bfe4a5, 0x00c1e4a5, 0x00c3e4a5, 0x00c5e4a5,
+ 0x00c7e4a5, 0x00c9e4a5, 0x00cbe4a5, 0x00cde4a5,
+ 0x00cfe4a5, 0x00d1e4a5, 0x00d3e4a5, 0x00d5e4a5,
+ 0x00d7e4a5, 0x00d9e4a5, 0x00dbe4a5, 0x00dde4a5,
+ 0x00dfe4a5, 0x00e1e4a5, 0x00e3e4a5, 0x00e5e4a5,
+ 0x00e7e4a5, 0x00e9e4a5, 0x00ebe4a5, 0x00ede4a5,
+ 0x00efe4a5, 0x00f1e4a5, 0x00f3e4a5, 0x00f5e4a5,
+ 0x00f7e4a5, 0x00f9e4a5, 0x00fbe4a5, 0x00fde4a5,
+ 0x00ffe4a5, 0x0101e4a5, 0x0103e4a5, 0x0105e4a5,
+ 0x0107e4a5, 0x0109e4a5, 0x010be4a5, 0x010de4a5,
+ 0x010fe4a5, 0x0111e4a5, 0x0113e4a5, 0x0115e4a5,
+ 0x0117e4a5, 0x0119e4a5, 0x011be4a5, 0x011de4a5,
+ 0x011fe4a5, 0x0121e4a5, 0x0123e4a5, 0x0125e4a5,
+ 0x0127e4a5, 0x0129e4a5, 0x012be4a5, 0x012de4a5,
+ 0x012fe4a5, 0x0131e4a5, 0x0133e4a5, 0x0135e4a5,
+ 0x0137e4a5, 0x013be4a5, 0x013fe4a5, 0x0143e4a5,
+ 0x0147e4a5, 0x014be4a5, 0x014fe4a5, 0x0153e4a5,
+ 0x0157e4a5, 0x015be4a5, 0x015fe4a5, 0x0163e4a5,
+ 0x0167e4a5, 0x016be4a5, 0x016fe4a5, 0x0173e4a5,
+ 0x0177e4a5, 0x017be4a5, 0x017fe4a5, 0x0183e4a5,
+ 0x0187e4a5, 0x018be4a5, 0x018fe4a5, 0x0193e4a5,
+ 0x0197e4a5, 0x019be4a5, 0x019fe4a5, 0x01a3e4a5,
+ 0x01a7e4a5, 0x01abe4a5, 0x01afe4a5, 0x01b3e4a5,
+ 0x01b7e4a5, 0x01bbe4a5, 0x01bfe4a5, 0x01c3e4a5,
+ 0x01c7e4a5, 0x01cbe4a5, 0x01cfe4a5, 0x01d3e4a5,
+ 0x01d7e4a5, 0x01dbe4a5, 0x01dfe4a5, 0x01e3e4a5,
+ 0x01e7e4a5, 0x01ebe4a5, 0x01efe4a5, 0x01f3e4a5,
+ 0x01f7e4a5, 0x01fbe4a5, 0x01ffe4a5, 0x0203e4a5,
+ 0x0207e4a5, 0x020be4a5, 0x020fe4a5, 0x0213e4a5,
+ 0x0217e4a5, 0x021be4a5, 0x021fe4a5, 0x0223e4a5,
+ 0x0227e4a5, 0x022be4a5, 0x022fe4a5, 0x0233e4a5,
+ 0x0237e4a5, 0x023be4a5, 0x023fe4a5, 0x0243e4a5,
+ 0x0247e4a5, 0x024be4a5, 0x024fe4a5, 0x0253e4a5,
+ 0x0257e4a5, 0x025be4a5, 0x025fe4a5, 0x0263e4a5,
+ 0x0267e4a5, 0x026be4a5, 0x026fe4a5, 0x0273e4a5,
+ 0x0277e4a5, 0x027be4a5, 0x027fe4a5, 0x0283e4a5,
+ 0x0287e4a5, 0x028be4a5, 0x028fe4a5, 0x0293e4a5,
+ 0x0297e4a5, 0x029be4a5, 0x029fe4a5, 0x02a3e4a5,
+ 0x02a7e4a5, 0x02abe4a5, 0x02afe4a5, 0x02b3e4a5,
+ 0x02bbe4a5, 0x02c3e4a5, 0x02cbe4a5, 0x02d3e4a5,
+ 0x02dbe4a5, 0x02e3e4a5, 0x02ebe4a5, 0x02f3e4a5,
+ 0x02fbe4a5, 0x0303e4a5, 0x030be4a5, 0x0313e4a5,
+ 0x031be4a5, 0x0323e4a5, 0x032be4a5, 0x0333e4a5,
+ 0x033be4a5, 0x0343e4a5, 0x034be4a5, 0x0353e4a5,
+ 0x035be4a5, 0x0363e4a5, 0x036be4a5, 0x0373e4a5,
+ 0x037be4a5, 0x0383e4a5, 0x038be4a5, 0x0393e4a5,
+ 0x039be4a5, 0x03a3e4a5, 0x03abe4a5, 0x03b3e4a5,
+ 0x03bbe4a5, 0x03c3e4a5, 0x03cbe4a5, 0x03d3e4a5,
+ 0x03dbe4a5, 0x03e3e4a5, 0x03ebe4a5, 0x03f3e4a5,
+ 0x03fbe4a5, 0x0403e4a5, 0x040be4a5, 0x0413e4a5,
+ 0x041be4a5, 0x0423e4a5, 0x042be4a5, 0x0433e4a5,
+ 0x043be4a5, 0x0443e4a5, 0x044be4a5, 0x0453e4a5,
+ 0x045be4a5, 0x0463e4a5, 0x046be4a5, 0x0473e4a5,
+ 0x047be4a5, 0x0483e4a5, 0x048be4a5, 0x0493e4a5,
+ 0x049be4a5, 0x04a3e4a5, 0x04abe4a5, 0x04b3e4a5,
+ 0x04bbe4a5, 0x04c3e4a5, 0x04cbe4a5, 0x04d3e4a5,
+ 0x04dbe4a5, 0x04e3e4a5, 0x04ebe4a5, 0x04f3e4a5,
+ 0x04fbe4a5, 0x0503e4a5, 0x050be4a5, 0x0513e4a5,
+ 0x051be4a5, 0x0523e4a5, 0x052be4a5, 0x0533e4a5,
+ 0x053be4a5, 0x0543e4a5, 0x054be4a5, 0x0553e4a5,
+ 0x055be4a5, 0x0563e4a5, 0x056be4a5, 0x0573e4a5,
+ 0x057be4a5, 0x0583e4a5, 0x058be4a5, 0x0593e4a5,
+ 0x059be4a5, 0x05a3e4a5, 0x05abe4a5, 0x05b3e4a5,
+ 0x05bbe4a5, 0x05c3e4a5, 0x05cbe4a5, 0x05d3e4a5,
+ 0x05dbe4a5, 0x05e3e4a5, 0x05ebe4a5, 0x05f3e4a5,
+ 0x05fbe4a5, 0x060be4a5, 0x061be4a5, 0x062be4a5,
+ 0x063be4a5, 0x064be4a5, 0x065be4a5, 0x465be4a5,
+ /* The last entry is extra; it is equal to LZMS_MAX_MATCH_OFFSET + 1 and
+ * is here to aid binary search. */
+};
+
+/* Table: offset slot => number of extra offset bits */
+const u8 lzms_extra_offset_bits[LZMS_MAX_NUM_OFFSET_SYMS] = {
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2,
+ 2 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4,
+ 4 , 4 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5,
+ 5 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6,
+ 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7,
+ 7 , 7 , 7 , 7 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8,
+ 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9,
+ 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9,
+ 9 , 9 , 9 , 9 , 9 , 9 , 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 30,
+};
/* Table: length slot => length slot base value */
-u32 lzms_length_slot_base[LZMS_NUM_LEN_SYMS + 1];
+const u32 lzms_length_slot_base[LZMS_NUM_LENGTH_SYMS + 1] = {
+ 0x00000001, 0x00000002, 0x00000003, 0x00000004,
+ 0x00000005, 0x00000006, 0x00000007, 0x00000008,
+ 0x00000009, 0x0000000a, 0x0000000b, 0x0000000c,
+ 0x0000000d, 0x0000000e, 0x0000000f, 0x00000010,
+ 0x00000011, 0x00000012, 0x00000013, 0x00000014,
+ 0x00000015, 0x00000016, 0x00000017, 0x00000018,
+ 0x00000019, 0x0000001a, 0x0000001b, 0x0000001d,
+ 0x0000001f, 0x00000021, 0x00000023, 0x00000027,
+ 0x0000002b, 0x0000002f, 0x00000033, 0x00000037,
+ 0x0000003b, 0x00000043, 0x0000004b, 0x00000053,
+ 0x0000005b, 0x0000006b, 0x0000007b, 0x0000008b,
+ 0x0000009b, 0x000000ab, 0x000000cb, 0x000000eb,
+ 0x0000012b, 0x000001ab, 0x000002ab, 0x000004ab,
+ 0x000008ab, 0x000108ab, 0x400108ab,
+ /* The last entry is extra; it is equal to LZMS_MAX_MATCH_LEN + 1 and is
+ * here to aid binary search. */
+};
/* Table: length slot => number of extra length bits */
-u8 lzms_extra_length_bits[LZMS_NUM_LEN_SYMS];
-
-/* Table: length (< LZMS_NUM_FAST_LENGTHS only) => length slot */
-u8 lzms_length_slot_fast[LZMS_NUM_FAST_LENGTHS];
-
-u32
+const u8 lzms_extra_length_bits[LZMS_NUM_LENGTH_SYMS] = {
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 1 , 1 , 1 , 1 , 2 , 2 ,
+ 2 , 2 , 2 , 2 , 3 , 3 , 3 , 3 ,
+ 4 , 4 , 4 , 4 , 4 , 5 , 5 , 6 ,
+ 7 , 8 , 9 , 10, 16, 30,
+};
+
+unsigned
lzms_get_slot(u32 value, const u32 slot_base_tab[], unsigned num_slots)
{
- u32 l = 0;
- u32 r = num_slots - 1;
+ unsigned l = 0;
+ unsigned r = num_slots - 1;
for (;;) {
- LZMS_ASSERT(r >= l);
- u32 slot = (l + r) / 2;
+ unsigned slot = (l + r) / 2;
if (value >= slot_base_tab[slot]) {
if (value < slot_base_tab[slot + 1])
return slot;
}
}
-static void
-lzms_decode_delta_rle_slot_bases(u32 slot_bases[],
- u8 extra_bits[],
- const u8 delta_run_lens[],
- u32 num_run_lens,
- u32 final,
- u32 expected_num_slots)
-{
- u32 order = 0;
- u32 delta = 1;
- u32 base = 0;
- u32 slot = 0;
- for (u32 i = 0; i < num_run_lens; i++) {
- u8 run_len = delta_run_lens[i];
- while (run_len--) {
- base += delta;
- if (slot > 0)
- extra_bits[slot - 1] = order;
- slot_bases[slot] = base;
- slot++;
- }
- delta <<= 1;
- order++;
- }
- LZMS_ASSERT(slot == expected_num_slots);
-
- slot_bases[slot] = final;
- extra_bits[slot - 1] = bsr32(slot_bases[slot] - slot_bases[slot - 1]);
-}
-
-/* Initialize the global position and length slot tables. */
-static void
-lzms_compute_slots(void)
+/* Return the number of offset slots used when processing a buffer having the
+ * specified uncompressed size. */
+unsigned
+lzms_get_num_offset_slots(size_t uncompressed_size)
{
- /* If an explicit formula that maps LZMS position and length slots to
- * slot bases exists, then it could be used here. But until one is
- * found, the following code fills in the slots using the observation
- * that the increase from one slot base to the next is an increasing
- * power of 2. Therefore, run-length encoding of the delta of adjacent
- * entries can be used. */
- static const u8 position_slot_delta_run_lens[] = {
- 9, 0, 9, 7, 10, 15, 15, 20,
- 20, 30, 33, 40, 42, 45, 60, 73,
- 80, 85, 95, 105, 6,
- };
-
- static const u8 length_slot_delta_run_lens[] = {
- 27, 4, 6, 4, 5, 2, 1, 1,
- 1, 1, 1, 0, 0, 0, 0, 0,
- 1,
- };
-
- /* Position slots */
- lzms_decode_delta_rle_slot_bases(lzms_position_slot_base,
- lzms_extra_position_bits,
- position_slot_delta_run_lens,
- ARRAY_LEN(position_slot_delta_run_lens),
- 0x7fffffff,
- LZMS_MAX_NUM_OFFSET_SYMS);
-
- for (u32 order = 0; order < 30; order++) {
- lzms_order_to_position_slot_bounds[order][0] =
- lzms_get_slot(1U << order, lzms_position_slot_base,
- LZMS_MAX_NUM_OFFSET_SYMS);
- lzms_order_to_position_slot_bounds[order][1] =
- lzms_get_slot((1U << (order + 1)) - 1, lzms_position_slot_base,
- LZMS_MAX_NUM_OFFSET_SYMS);
- }
-
- /* Length slots */
- lzms_decode_delta_rle_slot_bases(lzms_length_slot_base,
- lzms_extra_length_bits,
- length_slot_delta_run_lens,
- ARRAY_LEN(length_slot_delta_run_lens),
- 0x400108ab,
- LZMS_NUM_LEN_SYMS);
-
- /* Create table mapping short lengths to length slots. */
- for (u32 slot = 0, i = 0; i < LZMS_NUM_FAST_LENGTHS; i++) {
- if (i >= lzms_length_slot_base[slot + 1])
- slot++;
- lzms_length_slot_fast[i] = slot;
- }
+ if (uncompressed_size < 2)
+ return 0;
+ return 1 + lzms_get_offset_slot(uncompressed_size - 1);
}
-/* Initialize the global position and length slot tables if not done so already.
- * */
void
-lzms_init_slots(void)
+lzms_init_probability_entries(struct lzms_probability_entry *entries, size_t count)
{
- static bool done = false;
- static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
-
- if (unlikely(!done)) {
- pthread_mutex_lock(&mutex);
- if (!done) {
- lzms_compute_slots();
- done = true;
- }
- pthread_mutex_unlock(&mutex);
+ for (size_t i = 0; i < count; i++) {
+ entries[i].num_recent_zero_bits = LZMS_INITIAL_PROBABILITY;
+ entries[i].recent_bits = LZMS_INITIAL_RECENT_BITS;
}
}
-static s32
-lzms_maybe_do_x86_translation(u8 data[restrict], s32 i, s32 num_op_bytes,
- s32 * restrict closest_target_usage_p,
- s32 last_target_usages[restrict],
- s32 max_trans_offset, bool undo)
-{
- u16 pos;
-
- if (undo) {
- if (i - *closest_target_usage_p <= max_trans_offset) {
- LZMS_DEBUG("Undid x86 translation at position %d "
- "(opcode 0x%02x)", i, data[i]);
- le32 *p32 = (le32*)&data[i + num_op_bytes];
- u32 n = le32_to_cpu(*p32);
- *p32 = cpu_to_le32(n - i);
- }
- pos = i + le16_to_cpu(*(const le16*)&data[i + num_op_bytes]);
- } else {
- pos = i + le16_to_cpu(*(const le16*)&data[i + num_op_bytes]);
-
- if (i - *closest_target_usage_p <= max_trans_offset) {
- LZMS_DEBUG("Did x86 translation at position %d "
- "(opcode 0x%02x)", i, data[i]);
- le32 *p32 = (le32*)&data[i + num_op_bytes];
- u32 n = le32_to_cpu(*p32);
- *p32 = cpu_to_le32(n + i);
- }
- }
-
- i += num_op_bytes + sizeof(le32) - 1;
-
- if (i - last_target_usages[pos] <= LZMS_X86_MAX_GOOD_TARGET_OFFSET)
- *closest_target_usage_p = i;
-
- last_target_usages[pos] = i;
-
- return i + 1;
-}
-
-static s32
-lzms_may_x86_translate(const u8 p[restrict],
- s32 *restrict max_offset_ret)
+void
+lzms_init_symbol_frequencies(u32 freqs[], size_t num_syms)
{
- /* Switch on first byte of the opcode, assuming it is really an x86
- * instruction. */
- *max_offset_ret = LZMS_X86_MAX_TRANSLATION_OFFSET;
- switch (p[0]) {
- case 0x48:
- if (p[1] == 0x8b) {
- if (p[2] == 0x5 || p[2] == 0xd) {
- /* Load relative (x86_64) */
- return 3;
- }
- } else if (p[1] == 0x8d) {
- if ((p[2] & 0x7) == 0x5) {
- /* Load effective address relative (x86_64) */
- return 3;
- }
- }
- break;
-
- case 0x4c:
- if (p[1] == 0x8d) {
- if ((p[2] & 0x7) == 0x5) {
- /* Load effective address relative (x86_64) */
- return 3;
- }
- }
- break;
-
- case 0xe8:
- /* Call relative */
- *max_offset_ret = LZMS_X86_MAX_TRANSLATION_OFFSET / 2;
- return 1;
-
- case 0xe9:
- /* Jump relative */
- *max_offset_ret = 0;
- return 5;
-
- case 0xf0:
- if (p[1] == 0x83 && p[2] == 0x05) {
- /* Lock add relative */
- return 3;
- }
- break;
-
- case 0xff:
- if (p[1] == 0x15) {
- /* Call indirect */
- return 2;
- }
- break;
- }
- *max_offset_ret = 0;
- return 1;
+ for (size_t i = 0; i < num_syms; i++)
+ freqs[i] = 1;
}
/*
* Translate relative addresses embedded in x86 instructions into absolute
* addresses (@undo == %false), or undo this translation (@undo == %true).
*
- * @last_target_usages is a temporary array of length >= 65536.
+ * Absolute addresses are usually more compressible by LZ factorization.
+ *
+ * @last_target_usages must be a temporary array of length >= 65536.
*/
void
-lzms_x86_filter(u8 data[restrict],
- s32 size,
- s32 last_target_usages[restrict],
- bool undo)
+lzms_x86_filter(u8 data[restrict], s32 size,
+ s32 last_target_usages[restrict], bool undo)
{
- s32 closest_target_usage = -LZMS_X86_MAX_TRANSLATION_OFFSET - 1;
-
- for (s32 i = 0; i < 65536; i++)
- last_target_usages[i] = -LZMS_X86_MAX_GOOD_TARGET_OFFSET - 1;
-
- for (s32 i = 0; i < size - 11; ) {
+ /*
+ * Note: this filter runs unconditionally and uses a custom algorithm to
+ * detect data regions that probably contain x86 code.
+ *
+ * 'last_x86_pos' tracks the most recent position that has a good chance
+ * of being the start of an x86 instruction. When the filter detects a
+ * likely x86 instruction, it updates this variable and considers the
+ * next LZMS_X86_MAX_TRANSLATION_OFFSET bytes of data as valid for x86
+ * translations.
+ *
+ * If part of the data does not, in fact, contain x86 machine code, then
+ * 'last_x86_pos' will, very likely, eventually fall more than
+ * LZMS_X86_MAX_TRANSLATION_OFFSET bytes behind the current position.
+ * This results in x86 translations being disabled until the next likely
+ * x86 instruction is detected.
+ *
+ * To identify "likely x86 instructions", the algorithm attempts to
+ * track the position of the most recent potential relative-addressing
+ * instruction that referenced each possible memory address. If it
+ * finds two references to the same memory address within an
+ * LZMS_X86_ID_WINDOW_SIZE-byte sized window, then the second reference
+ * is flagged as a likely x86 instruction. Since the instructions
+ * considered for translation necessarily use relative addressing, the
+ * algorithm does a tentative translation into absolute addresses. In
+ * addition, so that memory addresses can be looked up in an array of
+ * reasonable size (in this code, 'last_target_usages'), only the
+ * low-order 2 bytes of each address are considered significant.
+ */
+
+ s32 i;
+ s32 tail_idx;
+ u8 saved_byte;
+ s32 last_x86_pos;
+
+ if (size <= 17)
+ return;
+
+ for (i = 0; i < 65536; i++)
+ last_target_usages[i] = -(s32)LZMS_X86_ID_WINDOW_SIZE - 1;
+
+ /*
+ * Optimization: only check for end-of-buffer when we already have a
+ * byte that is a potential opcode for x86 translation. To do this,
+ * overwrite one of the bytes near the end of the buffer, and restore it
+ * later. The correctness of this optimization relies on two
+ * characteristics of compressed format:
+ *
+ * 1. No translation can follow an opcode beginning in the last 16
+ * bytes.
+ * 2. A translation following an opcode starting at the last possible
+ * position (17 bytes from the end) never extends more than 7 bytes.
+ * Consequently, we can overwrite any of the bytes starting at
+ * data[(size - 16) + 7] and have no effect on the result, as long
+ * as we restore those bytes later.
+ */
+ tail_idx = size - 16;
+ saved_byte = data[tail_idx + 8];
+ data[tail_idx + 8] = 0xE8;
+ last_x86_pos = -LZMS_X86_MAX_TRANSLATION_OFFSET - 1;
+
+ /* Note: the very first byte must be ignored completely! */
+ i = 0;
+ for (;;) {
s32 max_trans_offset;
- s32 n;
-
- n = lzms_may_x86_translate(data + i, &max_trans_offset);
- if (max_trans_offset) {
- i = lzms_maybe_do_x86_translation(data, i, n,
- &closest_target_usage,
- last_target_usages,
- max_trans_offset,
- undo);
- } else {
- i += n;
+ s32 opcode_nbytes;
+ u16 target16;
+
+ /*
+ * The following table is used to accelerate the common case
+ * where the byte has nothing to do with x86 translation and
+ * must simply be skipped. This is the fastest (at least on
+ * x86_64) of the implementations I tested. The other
+ * implementations I tested were:
+ * - Jump table with 256 entries
+ * - Switch statement with default
+ */
+ static const u8 is_potential_opcode[256] = {
+ [0x48] = 1, [0x4C] = 1, [0xE8] = 1,
+ [0xE9] = 1, [0xF0] = 1, [0xFF] = 1,
+ };
+
+ for (;;) {
+ if (is_potential_opcode[data[++i]])
+ break;
+ if (is_potential_opcode[data[++i]])
+ break;
+ if (is_potential_opcode[data[++i]])
+ break;
+ if (is_potential_opcode[data[++i]])
+ break;
}
- }
-}
-static void
-lzms_init_lz_lru_queues(struct lzms_lz_lru_queues *lz)
-{
- /* Recent offsets for LZ matches */
- for (u32 i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++)
- lz->recent_offsets[i] = i + 1;
+ if (i >= tail_idx)
+ break;
+
+ max_trans_offset = LZMS_X86_MAX_TRANSLATION_OFFSET;
+ switch (data[i]) {
+ case 0x48:
+ if (data[i + 1] == 0x8B) {
+ if (data[i + 2] == 0x5 || data[i + 2] == 0xD) {
+ /* Load relative (x86_64) */
+ opcode_nbytes = 3;
+ goto have_opcode;
+ }
+ } else if (data[i + 1] == 0x8D) {
+ if ((data[i + 2] & 0x7) == 0x5) {
+ /* Load effective address relative (x86_64) */
+ opcode_nbytes = 3;
+ goto have_opcode;
+ }
+ }
+ break;
+ case 0x4C:
+ if (data[i + 1] == 0x8D) {
+ if ((data[i + 2] & 0x7) == 0x5) {
+ /* Load effective address relative (x86_64) */
+ opcode_nbytes = 3;
+ goto have_opcode;
+ }
+ }
+ break;
+ case 0xE8:
+ /* Call relative. Note: 'max_trans_offset' must be
+ * halved for this instruction. This means that we must
+ * be more confident that we are in a region of x86
+ * machine code before we will do a translation for this
+ * particular instruction. */
+ opcode_nbytes = 1;
+ max_trans_offset /= 2;
+ goto have_opcode;
+ case 0xE9:
+ /* Jump relative */
+ i += 4;
+ break;
+ case 0xF0:
+ if (data[i + 1] == 0x83 && data[i + 2] == 0x05) {
+ /* Lock add relative */
+ opcode_nbytes = 3;
+ goto have_opcode;
+ }
+ break;
+ case 0xFF:
+ if (data[i + 1] == 0x15) {
+ /* Call indirect */
+ opcode_nbytes = 2;
+ goto have_opcode;
+ }
+ break;
+ }
- lz->prev_offset = 0;
- lz->upcoming_offset = 0;
-}
+ continue;
-static void
-lzms_init_delta_lru_queues(struct lzms_delta_lru_queues *delta)
-{
- /* Recent offsets and powers for LZ matches */
- for (u32 i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++) {
- delta->recent_offsets[i] = i + 1;
- delta->recent_powers[i] = 0;
- }
- delta->prev_offset = 0;
- delta->prev_power = 0;
- delta->upcoming_offset = 0;
- delta->upcoming_power = 0;
-}
+ have_opcode:
+ if (undo) {
+ if (i - last_x86_pos <= max_trans_offset) {
+ LZMS_DEBUG("Undid x86 translation at position %d "
+ "(opcode 0x%02x)", i, data[i]);
+ void *p32 = &data[i + opcode_nbytes];
+ u32 n = get_unaligned_u32_le(p32);
+ put_unaligned_u32_le(n - i, p32);
+ }
+ target16 = i + get_unaligned_u16_le(&data[i + opcode_nbytes]);
+ } else {
+ target16 = i + get_unaligned_u16_le(&data[i + opcode_nbytes]);
+ if (i - last_x86_pos <= max_trans_offset) {
+ LZMS_DEBUG("Did x86 translation at position %d "
+ "(opcode 0x%02x)", i, data[i]);
+ void *p32 = &data[i + opcode_nbytes];
+ u32 n = get_unaligned_u32_le(p32);
+ put_unaligned_u32_le(n + i, p32);
+ }
+ }
+ i += opcode_nbytes + sizeof(le32) - 1;
-void
-lzms_init_lru_queues(struct lzms_lru_queues *lru)
-{
- lzms_init_lz_lru_queues(&lru->lz);
- lzms_init_delta_lru_queues(&lru->delta);
-}
+ if (i - last_target_usages[target16] <= LZMS_X86_ID_WINDOW_SIZE)
+ last_x86_pos = i;
-void
-lzms_update_lz_lru_queues(struct lzms_lz_lru_queues *lz)
-{
- if (lz->prev_offset != 0) {
- for (int i = LZMS_NUM_RECENT_OFFSETS - 1; i >= 0; i--)
- lz->recent_offsets[i + 1] = lz->recent_offsets[i];
- lz->recent_offsets[0] = lz->prev_offset;
- }
- lz->prev_offset = lz->upcoming_offset;
-}
+ last_target_usages[target16] = i;
-void
-lzms_update_delta_lru_queues(struct lzms_delta_lru_queues *delta)
-{
- if (delta->prev_offset != 0) {
- for (int i = LZMS_NUM_RECENT_OFFSETS - 1; i >= 0; i--) {
- delta->recent_offsets[i + 1] = delta->recent_offsets[i];
- delta->recent_powers[i + 1] = delta->recent_powers[i];
- }
- delta->recent_offsets[0] = delta->prev_offset;
- delta->recent_powers[0] = delta->prev_power;
+ continue;
}
- delta->prev_offset = delta->upcoming_offset;
- delta->prev_power = delta->upcoming_power;
-}
-
-void
-lzms_update_lru_queues(struct lzms_lru_queues *lru)
-{
- lzms_update_lz_lru_queues(&lru->lz);
- lzms_update_delta_lru_queues(&lru->delta);
+ data[tail_idx + 8] = saved_byte;
}