]> wimlib.net Git - wimlib/blobdiff - src/lzms-common.c
LZMS: decompression optimizations
[wimlib] / src / lzms-common.c
index a6fc80149d3fcb044cbed00778e4f70bd6bce1dc..eaa237fa1eec7f5c2d08e1c71c7b209691c891df 100644 (file)
@@ -1,27 +1,22 @@
 /*
- * lzms-common.c
- *
- * Code shared between the compressor and decompressor for the LZMS compression
- * format.
+ * lzms-common.c - Common code for LZMS compression and decompression
  */
 
 /*
- * Copyright (C) 2013 Eric Biggers
- *
- * This file is part of wimlib, a library for working with WIM files.
+ * Copyright (C) 2013, 2014 Eric Biggers
  *
- * wimlib is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.
+ * This file is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option) any
+ * later version.
  *
- * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
- * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
  * details.
  *
- * You should have received a copy of the GNU General Public License
- * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this file; if not, see http://www.gnu.org/licenses/.
  */
 
 #ifdef HAVE_CONFIG_H
 #endif
 
 #include "wimlib/endianness.h"
-#include "wimlib/error.h"
 #include "wimlib/lzms.h"
-#include "wimlib/util.h"
-
-#include <pthread.h>
-
-/* A table that maps position slots to their base values.  These are constants
- * computed at runtime by lzms_compute_slot_bases().  */
-u32 lzms_position_slot_base[LZMS_MAX_NUM_OFFSET_SYMS + 1];
+#include "wimlib/unaligned.h"
+
+/* Table: offset slot => offset slot base value  */
+const u32 lzms_offset_slot_base[LZMS_MAX_NUM_OFFSET_SYMS + 1] = {
+       0x00000001, 0x00000002, 0x00000003, 0x00000004,
+       0x00000005, 0x00000006, 0x00000007, 0x00000008,
+       0x00000009, 0x0000000d, 0x00000011, 0x00000015,
+       0x00000019, 0x0000001d, 0x00000021, 0x00000025,
+       0x00000029, 0x0000002d, 0x00000035, 0x0000003d,
+       0x00000045, 0x0000004d, 0x00000055, 0x0000005d,
+       0x00000065, 0x00000075, 0x00000085, 0x00000095,
+       0x000000a5, 0x000000b5, 0x000000c5, 0x000000d5,
+       0x000000e5, 0x000000f5, 0x00000105, 0x00000125,
+       0x00000145, 0x00000165, 0x00000185, 0x000001a5,
+       0x000001c5, 0x000001e5, 0x00000205, 0x00000225,
+       0x00000245, 0x00000265, 0x00000285, 0x000002a5,
+       0x000002c5, 0x000002e5, 0x00000325, 0x00000365,
+       0x000003a5, 0x000003e5, 0x00000425, 0x00000465,
+       0x000004a5, 0x000004e5, 0x00000525, 0x00000565,
+       0x000005a5, 0x000005e5, 0x00000625, 0x00000665,
+       0x000006a5, 0x00000725, 0x000007a5, 0x00000825,
+       0x000008a5, 0x00000925, 0x000009a5, 0x00000a25,
+       0x00000aa5, 0x00000b25, 0x00000ba5, 0x00000c25,
+       0x00000ca5, 0x00000d25, 0x00000da5, 0x00000e25,
+       0x00000ea5, 0x00000f25, 0x00000fa5, 0x00001025,
+       0x000010a5, 0x000011a5, 0x000012a5, 0x000013a5,
+       0x000014a5, 0x000015a5, 0x000016a5, 0x000017a5,
+       0x000018a5, 0x000019a5, 0x00001aa5, 0x00001ba5,
+       0x00001ca5, 0x00001da5, 0x00001ea5, 0x00001fa5,
+       0x000020a5, 0x000021a5, 0x000022a5, 0x000023a5,
+       0x000024a5, 0x000026a5, 0x000028a5, 0x00002aa5,
+       0x00002ca5, 0x00002ea5, 0x000030a5, 0x000032a5,
+       0x000034a5, 0x000036a5, 0x000038a5, 0x00003aa5,
+       0x00003ca5, 0x00003ea5, 0x000040a5, 0x000042a5,
+       0x000044a5, 0x000046a5, 0x000048a5, 0x00004aa5,
+       0x00004ca5, 0x00004ea5, 0x000050a5, 0x000052a5,
+       0x000054a5, 0x000056a5, 0x000058a5, 0x00005aa5,
+       0x00005ca5, 0x00005ea5, 0x000060a5, 0x000064a5,
+       0x000068a5, 0x00006ca5, 0x000070a5, 0x000074a5,
+       0x000078a5, 0x00007ca5, 0x000080a5, 0x000084a5,
+       0x000088a5, 0x00008ca5, 0x000090a5, 0x000094a5,
+       0x000098a5, 0x00009ca5, 0x0000a0a5, 0x0000a4a5,
+       0x0000a8a5, 0x0000aca5, 0x0000b0a5, 0x0000b4a5,
+       0x0000b8a5, 0x0000bca5, 0x0000c0a5, 0x0000c4a5,
+       0x0000c8a5, 0x0000cca5, 0x0000d0a5, 0x0000d4a5,
+       0x0000d8a5, 0x0000dca5, 0x0000e0a5, 0x0000e4a5,
+       0x0000eca5, 0x0000f4a5, 0x0000fca5, 0x000104a5,
+       0x00010ca5, 0x000114a5, 0x00011ca5, 0x000124a5,
+       0x00012ca5, 0x000134a5, 0x00013ca5, 0x000144a5,
+       0x00014ca5, 0x000154a5, 0x00015ca5, 0x000164a5,
+       0x00016ca5, 0x000174a5, 0x00017ca5, 0x000184a5,
+       0x00018ca5, 0x000194a5, 0x00019ca5, 0x0001a4a5,
+       0x0001aca5, 0x0001b4a5, 0x0001bca5, 0x0001c4a5,
+       0x0001cca5, 0x0001d4a5, 0x0001dca5, 0x0001e4a5,
+       0x0001eca5, 0x0001f4a5, 0x0001fca5, 0x000204a5,
+       0x00020ca5, 0x000214a5, 0x00021ca5, 0x000224a5,
+       0x000234a5, 0x000244a5, 0x000254a5, 0x000264a5,
+       0x000274a5, 0x000284a5, 0x000294a5, 0x0002a4a5,
+       0x0002b4a5, 0x0002c4a5, 0x0002d4a5, 0x0002e4a5,
+       0x0002f4a5, 0x000304a5, 0x000314a5, 0x000324a5,
+       0x000334a5, 0x000344a5, 0x000354a5, 0x000364a5,
+       0x000374a5, 0x000384a5, 0x000394a5, 0x0003a4a5,
+       0x0003b4a5, 0x0003c4a5, 0x0003d4a5, 0x0003e4a5,
+       0x0003f4a5, 0x000404a5, 0x000414a5, 0x000424a5,
+       0x000434a5, 0x000444a5, 0x000454a5, 0x000464a5,
+       0x000474a5, 0x000484a5, 0x000494a5, 0x0004a4a5,
+       0x0004b4a5, 0x0004c4a5, 0x0004e4a5, 0x000504a5,
+       0x000524a5, 0x000544a5, 0x000564a5, 0x000584a5,
+       0x0005a4a5, 0x0005c4a5, 0x0005e4a5, 0x000604a5,
+       0x000624a5, 0x000644a5, 0x000664a5, 0x000684a5,
+       0x0006a4a5, 0x0006c4a5, 0x0006e4a5, 0x000704a5,
+       0x000724a5, 0x000744a5, 0x000764a5, 0x000784a5,
+       0x0007a4a5, 0x0007c4a5, 0x0007e4a5, 0x000804a5,
+       0x000824a5, 0x000844a5, 0x000864a5, 0x000884a5,
+       0x0008a4a5, 0x0008c4a5, 0x0008e4a5, 0x000904a5,
+       0x000924a5, 0x000944a5, 0x000964a5, 0x000984a5,
+       0x0009a4a5, 0x0009c4a5, 0x0009e4a5, 0x000a04a5,
+       0x000a24a5, 0x000a44a5, 0x000a64a5, 0x000aa4a5,
+       0x000ae4a5, 0x000b24a5, 0x000b64a5, 0x000ba4a5,
+       0x000be4a5, 0x000c24a5, 0x000c64a5, 0x000ca4a5,
+       0x000ce4a5, 0x000d24a5, 0x000d64a5, 0x000da4a5,
+       0x000de4a5, 0x000e24a5, 0x000e64a5, 0x000ea4a5,
+       0x000ee4a5, 0x000f24a5, 0x000f64a5, 0x000fa4a5,
+       0x000fe4a5, 0x001024a5, 0x001064a5, 0x0010a4a5,
+       0x0010e4a5, 0x001124a5, 0x001164a5, 0x0011a4a5,
+       0x0011e4a5, 0x001224a5, 0x001264a5, 0x0012a4a5,
+       0x0012e4a5, 0x001324a5, 0x001364a5, 0x0013a4a5,
+       0x0013e4a5, 0x001424a5, 0x001464a5, 0x0014a4a5,
+       0x0014e4a5, 0x001524a5, 0x001564a5, 0x0015a4a5,
+       0x0015e4a5, 0x001624a5, 0x001664a5, 0x0016a4a5,
+       0x0016e4a5, 0x001724a5, 0x001764a5, 0x0017a4a5,
+       0x0017e4a5, 0x001824a5, 0x001864a5, 0x0018a4a5,
+       0x0018e4a5, 0x001924a5, 0x001964a5, 0x0019e4a5,
+       0x001a64a5, 0x001ae4a5, 0x001b64a5, 0x001be4a5,
+       0x001c64a5, 0x001ce4a5, 0x001d64a5, 0x001de4a5,
+       0x001e64a5, 0x001ee4a5, 0x001f64a5, 0x001fe4a5,
+       0x002064a5, 0x0020e4a5, 0x002164a5, 0x0021e4a5,
+       0x002264a5, 0x0022e4a5, 0x002364a5, 0x0023e4a5,
+       0x002464a5, 0x0024e4a5, 0x002564a5, 0x0025e4a5,
+       0x002664a5, 0x0026e4a5, 0x002764a5, 0x0027e4a5,
+       0x002864a5, 0x0028e4a5, 0x002964a5, 0x0029e4a5,
+       0x002a64a5, 0x002ae4a5, 0x002b64a5, 0x002be4a5,
+       0x002c64a5, 0x002ce4a5, 0x002d64a5, 0x002de4a5,
+       0x002e64a5, 0x002ee4a5, 0x002f64a5, 0x002fe4a5,
+       0x003064a5, 0x0030e4a5, 0x003164a5, 0x0031e4a5,
+       0x003264a5, 0x0032e4a5, 0x003364a5, 0x0033e4a5,
+       0x003464a5, 0x0034e4a5, 0x003564a5, 0x0035e4a5,
+       0x003664a5, 0x0036e4a5, 0x003764a5, 0x0037e4a5,
+       0x003864a5, 0x0038e4a5, 0x003964a5, 0x0039e4a5,
+       0x003a64a5, 0x003ae4a5, 0x003b64a5, 0x003be4a5,
+       0x003c64a5, 0x003ce4a5, 0x003d64a5, 0x003de4a5,
+       0x003ee4a5, 0x003fe4a5, 0x0040e4a5, 0x0041e4a5,
+       0x0042e4a5, 0x0043e4a5, 0x0044e4a5, 0x0045e4a5,
+       0x0046e4a5, 0x0047e4a5, 0x0048e4a5, 0x0049e4a5,
+       0x004ae4a5, 0x004be4a5, 0x004ce4a5, 0x004de4a5,
+       0x004ee4a5, 0x004fe4a5, 0x0050e4a5, 0x0051e4a5,
+       0x0052e4a5, 0x0053e4a5, 0x0054e4a5, 0x0055e4a5,
+       0x0056e4a5, 0x0057e4a5, 0x0058e4a5, 0x0059e4a5,
+       0x005ae4a5, 0x005be4a5, 0x005ce4a5, 0x005de4a5,
+       0x005ee4a5, 0x005fe4a5, 0x0060e4a5, 0x0061e4a5,
+       0x0062e4a5, 0x0063e4a5, 0x0064e4a5, 0x0065e4a5,
+       0x0066e4a5, 0x0067e4a5, 0x0068e4a5, 0x0069e4a5,
+       0x006ae4a5, 0x006be4a5, 0x006ce4a5, 0x006de4a5,
+       0x006ee4a5, 0x006fe4a5, 0x0070e4a5, 0x0071e4a5,
+       0x0072e4a5, 0x0073e4a5, 0x0074e4a5, 0x0075e4a5,
+       0x0076e4a5, 0x0077e4a5, 0x0078e4a5, 0x0079e4a5,
+       0x007ae4a5, 0x007be4a5, 0x007ce4a5, 0x007de4a5,
+       0x007ee4a5, 0x007fe4a5, 0x0080e4a5, 0x0081e4a5,
+       0x0082e4a5, 0x0083e4a5, 0x0084e4a5, 0x0085e4a5,
+       0x0086e4a5, 0x0087e4a5, 0x0088e4a5, 0x0089e4a5,
+       0x008ae4a5, 0x008be4a5, 0x008ce4a5, 0x008de4a5,
+       0x008fe4a5, 0x0091e4a5, 0x0093e4a5, 0x0095e4a5,
+       0x0097e4a5, 0x0099e4a5, 0x009be4a5, 0x009de4a5,
+       0x009fe4a5, 0x00a1e4a5, 0x00a3e4a5, 0x00a5e4a5,
+       0x00a7e4a5, 0x00a9e4a5, 0x00abe4a5, 0x00ade4a5,
+       0x00afe4a5, 0x00b1e4a5, 0x00b3e4a5, 0x00b5e4a5,
+       0x00b7e4a5, 0x00b9e4a5, 0x00bbe4a5, 0x00bde4a5,
+       0x00bfe4a5, 0x00c1e4a5, 0x00c3e4a5, 0x00c5e4a5,
+       0x00c7e4a5, 0x00c9e4a5, 0x00cbe4a5, 0x00cde4a5,
+       0x00cfe4a5, 0x00d1e4a5, 0x00d3e4a5, 0x00d5e4a5,
+       0x00d7e4a5, 0x00d9e4a5, 0x00dbe4a5, 0x00dde4a5,
+       0x00dfe4a5, 0x00e1e4a5, 0x00e3e4a5, 0x00e5e4a5,
+       0x00e7e4a5, 0x00e9e4a5, 0x00ebe4a5, 0x00ede4a5,
+       0x00efe4a5, 0x00f1e4a5, 0x00f3e4a5, 0x00f5e4a5,
+       0x00f7e4a5, 0x00f9e4a5, 0x00fbe4a5, 0x00fde4a5,
+       0x00ffe4a5, 0x0101e4a5, 0x0103e4a5, 0x0105e4a5,
+       0x0107e4a5, 0x0109e4a5, 0x010be4a5, 0x010de4a5,
+       0x010fe4a5, 0x0111e4a5, 0x0113e4a5, 0x0115e4a5,
+       0x0117e4a5, 0x0119e4a5, 0x011be4a5, 0x011de4a5,
+       0x011fe4a5, 0x0121e4a5, 0x0123e4a5, 0x0125e4a5,
+       0x0127e4a5, 0x0129e4a5, 0x012be4a5, 0x012de4a5,
+       0x012fe4a5, 0x0131e4a5, 0x0133e4a5, 0x0135e4a5,
+       0x0137e4a5, 0x013be4a5, 0x013fe4a5, 0x0143e4a5,
+       0x0147e4a5, 0x014be4a5, 0x014fe4a5, 0x0153e4a5,
+       0x0157e4a5, 0x015be4a5, 0x015fe4a5, 0x0163e4a5,
+       0x0167e4a5, 0x016be4a5, 0x016fe4a5, 0x0173e4a5,
+       0x0177e4a5, 0x017be4a5, 0x017fe4a5, 0x0183e4a5,
+       0x0187e4a5, 0x018be4a5, 0x018fe4a5, 0x0193e4a5,
+       0x0197e4a5, 0x019be4a5, 0x019fe4a5, 0x01a3e4a5,
+       0x01a7e4a5, 0x01abe4a5, 0x01afe4a5, 0x01b3e4a5,
+       0x01b7e4a5, 0x01bbe4a5, 0x01bfe4a5, 0x01c3e4a5,
+       0x01c7e4a5, 0x01cbe4a5, 0x01cfe4a5, 0x01d3e4a5,
+       0x01d7e4a5, 0x01dbe4a5, 0x01dfe4a5, 0x01e3e4a5,
+       0x01e7e4a5, 0x01ebe4a5, 0x01efe4a5, 0x01f3e4a5,
+       0x01f7e4a5, 0x01fbe4a5, 0x01ffe4a5, 0x0203e4a5,
+       0x0207e4a5, 0x020be4a5, 0x020fe4a5, 0x0213e4a5,
+       0x0217e4a5, 0x021be4a5, 0x021fe4a5, 0x0223e4a5,
+       0x0227e4a5, 0x022be4a5, 0x022fe4a5, 0x0233e4a5,
+       0x0237e4a5, 0x023be4a5, 0x023fe4a5, 0x0243e4a5,
+       0x0247e4a5, 0x024be4a5, 0x024fe4a5, 0x0253e4a5,
+       0x0257e4a5, 0x025be4a5, 0x025fe4a5, 0x0263e4a5,
+       0x0267e4a5, 0x026be4a5, 0x026fe4a5, 0x0273e4a5,
+       0x0277e4a5, 0x027be4a5, 0x027fe4a5, 0x0283e4a5,
+       0x0287e4a5, 0x028be4a5, 0x028fe4a5, 0x0293e4a5,
+       0x0297e4a5, 0x029be4a5, 0x029fe4a5, 0x02a3e4a5,
+       0x02a7e4a5, 0x02abe4a5, 0x02afe4a5, 0x02b3e4a5,
+       0x02bbe4a5, 0x02c3e4a5, 0x02cbe4a5, 0x02d3e4a5,
+       0x02dbe4a5, 0x02e3e4a5, 0x02ebe4a5, 0x02f3e4a5,
+       0x02fbe4a5, 0x0303e4a5, 0x030be4a5, 0x0313e4a5,
+       0x031be4a5, 0x0323e4a5, 0x032be4a5, 0x0333e4a5,
+       0x033be4a5, 0x0343e4a5, 0x034be4a5, 0x0353e4a5,
+       0x035be4a5, 0x0363e4a5, 0x036be4a5, 0x0373e4a5,
+       0x037be4a5, 0x0383e4a5, 0x038be4a5, 0x0393e4a5,
+       0x039be4a5, 0x03a3e4a5, 0x03abe4a5, 0x03b3e4a5,
+       0x03bbe4a5, 0x03c3e4a5, 0x03cbe4a5, 0x03d3e4a5,
+       0x03dbe4a5, 0x03e3e4a5, 0x03ebe4a5, 0x03f3e4a5,
+       0x03fbe4a5, 0x0403e4a5, 0x040be4a5, 0x0413e4a5,
+       0x041be4a5, 0x0423e4a5, 0x042be4a5, 0x0433e4a5,
+       0x043be4a5, 0x0443e4a5, 0x044be4a5, 0x0453e4a5,
+       0x045be4a5, 0x0463e4a5, 0x046be4a5, 0x0473e4a5,
+       0x047be4a5, 0x0483e4a5, 0x048be4a5, 0x0493e4a5,
+       0x049be4a5, 0x04a3e4a5, 0x04abe4a5, 0x04b3e4a5,
+       0x04bbe4a5, 0x04c3e4a5, 0x04cbe4a5, 0x04d3e4a5,
+       0x04dbe4a5, 0x04e3e4a5, 0x04ebe4a5, 0x04f3e4a5,
+       0x04fbe4a5, 0x0503e4a5, 0x050be4a5, 0x0513e4a5,
+       0x051be4a5, 0x0523e4a5, 0x052be4a5, 0x0533e4a5,
+       0x053be4a5, 0x0543e4a5, 0x054be4a5, 0x0553e4a5,
+       0x055be4a5, 0x0563e4a5, 0x056be4a5, 0x0573e4a5,
+       0x057be4a5, 0x0583e4a5, 0x058be4a5, 0x0593e4a5,
+       0x059be4a5, 0x05a3e4a5, 0x05abe4a5, 0x05b3e4a5,
+       0x05bbe4a5, 0x05c3e4a5, 0x05cbe4a5, 0x05d3e4a5,
+       0x05dbe4a5, 0x05e3e4a5, 0x05ebe4a5, 0x05f3e4a5,
+       0x05fbe4a5, 0x060be4a5, 0x061be4a5, 0x062be4a5,
+       0x063be4a5, 0x064be4a5, 0x065be4a5, 0x465be4a5,
+       /* The last entry is extra; it is equal to LZMS_MAX_MATCH_OFFSET + 1 and
+        * is here to aid binary search.  */
+};
+
+/* Table: offset slot => number of extra offset bits  */
+const u8 lzms_extra_offset_bits[LZMS_MAX_NUM_OFFSET_SYMS] = {
+       0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2,
+       2 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4,
+       4 , 4 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5,
+       5 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6,
+       7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7,
+       7 , 7 , 7 , 7 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8,
+       8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9,
+       9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9,
+       9 , 9 , 9 , 9 , 9 , 9 , 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+       10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+       10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+       11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+       11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12,
+       12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+       12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+       12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13,
+       13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+       13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+       13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+       14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+       14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+       14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+       14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16,
+       16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+       16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+       16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+       16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+       16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17,
+       17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+       17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+       17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+       17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+       17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+       18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+       18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+       18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+       18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+       18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+       18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19,
+       19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+       19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+       19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+       19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+       19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+       19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+       19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 30,
+};
+
+/* Table: length slot => length slot base value  */
+const u32 lzms_length_slot_base[LZMS_NUM_LENGTH_SYMS + 1] = {
+       0x00000001, 0x00000002, 0x00000003, 0x00000004,
+       0x00000005, 0x00000006, 0x00000007, 0x00000008,
+       0x00000009, 0x0000000a, 0x0000000b, 0x0000000c,
+       0x0000000d, 0x0000000e, 0x0000000f, 0x00000010,
+       0x00000011, 0x00000012, 0x00000013, 0x00000014,
+       0x00000015, 0x00000016, 0x00000017, 0x00000018,
+       0x00000019, 0x0000001a, 0x0000001b, 0x0000001d,
+       0x0000001f, 0x00000021, 0x00000023, 0x00000027,
+       0x0000002b, 0x0000002f, 0x00000033, 0x00000037,
+       0x0000003b, 0x00000043, 0x0000004b, 0x00000053,
+       0x0000005b, 0x0000006b, 0x0000007b, 0x0000008b,
+       0x0000009b, 0x000000ab, 0x000000cb, 0x000000eb,
+       0x0000012b, 0x000001ab, 0x000002ab, 0x000004ab,
+       0x000008ab, 0x000108ab, 0x400108ab,
+       /* The last entry is extra; it is equal to LZMS_MAX_MATCH_LEN + 1 and is
+        * here to aid binary search.  */
+};
+
+/* Table: length slot => number of extra length bits  */
+const u8 lzms_extra_length_bits[LZMS_NUM_LENGTH_SYMS] = {
+       0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+       0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+       0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+       0 , 0 , 1 , 1 , 1 , 1 , 2 , 2 ,
+       2 , 2 , 2 , 2 , 3 , 3 , 3 , 3 ,
+       4 , 4 , 4 , 4 , 4 , 5 , 5 , 6 ,
+       7 , 8 , 9 , 10, 16, 30,
+};
 
-/* A table that maps length slots to their base values.  These are constants
- * computed at runtime by lzms_compute_slot_bases().  */
-u32 lzms_length_slot_base[LZMS_NUM_LEN_SYMS + 1];
-
-/* Return the slot for the specified value.  */
 unsigned
 lzms_get_slot(u32 value, const u32 slot_base_tab[], unsigned num_slots)
 {
-       unsigned slot = 0;
-
-       while (slot_base_tab[slot + 1] <= value)
-               slot++;
-
-       return slot;
-}
-
-
-static void
-lzms_decode_delta_rle_slot_bases(u32 slot_bases[],
-                                const u8 delta_run_lens[], size_t num_run_lens)
-{
-       u32 delta = 1;
-       u32 base = 0;
-       size_t slot = 0;
-       for (size_t i = 0; i < num_run_lens; i++) {
-               u8 run_len = delta_run_lens[i];
-               while (run_len--) {
-                       base += delta;
-                       slot_bases[slot++] = base;
+       unsigned l = 0;
+       unsigned r = num_slots - 1;
+       for (;;) {
+               unsigned slot = (l + r) / 2;
+               if (value >= slot_base_tab[slot]) {
+                       if (value < slot_base_tab[slot + 1])
+                               return slot;
+                       else
+                               l = slot + 1;
+               } else {
+                       r = slot - 1;
                }
-               delta <<= 1;
        }
 }
 
-/* Initialize the global position and length slot tables.  */
-static void
-lzms_compute_slot_bases(void)
+/* Return the number of offset slots used when processing a buffer having the
+ * specified uncompressed size.  */
+unsigned
+lzms_get_num_offset_slots(size_t uncompressed_size)
 {
-       /* If an explicit formula that maps LZMS position and length slots to
-        * slot bases exists, then it could be used here.  But until one is
-        * found, the following code fills in the slots using the observation
-        * that the increase from one slot base to the next is an increasing
-        * power of 2.  Therefore, run-length encoding of the delta of adjacent
-        * entries can be used.  */
-       static const u8 position_slot_delta_run_lens[] = {
-               9,   0,   9,   7,   10,  15,  15,  20,
-               20,  30,  33,  40,  42,  45,  60,  73,
-               80,  85,  95,  105, 6,
-       };
-
-       static const u8 length_slot_delta_run_lens[] = {
-               27,  4,   6,   4,   5,   2,   1,   1,
-               1,   1,   1,   0,   0,   0,   0,   0,
-               1,
-       };
-
-       lzms_decode_delta_rle_slot_bases(lzms_position_slot_base,
-                                        position_slot_delta_run_lens,
-                                        ARRAY_LEN(position_slot_delta_run_lens));
-
-       lzms_position_slot_base[LZMS_MAX_NUM_OFFSET_SYMS] = 0x7fffffff;
-
-       lzms_decode_delta_rle_slot_bases(lzms_length_slot_base,
-                                        length_slot_delta_run_lens,
-                                        ARRAY_LEN(length_slot_delta_run_lens));
-
-       lzms_length_slot_base[LZMS_NUM_LEN_SYMS] = 0x400108ab;
+       if (uncompressed_size < 2)
+               return 0;
+       return 1 + lzms_get_offset_slot(uncompressed_size - 1);
 }
 
-/* Initialize the global position length slot tables if not done so already.  */
 void
-lzms_init_slot_bases(void)
+lzms_init_probability_entries(struct lzms_probability_entry *entries, size_t count)
 {
-       static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
-       static bool already_computed = false;
-
-       if (unlikely(!already_computed)) {
-               pthread_mutex_lock(&mutex);
-               if (!already_computed) {
-                       lzms_compute_slot_bases();
-                       already_computed = true;
-               }
-               pthread_mutex_unlock(&mutex);
+       for (size_t i = 0; i < count; i++) {
+               entries[i].num_recent_zero_bits = LZMS_INITIAL_PROBABILITY;
+               entries[i].recent_bits = LZMS_INITIAL_RECENT_BITS;
        }
 }
 
-static s32
-lzms_maybe_do_x86_translation(u8 data[restrict], s32 i, s32 num_op_bytes,
-                             s32 * restrict closest_target_usage_p,
-                             s32 last_target_usages[restrict],
-                             s32 max_trans_offset, bool undo)
-{
-       u16 pos;
-
-       if (undo) {
-               if (i - *closest_target_usage_p <= max_trans_offset) {
-                       LZMS_DEBUG("Undid x86 translation at position %d "
-                                  "(opcode 0x%02x)", i, data[i]);
-                       le32 *p32 = (le32*)&data[i + num_op_bytes];
-                       u32 n = le32_to_cpu(*p32);
-                       *p32 = cpu_to_le32(n - i);
-               }
-               pos = i + le16_to_cpu(*(const le16*)&data[i + num_op_bytes]);
-       } else {
-               pos = i + le16_to_cpu(*(const le16*)&data[i + num_op_bytes]);
-
-               if (i - *closest_target_usage_p <= max_trans_offset) {
-                       LZMS_DEBUG("Did x86 translation at position %d "
-                                  "(opcode 0x%02x)", i, data[i]);
-                       le32 *p32 = (le32*)&data[i + num_op_bytes];
-                       u32 n = le32_to_cpu(*p32);
-                       *p32 = cpu_to_le32(n + i);
-               }
-       }
-
-       i += num_op_bytes + sizeof(le32) - 1;
-
-       if (i - last_target_usages[pos] <= LZMS_X86_MAX_GOOD_TARGET_OFFSET)
-               *closest_target_usage_p = i;
-
-       last_target_usages[pos] = i;
-
-       return i + 1;
-}
-
-static s32
-lzms_may_x86_translate(const u8 p[restrict],
-                      s32 *restrict max_offset_ret)
+void
+lzms_init_symbol_frequencies(u32 freqs[], size_t num_syms)
 {
-       /* Switch on first byte of the opcode, assuming it is really an x86
-        * instruction.  */
-       *max_offset_ret = LZMS_X86_MAX_TRANSLATION_OFFSET;
-       switch (p[0]) {
-       case 0x48:
-               if (p[1] == 0x8b) {
-                       if (p[2] == 0x5 || p[2] == 0xd) {
-                               /* Load relative (x86_64)  */
-                               return 3;
-                       }
-               } else if (p[1] == 0x8d) {
-                       if ((p[2] & 0x7) == 0x5) {
-                               /* Load effective address relative (x86_64)  */
-                               return 3;
-                       }
-               }
-               break;
-
-       case 0x4c:
-               if (p[1] == 0x8d) {
-                       if ((p[2] & 0x7) == 0x5) {
-                               /* Load effective address relative (x86_64)  */
-                               return 3;
-                       }
-               }
-               break;
-
-       case 0xe8:
-               /* Call relative  */
-               *max_offset_ret = LZMS_X86_MAX_TRANSLATION_OFFSET / 2;
-               return 1;
-
-       case 0xe9:
-               /* Jump relative  */
-               *max_offset_ret = 0;
-               return 5;
-
-       case 0xf0:
-               if (p[1] == 0x83 && p[2] == 0x05) {
-                       /* Lock add relative  */
-                       return 3;
-               }
-               break;
-
-       case 0xff:
-               if (p[1] == 0x15) {
-                       /* Call indirect  */
-                       return 2;
-               }
-               break;
-       }
-       *max_offset_ret = 0;
-       return 1;
+       for (size_t i = 0; i < num_syms; i++)
+               freqs[i] = 1;
 }
 
 /*
  * Translate relative addresses embedded in x86 instructions into absolute
  * addresses (@undo == %false), or undo this translation (@undo == %true).
  *
- * @last_target_usages is a temporary array of length >= 65536.
+ * Absolute addresses are usually more compressible by LZ factorization.
+ *
+ * @last_target_usages must be a temporary array of length >= 65536.
  */
 void
-lzms_x86_filter(u8 data[restrict],
-               s32 size,
-               s32 last_target_usages[restrict],
-               bool undo)
+lzms_x86_filter(u8 data[restrict], s32 size,
+               s32 last_target_usages[restrict], bool undo)
 {
-       s32 closest_target_usage = -LZMS_X86_MAX_TRANSLATION_OFFSET - 1;
+       /*
+        * Note: this filter runs unconditionally and uses a custom algorithm to
+        * detect data regions that probably contain x86 code.
+        *
+        * 'last_x86_pos' tracks the most recent position that has a good chance
+        * of being the start of an x86 instruction.  When the filter detects a
+        * likely x86 instruction, it updates this variable and considers the
+        * next LZMS_X86_MAX_TRANSLATION_OFFSET bytes of data as valid for x86
+        * translations.
+        *
+        * If part of the data does not, in fact, contain x86 machine code, then
+        * 'last_x86_pos' will, very likely, eventually fall more than
+        * LZMS_X86_MAX_TRANSLATION_OFFSET bytes behind the current position.
+        * This results in x86 translations being disabled until the next likely
+        * x86 instruction is detected.
+        *
+        * To identify "likely x86 instructions", the algorithm attempts to
+        * track the position of the most recent potential relative-addressing
+        * instruction that referenced each possible memory address.  If it
+        * finds two references to the same memory address within an
+        * LZMS_X86_ID_WINDOW_SIZE-byte sized window, then the second reference
+        * is flagged as a likely x86 instruction.  Since the instructions
+        * considered for translation necessarily use relative addressing, the
+        * algorithm does a tentative translation into absolute addresses.  In
+        * addition, so that memory addresses can be looked up in an array of
+        * reasonable size (in this code, 'last_target_usages'), only the
+        * low-order 2 bytes of each address are considered significant.
+        */
+
+       s32 i;
+       s32 tail_idx;
+       u8 saved_byte;
+       s32 last_x86_pos;
+
+       if (size <= 17)
+               return;
+
+       for (i = 0; i < 65536; i++)
+               last_target_usages[i] = -(s32)LZMS_X86_ID_WINDOW_SIZE - 1;
+
+       /*
+        * Optimization: only check for end-of-buffer when we already have a
+        * byte that is a potential opcode for x86 translation.  To do this,
+        * overwrite one of the bytes near the end of the buffer, and restore it
+        * later.  The correctness of this optimization relies on two
+        * characteristics of compressed format:
+        *
+        *  1. No translation can follow an opcode beginning in the last 16
+        *     bytes.
+        *  2. A translation following an opcode starting at the last possible
+        *     position (17 bytes from the end) never extends more than 7 bytes.
+        *     Consequently, we can overwrite any of the bytes starting at
+        *     data[(size - 16) + 7] and have no effect on the result, as long
+        *     as we restore those bytes later.
+        */
+       tail_idx = size - 16;
+       saved_byte = data[tail_idx + 8];
+       data[tail_idx + 8] = 0xE8;
+       last_x86_pos = -LZMS_X86_MAX_TRANSLATION_OFFSET - 1;
+
+       /* Note: the very first byte must be ignored completely!  */
+       i = 0;
+       for (;;) {
+               s32 max_trans_offset;
+               s32 opcode_nbytes;
+               u16 target16;
+
+               /*
+                * The following table is used to accelerate the common case
+                * where the byte has nothing to do with x86 translation and
+                * must simply be skipped.  This is the fastest (at least on
+                * x86_64) of the implementations I tested.  The other
+                * implementations I tested were:
+                *      - Jump table with 256 entries
+                *      - Switch statement with default
+                */
+               static const u8 is_potential_opcode[256] = {
+                       [0x48] = 1, [0x4C] = 1, [0xE8] = 1,
+                       [0xE9] = 1, [0xF0] = 1, [0xFF] = 1,
+               };
+
+               for (;;) {
+                       if (is_potential_opcode[data[++i]])
+                               break;
+                       if (is_potential_opcode[data[++i]])
+                               break;
+                       if (is_potential_opcode[data[++i]])
+                               break;
+                       if (is_potential_opcode[data[++i]])
+                               break;
+               }
 
-       for (s32 i = 0; i < 65536; i++)
-               last_target_usages[i] = -LZMS_X86_MAX_GOOD_TARGET_OFFSET - 1;
+               if (i >= tail_idx)
+                       break;
+
+               max_trans_offset = LZMS_X86_MAX_TRANSLATION_OFFSET;
+               switch (data[i]) {
+               case 0x48:
+                       if (data[i + 1] == 0x8B) {
+                               if (data[i + 2] == 0x5 || data[i + 2] == 0xD) {
+                                       /* Load relative (x86_64)  */
+                                       opcode_nbytes = 3;
+                                       goto have_opcode;
+                               }
+                       } else if (data[i + 1] == 0x8D) {
+                               if ((data[i + 2] & 0x7) == 0x5) {
+                                       /* Load effective address relative (x86_64)  */
+                                       opcode_nbytes = 3;
+                                       goto have_opcode;
+                               }
+                       }
+                       break;
+               case 0x4C:
+                       if (data[i + 1] == 0x8D) {
+                               if ((data[i + 2] & 0x7) == 0x5) {
+                                       /* Load effective address relative (x86_64)  */
+                                       opcode_nbytes = 3;
+                                       goto have_opcode;
+                               }
+                       }
+                       break;
+               case 0xE8:
+                       /* Call relative.  Note: 'max_trans_offset' must be
+                        * halved for this instruction.  This means that we must
+                        * be more confident that we are in a region of x86
+                        * machine code before we will do a translation for this
+                        * particular instruction.  */
+                       opcode_nbytes = 1;
+                       max_trans_offset /= 2;
+                       goto have_opcode;
+               case 0xE9:
+                       /* Jump relative  */
+                       i += 4;
+                       break;
+               case 0xF0:
+                       if (data[i + 1] == 0x83 && data[i + 2] == 0x05) {
+                               /* Lock add relative  */
+                               opcode_nbytes = 3;
+                               goto have_opcode;
+                       }
+                       break;
+               case 0xFF:
+                       if (data[i + 1] == 0x15) {
+                               /* Call indirect  */
+                               opcode_nbytes = 2;
+                               goto have_opcode;
+                       }
+                       break;
+               }
 
-       for (s32 i = 0; i < size - 11; ) {
-               s32 max_trans_offset;
-               s32 n;
+               continue;
 
-               n = lzms_may_x86_translate(data + i, &max_trans_offset);
-               if (max_trans_offset) {
-                       i = lzms_maybe_do_x86_translation(data, i, n,
-                                                         &closest_target_usage,
-                                                         last_target_usages,
-                                                         max_trans_offset,
-                                                         undo);
+       have_opcode:
+               if (undo) {
+                       if (i - last_x86_pos <= max_trans_offset) {
+                               LZMS_DEBUG("Undid x86 translation at position %d "
+                                          "(opcode 0x%02x)", i, data[i]);
+                               void *p32 = &data[i + opcode_nbytes];
+                               u32 n = get_unaligned_u32_le(p32);
+                               put_unaligned_u32_le(n - i, p32);
+                       }
+                       target16 = i + get_unaligned_u16_le(&data[i + opcode_nbytes]);
                } else {
-                       i += n;
+                       target16 = i + get_unaligned_u16_le(&data[i + opcode_nbytes]);
+                       if (i - last_x86_pos <= max_trans_offset) {
+                               LZMS_DEBUG("Did x86 translation at position %d "
+                                          "(opcode 0x%02x)", i, data[i]);
+                               void *p32 = &data[i + opcode_nbytes];
+                               u32 n = get_unaligned_u32_le(p32);
+                               put_unaligned_u32_le(n + i, p32);
+                       }
                }
+
+               i += opcode_nbytes + sizeof(le32) - 1;
+
+               if (i - last_target_usages[target16] <= LZMS_X86_ID_WINDOW_SIZE)
+                       last_x86_pos = i;
+
+               last_target_usages[target16] = i;
+
+               continue;
        }
+
+       data[tail_idx + 8] = saved_byte;
 }