X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flzms-common.c;h=eaa237fa1eec7f5c2d08e1c71c7b209691c891df;hp=92f28c2d13bc6aa6a7716304646f76a49e6201d5;hb=855b49ef85d274588a2848d9c69974f9b88d343a;hpb=abe65b1f2f12bcedec1103cc7924897720f919be diff --git a/src/lzms-common.c b/src/lzms-common.c index 92f28c2d..eaa237fa 100644 --- a/src/lzms-common.c +++ b/src/lzms-common.c @@ -1,27 +1,22 @@ /* - * lzms-common.c - * - * Code shared between the compressor and decompressor for the LZMS compression - * format. + * lzms-common.c - Common code for LZMS compression and decompression */ /* - * Copyright (C) 2013 Eric Biggers - * - * This file is part of wimlib, a library for working with WIM files. + * Copyright (C) 2013, 2014 Eric Biggers * - * wimlib is free software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the Free - * Software Foundation; either version 3 of the License, or (at your option) - * any later version. + * This file is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) any + * later version. * - * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR - * A PARTICULAR PURPOSE. See the GNU General Public License for more + * This file is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more * details. * - * You should have received a copy of the GNU General Public License - * along with wimlib; if not, see http://www.gnu.org/licenses/. + * You should have received a copy of the GNU Lesser General Public License + * along with this file; if not, see http://www.gnu.org/licenses/. */ #ifdef HAVE_CONFIG_H @@ -30,40 +25,306 @@ #include "wimlib/endianness.h" #include "wimlib/lzms.h" -#include "wimlib/util.h" - -#include - -/*************************************************************** - * Constant tables initialized by lzms_compute_slots(): * - ***************************************************************/ - -/* Table: position slot => position slot base value */ -u32 lzms_position_slot_base[LZMS_MAX_NUM_OFFSET_SYMS + 1]; - -/* Table: position slot => number of extra position bits */ -u8 lzms_extra_position_bits[LZMS_MAX_NUM_OFFSET_SYMS]; - -/* Table: log2(position) => [lower bound, upper bound] on position slot */ -u16 lzms_order_to_position_slot_bounds[30][2]; +#include "wimlib/unaligned.h" + +/* Table: offset slot => offset slot base value */ +const u32 lzms_offset_slot_base[LZMS_MAX_NUM_OFFSET_SYMS + 1] = { + 0x00000001, 0x00000002, 0x00000003, 0x00000004, + 0x00000005, 0x00000006, 0x00000007, 0x00000008, + 0x00000009, 0x0000000d, 0x00000011, 0x00000015, + 0x00000019, 0x0000001d, 0x00000021, 0x00000025, + 0x00000029, 0x0000002d, 0x00000035, 0x0000003d, + 0x00000045, 0x0000004d, 0x00000055, 0x0000005d, + 0x00000065, 0x00000075, 0x00000085, 0x00000095, + 0x000000a5, 0x000000b5, 0x000000c5, 0x000000d5, + 0x000000e5, 0x000000f5, 0x00000105, 0x00000125, + 0x00000145, 0x00000165, 0x00000185, 0x000001a5, + 0x000001c5, 0x000001e5, 0x00000205, 0x00000225, + 0x00000245, 0x00000265, 0x00000285, 0x000002a5, + 0x000002c5, 0x000002e5, 0x00000325, 0x00000365, + 0x000003a5, 0x000003e5, 0x00000425, 0x00000465, + 0x000004a5, 0x000004e5, 0x00000525, 0x00000565, + 0x000005a5, 0x000005e5, 0x00000625, 0x00000665, + 0x000006a5, 0x00000725, 0x000007a5, 0x00000825, + 0x000008a5, 0x00000925, 0x000009a5, 0x00000a25, + 0x00000aa5, 0x00000b25, 0x00000ba5, 0x00000c25, + 0x00000ca5, 0x00000d25, 0x00000da5, 0x00000e25, + 0x00000ea5, 0x00000f25, 0x00000fa5, 0x00001025, + 0x000010a5, 0x000011a5, 0x000012a5, 0x000013a5, + 0x000014a5, 0x000015a5, 0x000016a5, 0x000017a5, + 0x000018a5, 0x000019a5, 0x00001aa5, 0x00001ba5, + 0x00001ca5, 0x00001da5, 0x00001ea5, 0x00001fa5, + 0x000020a5, 0x000021a5, 0x000022a5, 0x000023a5, + 0x000024a5, 0x000026a5, 0x000028a5, 0x00002aa5, + 0x00002ca5, 0x00002ea5, 0x000030a5, 0x000032a5, + 0x000034a5, 0x000036a5, 0x000038a5, 0x00003aa5, + 0x00003ca5, 0x00003ea5, 0x000040a5, 0x000042a5, + 0x000044a5, 0x000046a5, 0x000048a5, 0x00004aa5, + 0x00004ca5, 0x00004ea5, 0x000050a5, 0x000052a5, + 0x000054a5, 0x000056a5, 0x000058a5, 0x00005aa5, + 0x00005ca5, 0x00005ea5, 0x000060a5, 0x000064a5, + 0x000068a5, 0x00006ca5, 0x000070a5, 0x000074a5, + 0x000078a5, 0x00007ca5, 0x000080a5, 0x000084a5, + 0x000088a5, 0x00008ca5, 0x000090a5, 0x000094a5, + 0x000098a5, 0x00009ca5, 0x0000a0a5, 0x0000a4a5, + 0x0000a8a5, 0x0000aca5, 0x0000b0a5, 0x0000b4a5, + 0x0000b8a5, 0x0000bca5, 0x0000c0a5, 0x0000c4a5, + 0x0000c8a5, 0x0000cca5, 0x0000d0a5, 0x0000d4a5, + 0x0000d8a5, 0x0000dca5, 0x0000e0a5, 0x0000e4a5, + 0x0000eca5, 0x0000f4a5, 0x0000fca5, 0x000104a5, + 0x00010ca5, 0x000114a5, 0x00011ca5, 0x000124a5, + 0x00012ca5, 0x000134a5, 0x00013ca5, 0x000144a5, + 0x00014ca5, 0x000154a5, 0x00015ca5, 0x000164a5, + 0x00016ca5, 0x000174a5, 0x00017ca5, 0x000184a5, + 0x00018ca5, 0x000194a5, 0x00019ca5, 0x0001a4a5, + 0x0001aca5, 0x0001b4a5, 0x0001bca5, 0x0001c4a5, + 0x0001cca5, 0x0001d4a5, 0x0001dca5, 0x0001e4a5, + 0x0001eca5, 0x0001f4a5, 0x0001fca5, 0x000204a5, + 0x00020ca5, 0x000214a5, 0x00021ca5, 0x000224a5, + 0x000234a5, 0x000244a5, 0x000254a5, 0x000264a5, + 0x000274a5, 0x000284a5, 0x000294a5, 0x0002a4a5, + 0x0002b4a5, 0x0002c4a5, 0x0002d4a5, 0x0002e4a5, + 0x0002f4a5, 0x000304a5, 0x000314a5, 0x000324a5, + 0x000334a5, 0x000344a5, 0x000354a5, 0x000364a5, + 0x000374a5, 0x000384a5, 0x000394a5, 0x0003a4a5, + 0x0003b4a5, 0x0003c4a5, 0x0003d4a5, 0x0003e4a5, + 0x0003f4a5, 0x000404a5, 0x000414a5, 0x000424a5, + 0x000434a5, 0x000444a5, 0x000454a5, 0x000464a5, + 0x000474a5, 0x000484a5, 0x000494a5, 0x0004a4a5, + 0x0004b4a5, 0x0004c4a5, 0x0004e4a5, 0x000504a5, + 0x000524a5, 0x000544a5, 0x000564a5, 0x000584a5, + 0x0005a4a5, 0x0005c4a5, 0x0005e4a5, 0x000604a5, + 0x000624a5, 0x000644a5, 0x000664a5, 0x000684a5, + 0x0006a4a5, 0x0006c4a5, 0x0006e4a5, 0x000704a5, + 0x000724a5, 0x000744a5, 0x000764a5, 0x000784a5, + 0x0007a4a5, 0x0007c4a5, 0x0007e4a5, 0x000804a5, + 0x000824a5, 0x000844a5, 0x000864a5, 0x000884a5, + 0x0008a4a5, 0x0008c4a5, 0x0008e4a5, 0x000904a5, + 0x000924a5, 0x000944a5, 0x000964a5, 0x000984a5, + 0x0009a4a5, 0x0009c4a5, 0x0009e4a5, 0x000a04a5, + 0x000a24a5, 0x000a44a5, 0x000a64a5, 0x000aa4a5, + 0x000ae4a5, 0x000b24a5, 0x000b64a5, 0x000ba4a5, + 0x000be4a5, 0x000c24a5, 0x000c64a5, 0x000ca4a5, + 0x000ce4a5, 0x000d24a5, 0x000d64a5, 0x000da4a5, + 0x000de4a5, 0x000e24a5, 0x000e64a5, 0x000ea4a5, + 0x000ee4a5, 0x000f24a5, 0x000f64a5, 0x000fa4a5, + 0x000fe4a5, 0x001024a5, 0x001064a5, 0x0010a4a5, + 0x0010e4a5, 0x001124a5, 0x001164a5, 0x0011a4a5, + 0x0011e4a5, 0x001224a5, 0x001264a5, 0x0012a4a5, + 0x0012e4a5, 0x001324a5, 0x001364a5, 0x0013a4a5, + 0x0013e4a5, 0x001424a5, 0x001464a5, 0x0014a4a5, + 0x0014e4a5, 0x001524a5, 0x001564a5, 0x0015a4a5, + 0x0015e4a5, 0x001624a5, 0x001664a5, 0x0016a4a5, + 0x0016e4a5, 0x001724a5, 0x001764a5, 0x0017a4a5, + 0x0017e4a5, 0x001824a5, 0x001864a5, 0x0018a4a5, + 0x0018e4a5, 0x001924a5, 0x001964a5, 0x0019e4a5, + 0x001a64a5, 0x001ae4a5, 0x001b64a5, 0x001be4a5, + 0x001c64a5, 0x001ce4a5, 0x001d64a5, 0x001de4a5, + 0x001e64a5, 0x001ee4a5, 0x001f64a5, 0x001fe4a5, + 0x002064a5, 0x0020e4a5, 0x002164a5, 0x0021e4a5, + 0x002264a5, 0x0022e4a5, 0x002364a5, 0x0023e4a5, + 0x002464a5, 0x0024e4a5, 0x002564a5, 0x0025e4a5, + 0x002664a5, 0x0026e4a5, 0x002764a5, 0x0027e4a5, + 0x002864a5, 0x0028e4a5, 0x002964a5, 0x0029e4a5, + 0x002a64a5, 0x002ae4a5, 0x002b64a5, 0x002be4a5, + 0x002c64a5, 0x002ce4a5, 0x002d64a5, 0x002de4a5, + 0x002e64a5, 0x002ee4a5, 0x002f64a5, 0x002fe4a5, + 0x003064a5, 0x0030e4a5, 0x003164a5, 0x0031e4a5, + 0x003264a5, 0x0032e4a5, 0x003364a5, 0x0033e4a5, + 0x003464a5, 0x0034e4a5, 0x003564a5, 0x0035e4a5, + 0x003664a5, 0x0036e4a5, 0x003764a5, 0x0037e4a5, + 0x003864a5, 0x0038e4a5, 0x003964a5, 0x0039e4a5, + 0x003a64a5, 0x003ae4a5, 0x003b64a5, 0x003be4a5, + 0x003c64a5, 0x003ce4a5, 0x003d64a5, 0x003de4a5, + 0x003ee4a5, 0x003fe4a5, 0x0040e4a5, 0x0041e4a5, + 0x0042e4a5, 0x0043e4a5, 0x0044e4a5, 0x0045e4a5, + 0x0046e4a5, 0x0047e4a5, 0x0048e4a5, 0x0049e4a5, + 0x004ae4a5, 0x004be4a5, 0x004ce4a5, 0x004de4a5, + 0x004ee4a5, 0x004fe4a5, 0x0050e4a5, 0x0051e4a5, + 0x0052e4a5, 0x0053e4a5, 0x0054e4a5, 0x0055e4a5, + 0x0056e4a5, 0x0057e4a5, 0x0058e4a5, 0x0059e4a5, + 0x005ae4a5, 0x005be4a5, 0x005ce4a5, 0x005de4a5, + 0x005ee4a5, 0x005fe4a5, 0x0060e4a5, 0x0061e4a5, + 0x0062e4a5, 0x0063e4a5, 0x0064e4a5, 0x0065e4a5, + 0x0066e4a5, 0x0067e4a5, 0x0068e4a5, 0x0069e4a5, + 0x006ae4a5, 0x006be4a5, 0x006ce4a5, 0x006de4a5, + 0x006ee4a5, 0x006fe4a5, 0x0070e4a5, 0x0071e4a5, + 0x0072e4a5, 0x0073e4a5, 0x0074e4a5, 0x0075e4a5, + 0x0076e4a5, 0x0077e4a5, 0x0078e4a5, 0x0079e4a5, + 0x007ae4a5, 0x007be4a5, 0x007ce4a5, 0x007de4a5, + 0x007ee4a5, 0x007fe4a5, 0x0080e4a5, 0x0081e4a5, + 0x0082e4a5, 0x0083e4a5, 0x0084e4a5, 0x0085e4a5, + 0x0086e4a5, 0x0087e4a5, 0x0088e4a5, 0x0089e4a5, + 0x008ae4a5, 0x008be4a5, 0x008ce4a5, 0x008de4a5, + 0x008fe4a5, 0x0091e4a5, 0x0093e4a5, 0x0095e4a5, + 0x0097e4a5, 0x0099e4a5, 0x009be4a5, 0x009de4a5, + 0x009fe4a5, 0x00a1e4a5, 0x00a3e4a5, 0x00a5e4a5, + 0x00a7e4a5, 0x00a9e4a5, 0x00abe4a5, 0x00ade4a5, + 0x00afe4a5, 0x00b1e4a5, 0x00b3e4a5, 0x00b5e4a5, + 0x00b7e4a5, 0x00b9e4a5, 0x00bbe4a5, 0x00bde4a5, + 0x00bfe4a5, 0x00c1e4a5, 0x00c3e4a5, 0x00c5e4a5, + 0x00c7e4a5, 0x00c9e4a5, 0x00cbe4a5, 0x00cde4a5, + 0x00cfe4a5, 0x00d1e4a5, 0x00d3e4a5, 0x00d5e4a5, + 0x00d7e4a5, 0x00d9e4a5, 0x00dbe4a5, 0x00dde4a5, + 0x00dfe4a5, 0x00e1e4a5, 0x00e3e4a5, 0x00e5e4a5, + 0x00e7e4a5, 0x00e9e4a5, 0x00ebe4a5, 0x00ede4a5, + 0x00efe4a5, 0x00f1e4a5, 0x00f3e4a5, 0x00f5e4a5, + 0x00f7e4a5, 0x00f9e4a5, 0x00fbe4a5, 0x00fde4a5, + 0x00ffe4a5, 0x0101e4a5, 0x0103e4a5, 0x0105e4a5, + 0x0107e4a5, 0x0109e4a5, 0x010be4a5, 0x010de4a5, + 0x010fe4a5, 0x0111e4a5, 0x0113e4a5, 0x0115e4a5, + 0x0117e4a5, 0x0119e4a5, 0x011be4a5, 0x011de4a5, + 0x011fe4a5, 0x0121e4a5, 0x0123e4a5, 0x0125e4a5, + 0x0127e4a5, 0x0129e4a5, 0x012be4a5, 0x012de4a5, + 0x012fe4a5, 0x0131e4a5, 0x0133e4a5, 0x0135e4a5, + 0x0137e4a5, 0x013be4a5, 0x013fe4a5, 0x0143e4a5, + 0x0147e4a5, 0x014be4a5, 0x014fe4a5, 0x0153e4a5, + 0x0157e4a5, 0x015be4a5, 0x015fe4a5, 0x0163e4a5, + 0x0167e4a5, 0x016be4a5, 0x016fe4a5, 0x0173e4a5, + 0x0177e4a5, 0x017be4a5, 0x017fe4a5, 0x0183e4a5, + 0x0187e4a5, 0x018be4a5, 0x018fe4a5, 0x0193e4a5, + 0x0197e4a5, 0x019be4a5, 0x019fe4a5, 0x01a3e4a5, + 0x01a7e4a5, 0x01abe4a5, 0x01afe4a5, 0x01b3e4a5, + 0x01b7e4a5, 0x01bbe4a5, 0x01bfe4a5, 0x01c3e4a5, + 0x01c7e4a5, 0x01cbe4a5, 0x01cfe4a5, 0x01d3e4a5, + 0x01d7e4a5, 0x01dbe4a5, 0x01dfe4a5, 0x01e3e4a5, + 0x01e7e4a5, 0x01ebe4a5, 0x01efe4a5, 0x01f3e4a5, + 0x01f7e4a5, 0x01fbe4a5, 0x01ffe4a5, 0x0203e4a5, + 0x0207e4a5, 0x020be4a5, 0x020fe4a5, 0x0213e4a5, + 0x0217e4a5, 0x021be4a5, 0x021fe4a5, 0x0223e4a5, + 0x0227e4a5, 0x022be4a5, 0x022fe4a5, 0x0233e4a5, + 0x0237e4a5, 0x023be4a5, 0x023fe4a5, 0x0243e4a5, + 0x0247e4a5, 0x024be4a5, 0x024fe4a5, 0x0253e4a5, + 0x0257e4a5, 0x025be4a5, 0x025fe4a5, 0x0263e4a5, + 0x0267e4a5, 0x026be4a5, 0x026fe4a5, 0x0273e4a5, + 0x0277e4a5, 0x027be4a5, 0x027fe4a5, 0x0283e4a5, + 0x0287e4a5, 0x028be4a5, 0x028fe4a5, 0x0293e4a5, + 0x0297e4a5, 0x029be4a5, 0x029fe4a5, 0x02a3e4a5, + 0x02a7e4a5, 0x02abe4a5, 0x02afe4a5, 0x02b3e4a5, + 0x02bbe4a5, 0x02c3e4a5, 0x02cbe4a5, 0x02d3e4a5, + 0x02dbe4a5, 0x02e3e4a5, 0x02ebe4a5, 0x02f3e4a5, + 0x02fbe4a5, 0x0303e4a5, 0x030be4a5, 0x0313e4a5, + 0x031be4a5, 0x0323e4a5, 0x032be4a5, 0x0333e4a5, + 0x033be4a5, 0x0343e4a5, 0x034be4a5, 0x0353e4a5, + 0x035be4a5, 0x0363e4a5, 0x036be4a5, 0x0373e4a5, + 0x037be4a5, 0x0383e4a5, 0x038be4a5, 0x0393e4a5, + 0x039be4a5, 0x03a3e4a5, 0x03abe4a5, 0x03b3e4a5, + 0x03bbe4a5, 0x03c3e4a5, 0x03cbe4a5, 0x03d3e4a5, + 0x03dbe4a5, 0x03e3e4a5, 0x03ebe4a5, 0x03f3e4a5, + 0x03fbe4a5, 0x0403e4a5, 0x040be4a5, 0x0413e4a5, + 0x041be4a5, 0x0423e4a5, 0x042be4a5, 0x0433e4a5, + 0x043be4a5, 0x0443e4a5, 0x044be4a5, 0x0453e4a5, + 0x045be4a5, 0x0463e4a5, 0x046be4a5, 0x0473e4a5, + 0x047be4a5, 0x0483e4a5, 0x048be4a5, 0x0493e4a5, + 0x049be4a5, 0x04a3e4a5, 0x04abe4a5, 0x04b3e4a5, + 0x04bbe4a5, 0x04c3e4a5, 0x04cbe4a5, 0x04d3e4a5, + 0x04dbe4a5, 0x04e3e4a5, 0x04ebe4a5, 0x04f3e4a5, + 0x04fbe4a5, 0x0503e4a5, 0x050be4a5, 0x0513e4a5, + 0x051be4a5, 0x0523e4a5, 0x052be4a5, 0x0533e4a5, + 0x053be4a5, 0x0543e4a5, 0x054be4a5, 0x0553e4a5, + 0x055be4a5, 0x0563e4a5, 0x056be4a5, 0x0573e4a5, + 0x057be4a5, 0x0583e4a5, 0x058be4a5, 0x0593e4a5, + 0x059be4a5, 0x05a3e4a5, 0x05abe4a5, 0x05b3e4a5, + 0x05bbe4a5, 0x05c3e4a5, 0x05cbe4a5, 0x05d3e4a5, + 0x05dbe4a5, 0x05e3e4a5, 0x05ebe4a5, 0x05f3e4a5, + 0x05fbe4a5, 0x060be4a5, 0x061be4a5, 0x062be4a5, + 0x063be4a5, 0x064be4a5, 0x065be4a5, 0x465be4a5, + /* The last entry is extra; it is equal to LZMS_MAX_MATCH_OFFSET + 1 and + * is here to aid binary search. */ +}; + +/* Table: offset slot => number of extra offset bits */ +const u8 lzms_extra_offset_bits[LZMS_MAX_NUM_OFFSET_SYMS] = { + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2, + 2 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4, + 4 , 4 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5, + 5 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6, + 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7, + 7 , 7 , 7 , 7 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8, + 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9, + 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9, + 9 , 9 , 9 , 9 , 9 , 9 , 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 30, +}; /* Table: length slot => length slot base value */ -u32 lzms_length_slot_base[LZMS_NUM_LEN_SYMS + 1]; +const u32 lzms_length_slot_base[LZMS_NUM_LENGTH_SYMS + 1] = { + 0x00000001, 0x00000002, 0x00000003, 0x00000004, + 0x00000005, 0x00000006, 0x00000007, 0x00000008, + 0x00000009, 0x0000000a, 0x0000000b, 0x0000000c, + 0x0000000d, 0x0000000e, 0x0000000f, 0x00000010, + 0x00000011, 0x00000012, 0x00000013, 0x00000014, + 0x00000015, 0x00000016, 0x00000017, 0x00000018, + 0x00000019, 0x0000001a, 0x0000001b, 0x0000001d, + 0x0000001f, 0x00000021, 0x00000023, 0x00000027, + 0x0000002b, 0x0000002f, 0x00000033, 0x00000037, + 0x0000003b, 0x00000043, 0x0000004b, 0x00000053, + 0x0000005b, 0x0000006b, 0x0000007b, 0x0000008b, + 0x0000009b, 0x000000ab, 0x000000cb, 0x000000eb, + 0x0000012b, 0x000001ab, 0x000002ab, 0x000004ab, + 0x000008ab, 0x000108ab, 0x400108ab, + /* The last entry is extra; it is equal to LZMS_MAX_MATCH_LEN + 1 and is + * here to aid binary search. */ +}; /* Table: length slot => number of extra length bits */ -u8 lzms_extra_length_bits[LZMS_NUM_LEN_SYMS]; - -/* Table: length (< LZMS_NUM_FAST_LENGTHS only) => length slot */ -u8 lzms_length_slot_fast[LZMS_NUM_FAST_LENGTHS]; - -u32 +const u8 lzms_extra_length_bits[LZMS_NUM_LENGTH_SYMS] = { + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 1 , 1 , 1 , 1 , 2 , 2 , + 2 , 2 , 2 , 2 , 3 , 3 , 3 , 3 , + 4 , 4 , 4 , 4 , 4 , 5 , 5 , 6 , + 7 , 8 , 9 , 10, 16, 30, +}; + +unsigned lzms_get_slot(u32 value, const u32 slot_base_tab[], unsigned num_slots) { - u32 l = 0; - u32 r = num_slots - 1; + unsigned l = 0; + unsigned r = num_slots - 1; for (;;) { - LZMS_ASSERT(r >= l); - u32 slot = (l + r) / 2; + unsigned slot = (l + r) / 2; if (value >= slot_base_tab[slot]) { if (value < slot_base_tab[slot + 1]) return slot; @@ -75,304 +336,226 @@ lzms_get_slot(u32 value, const u32 slot_base_tab[], unsigned num_slots) } } -static void -lzms_decode_delta_rle_slot_bases(u32 slot_bases[], - u8 extra_bits[], - const u8 delta_run_lens[], - u32 num_run_lens, - u32 final, - u32 expected_num_slots) -{ - u32 order = 0; - u32 delta = 1; - u32 base = 0; - u32 slot = 0; - for (u32 i = 0; i < num_run_lens; i++) { - u8 run_len = delta_run_lens[i]; - while (run_len--) { - base += delta; - if (slot > 0) - extra_bits[slot - 1] = order; - slot_bases[slot] = base; - slot++; - } - delta <<= 1; - order++; - } - LZMS_ASSERT(slot == expected_num_slots); - - slot_bases[slot] = final; - extra_bits[slot - 1] = bsr32(slot_bases[slot] - slot_bases[slot - 1]); -} - -/* Initialize the global position and length slot tables. */ -static void -lzms_compute_slots(void) +/* Return the number of offset slots used when processing a buffer having the + * specified uncompressed size. */ +unsigned +lzms_get_num_offset_slots(size_t uncompressed_size) { - /* If an explicit formula that maps LZMS position and length slots to - * slot bases exists, then it could be used here. But until one is - * found, the following code fills in the slots using the observation - * that the increase from one slot base to the next is an increasing - * power of 2. Therefore, run-length encoding of the delta of adjacent - * entries can be used. */ - static const u8 position_slot_delta_run_lens[] = { - 9, 0, 9, 7, 10, 15, 15, 20, - 20, 30, 33, 40, 42, 45, 60, 73, - 80, 85, 95, 105, 6, - }; - - static const u8 length_slot_delta_run_lens[] = { - 27, 4, 6, 4, 5, 2, 1, 1, - 1, 1, 1, 0, 0, 0, 0, 0, - 1, - }; - - /* Position slots */ - lzms_decode_delta_rle_slot_bases(lzms_position_slot_base, - lzms_extra_position_bits, - position_slot_delta_run_lens, - ARRAY_LEN(position_slot_delta_run_lens), - 0x7fffffff, - LZMS_MAX_NUM_OFFSET_SYMS); - - for (u32 order = 0; order < 30; order++) { - lzms_order_to_position_slot_bounds[order][0] = - lzms_get_slot(1U << order, lzms_position_slot_base, - LZMS_MAX_NUM_OFFSET_SYMS); - lzms_order_to_position_slot_bounds[order][1] = - lzms_get_slot((1U << (order + 1)) - 1, lzms_position_slot_base, - LZMS_MAX_NUM_OFFSET_SYMS); - } - - /* Length slots */ - lzms_decode_delta_rle_slot_bases(lzms_length_slot_base, - lzms_extra_length_bits, - length_slot_delta_run_lens, - ARRAY_LEN(length_slot_delta_run_lens), - 0x400108ab, - LZMS_NUM_LEN_SYMS); - - /* Create table mapping short lengths to length slots. */ - for (u32 slot = 0, i = 0; i < LZMS_NUM_FAST_LENGTHS; i++) { - if (i >= lzms_length_slot_base[slot + 1]) - slot++; - lzms_length_slot_fast[i] = slot; - } + if (uncompressed_size < 2) + return 0; + return 1 + lzms_get_offset_slot(uncompressed_size - 1); } -/* Initialize the global position and length slot tables if not done so already. - * */ void -lzms_init_slots(void) +lzms_init_probability_entries(struct lzms_probability_entry *entries, size_t count) { - static bool done = false; - static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - - if (unlikely(!done)) { - pthread_mutex_lock(&mutex); - if (!done) { - lzms_compute_slots(); - done = true; - } - pthread_mutex_unlock(&mutex); + for (size_t i = 0; i < count; i++) { + entries[i].num_recent_zero_bits = LZMS_INITIAL_PROBABILITY; + entries[i].recent_bits = LZMS_INITIAL_RECENT_BITS; } } -static s32 -lzms_maybe_do_x86_translation(u8 data[restrict], s32 i, s32 num_op_bytes, - s32 * restrict closest_target_usage_p, - s32 last_target_usages[restrict], - s32 max_trans_offset, bool undo) -{ - u16 pos; - - if (undo) { - if (i - *closest_target_usage_p <= max_trans_offset) { - LZMS_DEBUG("Undid x86 translation at position %d " - "(opcode 0x%02x)", i, data[i]); - le32 *p32 = (le32*)&data[i + num_op_bytes]; - u32 n = le32_to_cpu(*p32); - *p32 = cpu_to_le32(n - i); - } - pos = i + le16_to_cpu(*(const le16*)&data[i + num_op_bytes]); - } else { - pos = i + le16_to_cpu(*(const le16*)&data[i + num_op_bytes]); - - if (i - *closest_target_usage_p <= max_trans_offset) { - LZMS_DEBUG("Did x86 translation at position %d " - "(opcode 0x%02x)", i, data[i]); - le32 *p32 = (le32*)&data[i + num_op_bytes]; - u32 n = le32_to_cpu(*p32); - *p32 = cpu_to_le32(n + i); - } - } - - i += num_op_bytes + sizeof(le32) - 1; - - if (i - last_target_usages[pos] <= LZMS_X86_MAX_GOOD_TARGET_OFFSET) - *closest_target_usage_p = i; - - last_target_usages[pos] = i; - - return i + 1; -} - -static s32 -lzms_may_x86_translate(const u8 p[restrict], - s32 *restrict max_offset_ret) +void +lzms_init_symbol_frequencies(u32 freqs[], size_t num_syms) { - /* Switch on first byte of the opcode, assuming it is really an x86 - * instruction. */ - *max_offset_ret = LZMS_X86_MAX_TRANSLATION_OFFSET; - switch (p[0]) { - case 0x48: - if (p[1] == 0x8b) { - if (p[2] == 0x5 || p[2] == 0xd) { - /* Load relative (x86_64) */ - return 3; - } - } else if (p[1] == 0x8d) { - if ((p[2] & 0x7) == 0x5) { - /* Load effective address relative (x86_64) */ - return 3; - } - } - break; - - case 0x4c: - if (p[1] == 0x8d) { - if ((p[2] & 0x7) == 0x5) { - /* Load effective address relative (x86_64) */ - return 3; - } - } - break; - - case 0xe8: - /* Call relative */ - *max_offset_ret = LZMS_X86_MAX_TRANSLATION_OFFSET / 2; - return 1; - - case 0xe9: - /* Jump relative */ - *max_offset_ret = 0; - return 5; - - case 0xf0: - if (p[1] == 0x83 && p[2] == 0x05) { - /* Lock add relative */ - return 3; - } - break; - - case 0xff: - if (p[1] == 0x15) { - /* Call indirect */ - return 2; - } - break; - } - *max_offset_ret = 0; - return 1; + for (size_t i = 0; i < num_syms; i++) + freqs[i] = 1; } /* * Translate relative addresses embedded in x86 instructions into absolute * addresses (@undo == %false), or undo this translation (@undo == %true). * - * @last_target_usages is a temporary array of length >= 65536. + * Absolute addresses are usually more compressible by LZ factorization. + * + * @last_target_usages must be a temporary array of length >= 65536. */ void -lzms_x86_filter(u8 data[restrict], - s32 size, - s32 last_target_usages[restrict], - bool undo) +lzms_x86_filter(u8 data[restrict], s32 size, + s32 last_target_usages[restrict], bool undo) { - s32 closest_target_usage = -LZMS_X86_MAX_TRANSLATION_OFFSET - 1; - - for (s32 i = 0; i < 65536; i++) - last_target_usages[i] = -LZMS_X86_MAX_GOOD_TARGET_OFFSET - 1; - - for (s32 i = 0; i < size - 11; ) { + /* + * Note: this filter runs unconditionally and uses a custom algorithm to + * detect data regions that probably contain x86 code. + * + * 'last_x86_pos' tracks the most recent position that has a good chance + * of being the start of an x86 instruction. When the filter detects a + * likely x86 instruction, it updates this variable and considers the + * next LZMS_X86_MAX_TRANSLATION_OFFSET bytes of data as valid for x86 + * translations. + * + * If part of the data does not, in fact, contain x86 machine code, then + * 'last_x86_pos' will, very likely, eventually fall more than + * LZMS_X86_MAX_TRANSLATION_OFFSET bytes behind the current position. + * This results in x86 translations being disabled until the next likely + * x86 instruction is detected. + * + * To identify "likely x86 instructions", the algorithm attempts to + * track the position of the most recent potential relative-addressing + * instruction that referenced each possible memory address. If it + * finds two references to the same memory address within an + * LZMS_X86_ID_WINDOW_SIZE-byte sized window, then the second reference + * is flagged as a likely x86 instruction. Since the instructions + * considered for translation necessarily use relative addressing, the + * algorithm does a tentative translation into absolute addresses. In + * addition, so that memory addresses can be looked up in an array of + * reasonable size (in this code, 'last_target_usages'), only the + * low-order 2 bytes of each address are considered significant. + */ + + s32 i; + s32 tail_idx; + u8 saved_byte; + s32 last_x86_pos; + + if (size <= 17) + return; + + for (i = 0; i < 65536; i++) + last_target_usages[i] = -(s32)LZMS_X86_ID_WINDOW_SIZE - 1; + + /* + * Optimization: only check for end-of-buffer when we already have a + * byte that is a potential opcode for x86 translation. To do this, + * overwrite one of the bytes near the end of the buffer, and restore it + * later. The correctness of this optimization relies on two + * characteristics of compressed format: + * + * 1. No translation can follow an opcode beginning in the last 16 + * bytes. + * 2. A translation following an opcode starting at the last possible + * position (17 bytes from the end) never extends more than 7 bytes. + * Consequently, we can overwrite any of the bytes starting at + * data[(size - 16) + 7] and have no effect on the result, as long + * as we restore those bytes later. + */ + tail_idx = size - 16; + saved_byte = data[tail_idx + 8]; + data[tail_idx + 8] = 0xE8; + last_x86_pos = -LZMS_X86_MAX_TRANSLATION_OFFSET - 1; + + /* Note: the very first byte must be ignored completely! */ + i = 0; + for (;;) { s32 max_trans_offset; - s32 n; - - n = lzms_may_x86_translate(data + i, &max_trans_offset); - if (max_trans_offset) { - i = lzms_maybe_do_x86_translation(data, i, n, - &closest_target_usage, - last_target_usages, - max_trans_offset, - undo); - } else { - i += n; + s32 opcode_nbytes; + u16 target16; + + /* + * The following table is used to accelerate the common case + * where the byte has nothing to do with x86 translation and + * must simply be skipped. This is the fastest (at least on + * x86_64) of the implementations I tested. The other + * implementations I tested were: + * - Jump table with 256 entries + * - Switch statement with default + */ + static const u8 is_potential_opcode[256] = { + [0x48] = 1, [0x4C] = 1, [0xE8] = 1, + [0xE9] = 1, [0xF0] = 1, [0xFF] = 1, + }; + + for (;;) { + if (is_potential_opcode[data[++i]]) + break; + if (is_potential_opcode[data[++i]]) + break; + if (is_potential_opcode[data[++i]]) + break; + if (is_potential_opcode[data[++i]]) + break; } - } -} -static void -lzms_init_lz_lru_queues(struct lzms_lz_lru_queues *lz) -{ - /* Recent offsets for LZ matches */ - for (u32 i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++) - lz->recent_offsets[i] = i + 1; + if (i >= tail_idx) + break; + + max_trans_offset = LZMS_X86_MAX_TRANSLATION_OFFSET; + switch (data[i]) { + case 0x48: + if (data[i + 1] == 0x8B) { + if (data[i + 2] == 0x5 || data[i + 2] == 0xD) { + /* Load relative (x86_64) */ + opcode_nbytes = 3; + goto have_opcode; + } + } else if (data[i + 1] == 0x8D) { + if ((data[i + 2] & 0x7) == 0x5) { + /* Load effective address relative (x86_64) */ + opcode_nbytes = 3; + goto have_opcode; + } + } + break; + case 0x4C: + if (data[i + 1] == 0x8D) { + if ((data[i + 2] & 0x7) == 0x5) { + /* Load effective address relative (x86_64) */ + opcode_nbytes = 3; + goto have_opcode; + } + } + break; + case 0xE8: + /* Call relative. Note: 'max_trans_offset' must be + * halved for this instruction. This means that we must + * be more confident that we are in a region of x86 + * machine code before we will do a translation for this + * particular instruction. */ + opcode_nbytes = 1; + max_trans_offset /= 2; + goto have_opcode; + case 0xE9: + /* Jump relative */ + i += 4; + break; + case 0xF0: + if (data[i + 1] == 0x83 && data[i + 2] == 0x05) { + /* Lock add relative */ + opcode_nbytes = 3; + goto have_opcode; + } + break; + case 0xFF: + if (data[i + 1] == 0x15) { + /* Call indirect */ + opcode_nbytes = 2; + goto have_opcode; + } + break; + } - lz->prev_offset = 0; - lz->upcoming_offset = 0; -} + continue; -static void -lzms_init_delta_lru_queues(struct lzms_delta_lru_queues *delta) -{ - /* Recent offsets and powers for LZ matches */ - for (u32 i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++) { - delta->recent_offsets[i] = i + 1; - delta->recent_powers[i] = 0; - } - delta->prev_offset = 0; - delta->prev_power = 0; - delta->upcoming_offset = 0; - delta->upcoming_power = 0; -} + have_opcode: + if (undo) { + if (i - last_x86_pos <= max_trans_offset) { + LZMS_DEBUG("Undid x86 translation at position %d " + "(opcode 0x%02x)", i, data[i]); + void *p32 = &data[i + opcode_nbytes]; + u32 n = get_unaligned_u32_le(p32); + put_unaligned_u32_le(n - i, p32); + } + target16 = i + get_unaligned_u16_le(&data[i + opcode_nbytes]); + } else { + target16 = i + get_unaligned_u16_le(&data[i + opcode_nbytes]); + if (i - last_x86_pos <= max_trans_offset) { + LZMS_DEBUG("Did x86 translation at position %d " + "(opcode 0x%02x)", i, data[i]); + void *p32 = &data[i + opcode_nbytes]; + u32 n = get_unaligned_u32_le(p32); + put_unaligned_u32_le(n + i, p32); + } + } + i += opcode_nbytes + sizeof(le32) - 1; -void -lzms_init_lru_queues(struct lzms_lru_queues *lru) -{ - lzms_init_lz_lru_queues(&lru->lz); - lzms_init_delta_lru_queues(&lru->delta); -} + if (i - last_target_usages[target16] <= LZMS_X86_ID_WINDOW_SIZE) + last_x86_pos = i; -void -lzms_update_lz_lru_queues(struct lzms_lz_lru_queues *lz) -{ - if (lz->prev_offset != 0) { - for (int i = LZMS_NUM_RECENT_OFFSETS - 1; i >= 0; i--) - lz->recent_offsets[i + 1] = lz->recent_offsets[i]; - lz->recent_offsets[0] = lz->prev_offset; - } - lz->prev_offset = lz->upcoming_offset; -} + last_target_usages[target16] = i; -void -lzms_update_delta_lru_queues(struct lzms_delta_lru_queues *delta) -{ - if (delta->prev_offset != 0) { - for (int i = LZMS_NUM_RECENT_OFFSETS - 1; i >= 0; i--) { - delta->recent_offsets[i + 1] = delta->recent_offsets[i]; - delta->recent_powers[i + 1] = delta->recent_powers[i]; - } - delta->recent_offsets[0] = delta->prev_offset; - delta->recent_powers[0] = delta->prev_power; + continue; } - delta->prev_offset = delta->upcoming_offset; - delta->prev_power = delta->upcoming_power; -} - -void -lzms_update_lru_queues(struct lzms_lru_queues *lru) -{ - lzms_update_lz_lru_queues(&lru->lz); - lzms_update_delta_lru_queues(&lru->delta); + data[tail_idx + 8] = saved_byte; }