4 * Hash chain match-finder for Lempel-Ziv compression.
6 * Copyright (c) 2014 Eric Biggers. All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 #include "wimlib/lz_mf.h"
37 #include "wimlib/util.h"
41 /* Number of hash buckets. This can be changed, but should be a power of 2 so
42 * that the correct hash bucket can be selected using a fast bitwise AND. */
43 #define LZ_HC_HASH_LEN (1 << 15)
45 /* Number of bytes from which the hash code is computed at each position. This
46 * can be changed, provided that lz_hc_hash() is updated as well. */
47 #define LZ_HC_HASH_BYTES 3
/* Lookup table holding the CRC-32 of every possible byte value; used by
 * lz_hc_hash() to mix the first of the three hashed bytes. Filled at most
 * once, thread-safely, via pthread_once() (see lz_hc_init()). */
56 static u32 crc32_table[256];
/* One-time-init guard for crc32_table. */
57 static pthread_once_t crc32_table_filled = PTHREAD_ONCE_INIT;
/* Body of crc32_init(): build the byte-wise CRC-32 table using the reflected
 * polynomial 0xEDB88320 (standard CRC-32 as used in zlib/Ethernet).
 * NOTE(review): this excerpt elides several lines — presumably each outer
 * iteration seeds 'r' from 'b', the inner branch applies the polynomial only
 * when the low bit is set (plain shift otherwise), and the result is stored
 * into crc32_table[b]. Confirm against the full source. */
62 for (u32 b = 0; b < 256; b++) {
/* Process the 8 bits of the byte value, LSB first. */
64 for (int i = 0; i < 8; i++) {
/* Reflected CRC-32 step: shift right, then XOR in the polynomial. */
66 r = (r >> 1) ^ 0xEDB88320;
74 /* This hash function is taken from the LZMA SDK. It seems to work well.
76 * TODO: Maybe use the SSE4.2 CRC32 instruction when available? */
/* Compute the hash bucket index for the LZ_HC_HASH_BYTES (3) bytes at 'p'.
 * NOTE(review): the return-type line and the initialization of 'hash' (and
 * the mixing of p[1]) are elided from this excerpt — confirm against the
 * full source. */
78 lz_hc_hash(const u8 *p)
/* First byte is diffused through the CRC-32 table for good distribution. */
82 hash ^= crc32_table[p[0]];
/* Third byte contributes at bit position 8. */
84 hash ^= (u32)p[2] << 8;
/* Reduce to a valid bucket index. LZ_HC_HASH_LEN is a power of 2 (see its
 * definition), so the compiler can implement this '%' as a bitwise AND. */
86 return hash % LZ_HC_HASH_LEN;
/* Fill in any unspecified (zero) match-finder parameters with defaults, and
 * clamp the rest into a self-consistent range. Mutates '*params' in place. */
90 lz_hc_set_default_params(struct lz_mf_params *params)
/* Matches shorter than the hash length cannot be found by this finder. */
92 if (params->min_match_len < LZ_HC_HASH_BYTES)
93 params->min_match_len = LZ_HC_HASH_BYTES;
/* 0 means "no explicit limit": allow matches up to the whole window. */
95 if (params->max_match_len == 0)
96 params->max_match_len = params->max_window_size;
/* 0 means "use default search depth" (hash-chain traversal limit). */
98 if (params->max_search_depth == 0)
99 params->max_search_depth = 50;
/* 0 means "use default early-exit match length". */
101 if (params->nice_match_len == 0)
102 params->nice_match_len = 24;
/* Clamp nice_match_len into [min_match_len, max_match_len]. */
104 if (params->nice_match_len < params->min_match_len)
105 params->nice_match_len = params->min_match_len;
107 if (params->nice_match_len > params->max_match_len)
108 params->nice_match_len = params->max_match_len;
112 lz_hc_params_valid(const struct lz_mf_params *_params)
114 struct lz_mf_params params = *_params;
116 lz_hc_set_default_params(¶ms);
118 /* Avoid edge case where min_match_len = 3, max_match_len = 2 */
119 return (params.min_match_len <= params.max_match_len);
/* Return the number of bytes of auxiliary memory needed for a window of
 * 'max_window_size' bytes: the hash-bucket head table (LZ_HC_HASH_LEN
 * entries) plus one chain link per window position, all u32-sized.
 * NOTE(review): the declaration/initialization of 'len' is elided from this
 * excerpt — presumably 'u64 len = 0;' or similar; confirm overflow behavior
 * for very large windows against the full source. */
123 lz_hc_get_needed_memory(u32 max_window_size)
/* Space for 'hash_tab' (bucket heads). */
127 len += LZ_HC_HASH_LEN;
/* Space for 'prev_tab' (one previous-position link per window byte). */
128 len += max_window_size;
/* 'len' counts u32 entries; convert to bytes. */
130 return len * sizeof(u32);
/* Initialize the hash-chains match-finder: apply default parameters,
 * allocate its lookup tables in one contiguous block, and make sure the
 * shared CRC-32 table has been built. */
134 lz_hc_init(struct lz_mf *_mf)
136 struct lz_hc *mf = (struct lz_hc *)_mf;
138 lz_hc_set_default_params(&mf->base.params);
140 /* Allocate space for 'hash_tab' and 'prev_tab'. */
142 mf->hash_tab = MALLOC(lz_hc_get_needed_memory(mf->base.params.max_window_size));
/* NOTE(review): the elided lines here presumably check the MALLOC result and
 * fail the init on allocation failure — confirm against the full source. */
/* 'prev_tab' lives immediately after the LZ_HC_HASH_LEN bucket heads in the
 * single allocation made above. */
146 mf->prev_tab = mf->hash_tab + LZ_HC_HASH_LEN;
148 /* Fill in the CRC32 table if not done already. */
149 pthread_once(&crc32_table_filled, crc32_init);
/* Prepare the match-finder for a new input window: clear all hash-bucket
 * heads (0 doubles as the "empty chain" sentinel) and prime the one-ahead
 * hash used by lz_hc_get_matches()/lz_hc_skip_position(). 'prev_tab' needs
 * no clearing because entries are written before they are ever read. */
155 lz_hc_load_window(struct lz_mf *_mf, const u8 window[], u32 size)
157 struct lz_hc *mf = (struct lz_hc *)_mf;
159 memset(mf->hash_tab, 0, LZ_HC_HASH_LEN * sizeof(u32));
/* Only hash when at least LZ_HC_HASH_BYTES bytes exist at position 0. */
161 if (size >= LZ_HC_HASH_BYTES)
162 mf->next_hash = lz_hc_hash(window);
/* Retrieve matches between the current window position and earlier positions
 * reachable through this position's hash chain. Matches of strictly
 * increasing length are appended to 'matches[]'; the search stops early once
 * 'nice_match_len' is reached or 'max_search_depth' chain links have been
 * examined. Advances the current window position by one byte.
 * NOTE(review): this excerpt elides several lines — the declarations of
 * 'hash', 'cur_match', 'len' and 'num_matches', the 'do {' opener of the
 * extension loop, the 'goto' targets ('next_match:', 'out:'), the '.len'
 * designated initializers, and the final 'return num_matches;'. The comments
 * below describe only what the visible lines establish. */
166 lz_hc_get_matches(struct lz_mf *_mf, struct lz_match matches[])
168 struct lz_hc *mf = (struct lz_hc *)_mf;
169 const u8 * const window = mf->base.cur_window;
170 const u32 cur_pos = mf->base.cur_window_pos;
/* Pointer to the string being matched against. */
171 const u8 * const strptr = &window[cur_pos];
172 const u32 bytes_remaining = mf->base.cur_window_size - cur_pos;
173 u32 * const prev_tab = mf->prev_tab;
/* Early-exit length, clamped so we never read past the window end. */
174 const u32 nice_len = min(bytes_remaining, mf->base.params.nice_match_len);
/* Only matches longer than 'best_len' are reported, so seed it one below
 * the minimum reportable length. */
175 u32 best_len = mf->base.params.min_match_len - 1;
176 u32 depth_remaining = mf->base.params.max_search_depth;
/* Too close to the window end to report any match; bail out (elided). */
181 if (unlikely(bytes_remaining <= mf->base.params.min_match_len))
184 /* Insert the current position into the appropriate hash chain and set
185 * 'cur_match' to the previous head.
187 * For a slight performance improvement, we do each hash calculation one
188 * position in advance and prefetch the necessary hash table entry. */
190 hash = mf->next_hash;
191 mf->next_hash = lz_hc_hash(strptr + 1);
192 prefetch(&mf->hash_tab[mf->next_hash]);
193 cur_match = mf->hash_tab[hash];
194 mf->hash_tab[hash] = cur_pos;
195 prev_tab[cur_pos] = cur_match;
/* Walk the chain of earlier positions with the same hash; 0 terminates the
 * chain, and 'depth_remaining' bounds the total work. */
197 for (; cur_match && depth_remaining--; cur_match = prev_tab[cur_match]) {
199 const u8 * const matchptr = &window[cur_match];
202 /* Considering a match at 'matchptr'. */
204 /* The bytes at index 'best_len' are the most likely to differ,
205 * so check them first.
207 * The bytes at indices 'best_len - 1' and '0' are less
208 * important to check separately. But doing so still gives a
209 * slight performance improvement, probably because they create
210 * separate branches for the CPU to predict independently of the
211 * branches in the main comparison loops. */
212 if (matchptr[best_len] != strptr[best_len] ||
213 matchptr[best_len - 1] != strptr[best_len - 1] ||
214 matchptr[0] != strptr[0])
/* Verify the remaining interior bytes of a potential best_len+1 match. */
217 for (len = 1; len < best_len - 1; len++)
218 if (matchptr[len] != strptr[len])
221 /* We now know the match length is at least 'best_len + 1'. */
/* (Elided: 'len' is reset to best_len and a do-loop extends it.) */
226 if (++len == nice_len) {
227 /* 'nice_len' reached; don't waste time
228 * searching for longer matches. Extend the
229 * match as far as possible, record it, and
/* Absolute cap on the extension: window end or max_match_len. */
231 const u32 max_len = min(bytes_remaining,
232 mf->base.params.max_match_len);
233 while (len < max_len && strptr[len] == matchptr[len])
/* Record the final match and return (record fields partially elided). */
235 matches[num_matches++] = (struct lz_match) {
237 .offset = strptr - matchptr,
/* End of the byte-by-byte extension loop. */
241 } while (matchptr[len] == strptr[len]);
243 /* Found a longer match, but 'nice_len' not yet reached. */
245 matches[num_matches++] = (struct lz_match) {
247 .offset = strptr - matchptr,
251 /* Continue to next match in the chain. */
/* Done with this position; move the match-finder forward one byte. */
256 mf->base.cur_window_pos++;
/* Advance the match-finder past the current position without searching for
 * matches, while still inserting the position into its hash chain so that
 * later positions can match against it. Mirrors the insertion logic of
 * lz_hc_get_matches(), including the one-ahead hash + prefetch trick.
 * NOTE(review): the declaration of 'hash' and the body of the early-return
 * branch are elided from this excerpt — confirm against the full source. */
261 lz_hc_skip_position(struct lz_hc *mf)
263 const u32 bytes_remaining = lz_mf_get_bytes_remaining(&mf->base);
/* Too close to the window end for hashing to be useful/valid (elided body
 * presumably just advances the position and returns). */
266 if (unlikely(bytes_remaining <= mf->base.params.min_match_len))
269 hash = mf->next_hash;
/* Hash the *next* position in advance and prefetch its bucket. */
270 mf->next_hash = lz_hc_hash(lz_mf_get_window_ptr(&mf->base) + 1);
271 prefetch(&mf->hash_tab[mf->next_hash]);
/* Link the current position in front of the existing chain head. */
272 mf->prev_tab[mf->base.cur_window_pos] = mf->hash_tab[hash];
273 mf->hash_tab[hash] = mf->base.cur_window_pos;
276 mf->base.cur_window_pos++;
/* Skip the next 'n' positions: insert each into its hash chain without
 * reporting matches. NOTE(review): the loop construct around the call below
 * is elided from this excerpt (presumably a do/while over 'n'). */
280 lz_hc_skip_positions(struct lz_mf *_mf, u32 n)
282 struct lz_hc *mf = (struct lz_hc *)_mf;
285 lz_hc_skip_position(mf);
/* Free the match-finder's auxiliary memory. Only 'hash_tab' was allocated
 * ('prev_tab' points into the same block — see lz_hc_init()); the elided
 * body presumably frees it. */
290 lz_hc_destroy(struct lz_mf *_mf)
292 struct lz_hc *mf = (struct lz_hc *)_mf;
/* Operations vtable exposing the hash-chains match-finder through the
 * generic lz_mf interface. NOTE(review): the '.init' member line appears to
 * be elided from this excerpt (an init function exists above); confirm the
 * full initializer against the complete source. */
297 const struct lz_mf_ops lz_hash_chains_ops = {
298 .params_valid = lz_hc_params_valid,
299 .get_needed_memory = lz_hc_get_needed_memory,
301 .load_window = lz_hc_load_window,
302 .get_matches = lz_hc_get_matches,
303 .skip_positions = lz_hc_skip_positions,
304 .destroy = lz_hc_destroy,
/* Lets generic code allocate the concrete struct lz_hc. */
305 .struct_size = sizeof(struct lz_hc),