wimlib.net Git - wimlib/blob - src/lz_hash_chains.c

   1 /*
   2  * lz_hash_chains.c
   3  *
   4  * Hash chain match-finder for Lempel-Ziv compression.
   5  *
   6  * Copyright (c) 2014 Eric Biggers.  All rights reserved.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  *
  12  * 1. Redistributions of source code must retain the above copyright
  13  *    notice, this list of conditions and the following disclaimer.
  14  *
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
  20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
  23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  26  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  28  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  29  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30  */
  31
  32 #ifdef HAVE_CONFIG_H
  33 #  include "config.h"
  34 #endif
  35
  36 #include "wimlib/lz_extend.h"
  37 #include "wimlib/lz_hash3.h"
  38 #include "wimlib/lz_mf.h"
  39 #include "wimlib/util.h"
  40
  41 #include <string.h>
  42
  43 /* log2 of the number of buckets in the hash table.  This can be changed.  */
  44 #define LZ_HC_HASH_ORDER 15
  45
  46 #define LZ_HC_HASH_LEN   (1 << LZ_HC_HASH_ORDER)
  47
  48 struct lz_hc {
  49         struct lz_mf base;
  50         u32 *hash_tab; /* followed by 'prev_tab' in memory */
  51         u32 next_hash;
  52 };
  53
  54 static inline u32
  55 lz_hc_hash(const u8 *p)
  56 {
  57         return lz_hash(p, LZ_HC_HASH_ORDER);
  58 }
  59
  60 static void
  61 lz_hc_set_default_params(struct lz_mf_params *params)
  62 {
  63         if (params->min_match_len < LZ_HASH_NBYTES)
  64                 params->min_match_len = LZ_HASH_NBYTES;
  65
  66         if (params->max_match_len == 0)
  67                 params->max_match_len = UINT32_MAX;
  68
  69         if (params->max_search_depth == 0)
  70                 params->max_search_depth = 50;
  71
  72         if (params->nice_match_len == 0)
  73                 params->nice_match_len = 24;
  74
  75         if (params->nice_match_len < params->min_match_len)
  76                 params->nice_match_len = params->min_match_len;
  77
  78         if (params->nice_match_len > params->max_match_len)
  79                 params->nice_match_len = params->max_match_len;
  80 }
  81
  82 static bool
  83 lz_hc_params_valid(const struct lz_mf_params *_params)
  84 {
  85         struct lz_mf_params params = *_params;
  86
  87         lz_hc_set_default_params(&params);
  88
  89         return (params.min_match_len <= params.max_match_len);
  90 }
  91
  92 static u64
  93 lz_hc_get_needed_memory(u32 max_window_size)
  94 {
  95         u64 len = 0;
  96
  97         len += LZ_HC_HASH_LEN;
  98         len += max_window_size;
  99
 100         return len * sizeof(u32);
 101 }
 102
 103 static bool
 104 lz_hc_init(struct lz_mf *_mf)
 105 {
 106         struct lz_hc *mf = (struct lz_hc *)_mf;
 107
 108         lz_hc_set_default_params(&mf->base.params);
 109
 110         mf->hash_tab = MALLOC(lz_hc_get_needed_memory(mf->base.params.max_window_size));
 111         if (!mf->hash_tab)
 112                 return false;
 113
 114         return true;
 115 }
 116
 117 static void
 118 lz_hc_load_window(struct lz_mf *_mf, const u8 window[], u32 size)
 119 {
 120         struct lz_hc *mf = (struct lz_hc *)_mf;
 121
 122         memset(mf->hash_tab, 0, LZ_HC_HASH_LEN * sizeof(u32));
 123 }
 124
 125 static u32
 126 lz_hc_get_matches(struct lz_mf *_mf, struct lz_match matches[])
 127 {
 128         struct lz_hc *mf = (struct lz_hc *)_mf;
 129         const u8 * const window = mf->base.cur_window;
 130         const u32 cur_pos = mf->base.cur_window_pos++;
 131         const u8 * const strptr = &window[cur_pos];
 132         const u32 bytes_remaining = mf->base.cur_window_size - cur_pos;
 133         u32 * const prev_tab = mf->hash_tab + LZ_HC_HASH_LEN;
 134         const u32 max_len = min(bytes_remaining, mf->base.params.max_match_len);
 135         const u32 nice_len = min(max_len, mf->base.params.nice_match_len);
 136         u32 best_len = mf->base.params.min_match_len - 1;
 137         u32 depth_remaining = mf->base.params.max_search_depth;
 138         struct lz_match *lz_matchptr = matches;
 139         u32 hash;
 140         u32 cur_match;
 141         u32 sequence;
 142
 143         if (unlikely(bytes_remaining < LZ_HASH_REQUIRED_NBYTES + 1))
 144                 return 0;
 145
 146         /* Insert the current position into the appropriate hash chain and set
 147          * 'cur_match' to the previous head.
 148          *
 149          * For a slight performance improvement, we do each hash calculation one
 150          * position in advance and prefetch the necessary hash table entry.  */
 151
 152         hash = mf->next_hash;
 153         mf->next_hash = lz_hc_hash(strptr + 1);
 154         prefetch(&mf->hash_tab[mf->next_hash]);
 155         cur_match = mf->hash_tab[hash];
 156         mf->hash_tab[hash] = cur_pos;
 157         prev_tab[cur_pos] = cur_match;
 158
 159         /* Ensure we can find a match of at least the requested length.  */
 160         if (unlikely(best_len >= max_len))
 161                 return 0;
 162
 163         if (UNALIGNED_ACCESS_IS_FAST)
 164                 sequence = load_u24_unaligned(strptr);
 165
 166         /* Search the appropriate hash chain for matches.  */
 167         for (; cur_match && depth_remaining--; cur_match = prev_tab[cur_match]) {
 168
 169                 const u8 * const matchptr = &window[cur_match];
 170                 u32 len;
 171
 172                 /* Considering the potential match at 'matchptr':  is it longer
 173                  * than 'best_len'?
 174                  *
 175                  * The bytes at index 'best_len' are the most likely to differ,
 176                  * so check them first.  */
 177                 if (matchptr[best_len] != strptr[best_len])
 178                         goto next_match;
 179
 180                 if (UNALIGNED_ACCESS_IS_FAST) {
 181                         if (load_u24_unaligned(matchptr) != sequence)
 182                                 goto next_match;
 183
 184                         len = lz_extend(strptr, matchptr, 3, max_len);
 185
 186                         if (len > best_len) {
 187                                 best_len = len;
 188
 189                                 *lz_matchptr++ = (struct lz_match) {
 190                                         .len = best_len,
 191                                         .offset = strptr - matchptr,
 192                                 };
 193
 194                                 if (best_len >= nice_len)
 195                                         break;
 196                         }
 197                 } else {
 198
 199                         /* The bytes at indices 'best_len - 1' and '0' are less
 200                          * important to check separately.  But doing so still
 201                          * gives a slight performance improvement, at least on
 202                          * x86_64, probably because they create separate
 203                          * branches for the CPU to predict independently of the
 204                          * branches in the main comparison loops.
 205                          */
 206                          if (matchptr[best_len - 1] != strptr[best_len - 1] ||
 207                              matchptr[0] != strptr[0])
 208                                 goto next_match;
 209
 210                         for (len = 1; len < best_len - 1; len++)
 211                                 if (matchptr[len] != strptr[len])
 212                                         goto next_match;
 213
 214                         /* The match is the longest found so far --- at least
 215                          * 'best_len' + 1 bytes.  Continue extending it.  */
 216
 217                         if (++best_len != max_len &&
 218                             strptr[best_len] == matchptr[best_len])
 219                                 while (++best_len != max_len)
 220                                         if (strptr[best_len] != matchptr[best_len])
 221                                                 break;
 222
 223                         /* Record the match.  */
 224                         *lz_matchptr++ = (struct lz_match) {
 225                                 .len = best_len,
 226                                 .offset = strptr - matchptr,
 227                         };
 228
 229                         /* Terminate the search if 'nice_len' was reached.  */
 230                         if (best_len >= nice_len)
 231                                 break;
 232                 }
 233
 234         next_match:
 235                 /* Continue to next match in the chain.  */
 236                 ;
 237         }
 238
 239         return lz_matchptr - matches;
 240 }
 241
 242 static void
 243 lz_hc_skip_positions(struct lz_mf *_mf, u32 n)
 244 {
 245         struct lz_hc *mf = (struct lz_hc *)_mf;
 246         u32 * const hash_tab = mf->hash_tab;
 247         u32 * const prev_tab = hash_tab + LZ_HC_HASH_LEN;
 248         const u8 * const window = mf->base.cur_window;
 249         u32 cur_pos = mf->base.cur_window_pos;
 250         u32 end_pos = cur_pos + n;
 251         const u32 bytes_remaining = mf->base.cur_window_size - cur_pos;
 252         u32 hash;
 253         u32 next_hash;
 254
 255         mf->base.cur_window_pos = end_pos;
 256
 257         if (unlikely(bytes_remaining < n + (LZ_HASH_REQUIRED_NBYTES + 1) - 1)) {
 258                 /* Nearing end of window.  */
 259                 if (unlikely(bytes_remaining < (LZ_HASH_REQUIRED_NBYTES + 1)))
 260                         return;
 261
 262                 end_pos = cur_pos + bytes_remaining - (LZ_HASH_REQUIRED_NBYTES + 1) + 1;
 263         }
 264
 265         next_hash = mf->next_hash;
 266         do {
 267                 hash = next_hash;
 268                 next_hash = lz_hc_hash(&window[cur_pos + 1]);
 269                 prev_tab[cur_pos] = hash_tab[hash];
 270                 hash_tab[hash] = cur_pos;
 271         } while (++cur_pos != end_pos);
 272
 273         prefetch(&hash_tab[next_hash]);
 274         mf->next_hash = next_hash;
 275 }
 276
 277 static void
 278 lz_hc_destroy(struct lz_mf *_mf)
 279 {
 280         struct lz_hc *mf = (struct lz_hc *)_mf;
 281
 282         FREE(mf->hash_tab);
 283 }
 284
 285 const struct lz_mf_ops lz_hash_chains_ops = {
 286         .params_valid      = lz_hc_params_valid,
 287         .get_needed_memory = lz_hc_get_needed_memory,
 288         .init              = lz_hc_init,
 289         .load_window       = lz_hc_load_window,
 290         .get_matches       = lz_hc_get_matches,
 291         .skip_positions    = lz_hc_skip_positions,
 292         .destroy           = lz_hc_destroy,
 293         .struct_size       = sizeof(struct lz_hc),
 294 };