wimlib.net Git - wimlib/blob - include/wimlib/hc_matchfinder.h

   1 /*
   2  * hc_matchfinder.h
   3  *
   4  * Author:      Eric Biggers
   5  * Year:        2014, 2015
   6  *
   7  * The author dedicates this file to the public domain.
   8  * You can do whatever you want with this file.
   9  *
  10  * ---------------------------------------------------------------------------
  11  *
  12  *                                 Algorithm
  13  *
  14  * This is a Hash Chains (hc) based matchfinder.
  15  *
  16  * The data structure is a hash table where each hash bucket contains a linked
  17  * list (or "chain") of sequences whose first 3 bytes share the same hash code.
  18  * Each sequence is identified by its starting position in the input buffer.
  19  *
  20  * The algorithm processes the input buffer sequentially.  At each byte
  21  * position, the hash code of the first 3 bytes of the sequence beginning at
  22  * that position (the sequence being matched against) is computed.  This
  23  * identifies the hash bucket to use for that position.  Then, this hash
  24  * bucket's linked list is searched for matches.  Then, a new linked list node
  25  * is created to represent the current sequence and is prepended to the list.
  26  *
  27  * This algorithm has several useful properties:
  28  *
  29  * - It only finds true Lempel-Ziv matches; i.e., those where the matching
  30  *   sequence occurs prior to the sequence being matched against.
  31  *
  32  * - The sequences in each linked list are always sorted by decreasing starting
  33  *   position.  Therefore, the closest (smallest offset) matches are found
  34  *   first, which in many compression formats tend to be the cheapest to encode.
  35  *
  36  * - Although fast running time is not guaranteed due to the possibility of the
  37  *   lists getting very long, the worst degenerate behavior can be easily
  38  *   prevented by capping the number of nodes searched at each position.
  39  *
  40  * - If the compressor decides not to search for matches at a certain position,
  41  *   then that position can be quickly inserted without searching the list.
  42  *
  43  * - The algorithm is adaptable to sliding windows: just store the positions
  44  *   relative to a "base" value that is updated from time to time, and stop
  45  *   searching each list when the sequences get too far away.
  46  *
  47  * ---------------------------------------------------------------------------
  48  *
  49  *                              Notes on usage
  50  *
  51  * You must define MATCHFINDER_MAX_WINDOW_ORDER before including this header
  52  * because that determines which integer type to use for positions.  Since
  53  * 16-bit integers are faster than 32-bit integers due to reduced memory usage
  54  * (and therefore reduced cache pressure), the code only uses 32-bit integers if
  55  * they are needed to represent all possible positions.
  56  *
  57  * The number of bytes that must be allocated for a given 'struct
  58  * hc_matchfinder' must be gotten by calling hc_matchfinder_size().
  59  *
  60  * ----------------------------------------------------------------------------
  61  *
  62  *                               Optimizations
  63  *
  64  * The longest_match() and skip_positions() functions are inlined into the
  65  * compressors that use them.  This isn't just about saving the overhead of a
  66  * function call.  These functions are intended to be called from the inner
  67  * loops of compressors, where giving the compiler more control over register
  68  * allocation is very helpful.  There is also significant benefit to be gained
  69  * from allowing the CPU to predict branches independently at each call site.
  70  * For example, "lazy"-style compressors can be written with two calls to
  71  * longest_match(), each of which starts with a different 'best_len' and
  72  * therefore has significantly different performance characteristics.
  73  *
  74  * Although any hash function can be used, a multiplicative hash is fast and
  75  * works well.
  76  *
  77  * On some processors, it is significantly faster to extend matches by whole
  78  * words (32 or 64 bits) instead of by individual bytes.  For this to be the
  79  * case, the processor must implement unaligned memory accesses efficiently and
  80  * must have either a fast "find first set bit" instruction or a fast "find last
  81  * set bit" instruction, depending on the processor's endianness.
  82  *
  83  * The code uses one loop for finding the first match and one loop for finding a
  84  * longer match.  Each of these loops is tuned for its respective task and in
  85  * combination are faster than a single generalized loop that handles both
  86  * tasks.
  87  *
  88  * The code also uses a tight inner loop that only compares the last and first
  89  * bytes of a potential match.  It is only when these bytes match that a full
  90  * match extension is attempted.
  91  *
  92  * ----------------------------------------------------------------------------
  93  */
  94
  95 #ifndef _HC_MATCHFINDER_H
  96 #define _HC_MATCHFINDER_H
  97
  98 #ifndef MATCHFINDER_MAX_WINDOW_ORDER
  99 #  error "MATCHFINDER_MAX_WINDOW_ORDER must be defined!"
 100 #endif
 101
 102 #include <string.h>
 103
 104 #include "wimlib/lz_extend.h"
 105 #include "wimlib/lz_hash.h"
 106 #include "wimlib/unaligned.h"
 107
 108 #if MATCHFINDER_MAX_WINDOW_ORDER < 14
 109 #  define HC_MATCHFINDER_HASH_ORDER 14
 110 #else
 111 #  define HC_MATCHFINDER_HASH_ORDER 15
 112 #endif
 113
 114 #if MATCHFINDER_MAX_WINDOW_ORDER <= 16
 115 typedef u16 pos_t;
 116 #else
 117 typedef u32 pos_t;
 118 #endif
 119
 120 struct hc_matchfinder {
 121         pos_t hash_tab[1UL << HC_MATCHFINDER_HASH_ORDER];
 122         pos_t next_tab[];
 123 };
 124
 125 /* Return the number of bytes that must be allocated for a 'hc_matchfinder' that
 126  * can work with buffers up to the specified size.  */
 127 static inline size_t
 128 hc_matchfinder_size(size_t max_bufsize)
 129 {
 130         return sizeof(struct hc_matchfinder) + (max_bufsize * sizeof(pos_t));
 131 }
 132
 133 /* Prepare the matchfinder for a new input buffer.  */
 134 static inline void
 135 hc_matchfinder_init(struct hc_matchfinder *mf)
 136 {
 137         memset(mf, 0, sizeof(*mf));
 138 }
 139
 140 /*
 141  * Find the longest match longer than 'best_len' bytes.
 142  *
 143  * @mf
 144  *      The matchfinder structure.
 145  * @in_begin
 146  *      Pointer to the beginning of the input buffer.
 147  * @in_next
 148  *      Pointer to the next byte in the input buffer to process.  This is the
 149  *      pointer to the sequence being matched against.
 150  * @best_len
 151  *      Require a match longer than this length.
 152  * @max_len
 153  *      The maximum permissible match length at this position.
 154  * @nice_len
 155  *      Stop searching if a match of at least this length is found.
 156  *      Must be <= @max_len.
 157  * @max_search_depth
 158  *      Limit on the number of potential matches to consider.  Must be >= 1.
 159  * @offset_ret
 160  *      If a match is found, its offset is returned in this location.
 161  *
 162  * Return the length of the match found, or 'best_len' if no match longer than
 163  * 'best_len' was found.
 164  */
 165 static inline unsigned
 166 hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
 167                              const u8 * const in_begin,
 168                              const u8 * const in_next,
 169                              unsigned best_len,
 170                              const unsigned max_len,
 171                              const unsigned nice_len,
 172                              const unsigned max_search_depth,
 173                              unsigned *offset_ret)
 174 {
 175         unsigned depth_remaining = max_search_depth;
 176         const u8 *best_matchptr = best_matchptr; /* uninitialized */
 177         const u8 *matchptr;
 178         unsigned len;
 179         u32 first_3_bytes;
 180         u32 hash;
 181         pos_t cur_node;
 182
 183         /* Insert the current sequence into the appropriate linked list.  */
 184         if (unlikely(max_len < LOAD_U24_REQUIRED_NBYTES))
 185                 goto out;
 186         first_3_bytes = load_u24_unaligned(in_next);
 187         hash = lz_hash(first_3_bytes, HC_MATCHFINDER_HASH_ORDER);
 188         cur_node = mf->hash_tab[hash];
 189         mf->next_tab[in_next - in_begin] = cur_node;
 190         mf->hash_tab[hash] = in_next - in_begin;
 191
 192         if (unlikely(best_len >= max_len))
 193                 goto out;
 194
 195         /* Search the appropriate linked list for matches.  */
 196
 197         if (!cur_node)
 198                 goto out;
 199
 200         if (best_len < 3) {
 201                 for (;;) {
 202                         /* No length 3 match found yet.
 203                          * Check the first 3 bytes.  */
 204                         matchptr = &in_begin[cur_node];
 205
 206                         if (load_u24_unaligned(matchptr) == first_3_bytes)
 207                                 break;
 208
 209                         /* The first 3 bytes did not match.  Keep trying.  */
 210                         cur_node = mf->next_tab[cur_node];
 211                         if (!cur_node || !--depth_remaining)
 212                                 goto out;
 213                 }
 214
 215                 /* Found a match of length >= 3.  Extend it to its full length.  */
 216                 best_matchptr = matchptr;
 217                 best_len = lz_extend(in_next, best_matchptr, 3, max_len);
 218                 if (best_len >= nice_len)
 219                         goto out;
 220                 cur_node = mf->next_tab[cur_node];
 221                 if (!cur_node || !--depth_remaining)
 222                         goto out;
 223         }
 224
 225         for (;;) {
 226                 for (;;) {
 227                         matchptr = &in_begin[cur_node];
 228
 229                         /* Already found a length 3 match.  Try for a longer
 230                          * match; start by checking either the last 4 bytes and
 231                          * the first 4 bytes, or the last byte.  (The last byte,
 232                          * the one which would extend the match length by 1, is
 233                          * the most important.)  */
 234                 #if UNALIGNED_ACCESS_IS_FAST
 235                         if ((load_u32_unaligned(matchptr + best_len - 3) ==
 236                              load_u32_unaligned(in_next + best_len - 3)) &&
 237                             (load_u32_unaligned(matchptr) ==
 238                              load_u32_unaligned(in_next)))
 239                 #else
 240                         if (matchptr[best_len] == in_next[best_len])
 241                 #endif
 242                                 break;
 243
 244                         cur_node = mf->next_tab[cur_node];
 245                         if (!cur_node || !--depth_remaining)
 246                                 goto out;
 247                 }
 248
 249         #if UNALIGNED_ACCESS_IS_FAST
 250                 len = 4;
 251         #else
 252                 len = 0;
 253         #endif
 254                 len = lz_extend(in_next, matchptr, len, max_len);
 255                 if (len > best_len) {
 256                         best_len = len;
 257                         best_matchptr = matchptr;
 258                         if (best_len >= nice_len)
 259                                 goto out;
 260                 }
 261                 cur_node = mf->next_tab[cur_node];
 262                 if (!cur_node || !--depth_remaining)
 263                         goto out;
 264         }
 265 out:
 266         *offset_ret = in_next - best_matchptr;
 267         return best_len;
 268 }
 269
 270 /*
 271  * Advance the matchfinder, but don't search for matches.
 272  *
 273  * @mf
 274  *      The matchfinder structure.
 275  * @in_begin
 276  *      Pointer to the beginning of the input buffer.
 277  * @in_next
 278  *      Pointer to the next byte in the input buffer to process.
 279  * @in_end
 280  *      Pointer to the end of the input buffer.
 281  * @count
 282  *      The number of bytes to advance.  Must be > 0.
 283  */
 284 static inline void
 285 hc_matchfinder_skip_positions(struct hc_matchfinder * restrict mf,
 286                               const u8 *in_begin,
 287                               const u8 *in_next,
 288                               const u8 *in_end,
 289                               unsigned count)
 290 {
 291         u32 hash;
 292
 293         if (unlikely(in_next + count >= in_end - LZ_HASH3_REQUIRED_NBYTES))
 294                 return;
 295
 296         do {
 297                 hash = lz_hash_3_bytes(in_next, HC_MATCHFINDER_HASH_ORDER);
 298                 mf->next_tab[in_next - in_begin] = mf->hash_tab[hash];
 299                 mf->hash_tab[hash] = in_next - in_begin;
 300                 in_next++;
 301         } while (--count);
 302 }
 303
 304 #endif /* _HC_MATCHFINDER_H */