wimlib.net Git - wimlib/blob - src/lz_binary_trees.c

   1 /*
   2  * lz_binary_trees.c
   3  *
   4  * Binary tree match-finder for Lempel-Ziv compression.
   5  *
   6  * Copyright (c) 2014 Eric Biggers.  All rights reserved.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  *
  12  * 1. Redistributions of source code must retain the above copyright
  13  *    notice, this list of conditions and the following disclaimer.
  14  *
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
  20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
  23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  26  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  28  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  29  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30  */
  31
  32 /*
  33  * Note: the binary tree search/update algorithm is based on code from the
  34  * public domain LZMA SDK (authors: Igor Pavlov, Lasse Collin).
  35  */
  36
  37 #ifdef HAVE_CONFIG_H
  38 #  include "config.h"
  39 #endif
  40
  41 #include "wimlib/lz_mf.h"
  42 #include "wimlib/util.h"
  43 #include <pthread.h>
  44 #include <string.h>
  45
  46 /* Number of hash buckets.  This can be changed, but it should be a power of 2
  47  * so that the correct hash bucket can be selected using a fast bitwise AND.  */
  48 #define LZ_BT_HASH_LEN          (1 << 16)
  49
  50 /* Number of bytes from which the hash code is computed at each position.  This
  51  * can be changed, provided that lz_bt_hash() is updated as well.  */
  52 #define LZ_BT_HASH_BYTES   3
  53
  54 /* Number of entries in the digram table.
  55  *
  56  * Note:  You rarely get length-2 matches if you use length-3 hashing.  But
  57  * since binary trees are typically used for higher compression ratios than hash
  58  * chains, it is helpful for this match-finder to find length-2 matches as well.
  59  * Therefore this match-finder also uses a digram table to find length-2 matches
  60  * when the minimum match length is 2.  */
  61 #define LZ_BT_DIGRAM_TAB_LEN    (256 * 256)
  62
  63 struct lz_bt {
  64         struct lz_mf base;
  65         u32 *hash_tab;
  66         u32 *digram_tab;
  67         u32 *child_tab;
  68         u32 next_hash;
  69 };
  70
  71 static u32 crc32_table[256];
  72 static pthread_once_t crc32_table_filled = PTHREAD_ONCE_INIT;
  73
  74 static void
  75 crc32_init(void)
  76 {
  77         for (u32 b = 0; b < 256; b++) {
  78                 u32 r = b;
  79                 for (int i = 0; i < 8; i++) {
  80                         if (r & 1)
  81                                 r = (r >> 1) ^ 0xEDB88320;
  82                         else
  83                                 r >>= 1;
  84                 }
  85                 crc32_table[b] = r;
  86         }
  87 }
  88
  89 /* This hash function is taken from the LZMA SDK.  It seems to work well.
  90
  91  * TODO: Maybe use the SSE4.2 CRC32 instruction when available?  */
  92 static inline u32
  93 lz_bt_hash(const u8 *p)
  94 {
  95         u32 hash = 0;
  96
  97         hash ^= crc32_table[p[0]];
  98         hash ^= p[1];
  99         hash ^= (u32)p[2] << 8;
 100
 101         return hash % LZ_BT_HASH_LEN;
 102 }
 103
 104 static void
 105 lz_bt_set_default_params(struct lz_mf_params *params)
 106 {
 107         if (params->min_match_len == 0)
 108                 params->min_match_len = 2;
 109
 110         if (params->max_match_len == 0)
 111                 params->max_match_len = params->max_window_size;
 112
 113         if (params->max_search_depth == 0)
 114                 params->max_search_depth = 50;
 115
 116         if (params->nice_match_len == 0)
 117                 params->nice_match_len = 24;
 118
 119         if (params->nice_match_len < params->min_match_len)
 120                 params->nice_match_len = params->min_match_len;
 121
 122         if (params->nice_match_len > params->max_match_len)
 123                 params->nice_match_len = params->max_match_len;
 124 }
 125
 126 static bool
 127 lz_bt_params_valid(const struct lz_mf_params *params)
 128 {
 129         return true;
 130 }
 131
 132 static u64
 133 lz_bt_get_needed_memory(u32 max_window_size)
 134 {
 135         u64 len = 0;
 136
 137         len += LZ_BT_HASH_LEN;           /* hash_tab */
 138         len += LZ_BT_DIGRAM_TAB_LEN;     /* digram_tab */
 139         len += 2 * (u64)max_window_size; /* child_tab */
 140
 141         return len * sizeof(u32);
 142 }
 143
 144 static bool
 145 lz_bt_init(struct lz_mf *_mf)
 146 {
 147         struct lz_bt *mf = (struct lz_bt *)_mf;
 148         struct lz_mf_params *params = &mf->base.params;
 149         size_t len = 0;
 150
 151         lz_bt_set_default_params(params);
 152
 153         /* Allocate space for 'hash_tab', 'digram_tab', and 'child_tab'.  */
 154
 155         len += LZ_BT_HASH_LEN;
 156         if (params->min_match_len == 2)
 157                 len += LZ_BT_DIGRAM_TAB_LEN;
 158         len += 2 * params->max_window_size;
 159
 160         mf->hash_tab = MALLOC(len * sizeof(u32));
 161         if (!mf->hash_tab)
 162                 return false;
 163
 164         if (params->min_match_len == 2) {
 165                 mf->digram_tab = mf->hash_tab + LZ_BT_HASH_LEN;
 166                 mf->child_tab = mf->digram_tab + LZ_BT_DIGRAM_TAB_LEN;
 167         } else {
 168                 mf->digram_tab = NULL;
 169                 mf->child_tab = mf->hash_tab + LZ_BT_HASH_LEN;
 170         }
 171
 172         /* Fill in the CRC32 table if not done already.  */
 173         pthread_once(&crc32_table_filled, crc32_init);
 174
 175         return true;
 176 }
 177
 178 static void
 179 lz_bt_load_window(struct lz_mf *_mf, const u8 window[], u32 size)
 180 {
 181         struct lz_bt *mf = (struct lz_bt *)_mf;
 182         size_t clear_len;
 183
 184         /* Clear hash_tab and digram_tab.
 185          * Note: child_tab need not be cleared.  */
 186         clear_len = LZ_BT_HASH_LEN;
 187         if (mf->digram_tab)
 188                 clear_len += LZ_BT_DIGRAM_TAB_LEN;
 189         memset(mf->hash_tab, 0, clear_len * sizeof(u32));
 190
 191         if (size >= LZ_BT_HASH_BYTES)
 192                 mf->next_hash = lz_bt_hash(window);
 193 }
 194
 195 /*
 196  * Search the binary tree of the current hash code for matches.  At the same
 197  * time, update this tree to add the current position in the window.
 198  *
 199  * @window
 200  *      The window being searched.
 201  * @cur_window_pos
 202  *      The current position in the window.
 203  * @child_tab
 204  *      Table of child pointers for the binary tree.  The children of the node
 205  *      for position 'i' in the window are child_tab[i * 2] and child_tab[i*2 +
 206  *      1].  Zero is reserved for the 'null' value (no child).  Consequently, we
 207  *      don't recognize matches beginning at position 0.   In fact, the node for
 208  *      position 0 in the window will not be used at all, which is just as well
 209  *      because we use 0-based indices which don't work for position 0.
 210  * @cur_match
 211  *      The position in the window at which the binary tree for the current hash
 212  *      code is rooted.  This can be 0, which indicates that the binary tree for
 213  *      the current hash code is empty.
 214  * @min_len
 215  *      Ignore matches shorter than this length.  This must be at least 1.
 216  * @max_len
 217  *      Don't produce any matches longer than this length.  If we find a match
 218  *      this long, terminate the search and return.
 219  * @max_search_depth
 220  *      Stop if we reach this depth in the binary tree.
 221  * @matches
 222  *      The array in which to produce the matches.  The matches will be produced
 223  *      in order of increasing length and increasing offset.  No more than one
 224  *      match shall have any given length, nor shall any match be shorter than
 225  *      @min_len, nor shall any match be longer than @max_len, nor shall any two
 226  *      matches have the same offset.
 227  *
 228  * Returns the number of matches found and written to @matches.
 229  */
 230 static u32
 231 do_search(const u8 window[restrict],
 232           const u32 cur_window_pos,
 233           u32 child_tab[restrict],
 234           u32 cur_match,
 235           const u32 min_len,
 236           const u32 max_len,
 237           const u32 max_search_depth,
 238           struct lz_match matches[restrict])
 239 {
 240         /*
 241          * Here's my explanation of how this code actually works.  Beware: this
 242          * algorithm is a *lot* trickier than searching for matches via hash
 243          * chains.  But it can be significantly better, especially when doing
 244          * "optimal" parsing, which is why it gets used, e.g. in LZMA as well as
 245          * here.
 246          *
 247          * ---------------------------------------------------------------------
 248          *
 249          *                              Data structure
 250          *
 251          * Basically, there is not just one binary tree, but rather one binary
 252          * tree per hash code.  For a given hash code, the binary tree indexes
 253          * previous positions in the window that have that same hash code.  The
 254          * key for each node is the "string", or byte sequence, beginning at the
 255          * corresponding position in the window.
 256          *
 257          * Each tree maintains the invariant that if node C is a child of node
 258          * P, then the window position represented by node C is smaller than
 259          * ("left of") the window position represented by node P.  Equivalently,
 260          * while descending into a tree, the match distances ("offsets") from
 261          * the current position are non-decreasing --- actually strictly
 262          * increasing, because each node represents a unique position.
 263          *
 264          * In addition, not all previous positions sharing the same hash code
 265          * will necessarily be represented in each binary tree; see the
 266          * "Updating" section.
 267          *
 268          * ---------------------------------------------------------------------
 269          *
 270          *                                Searching
 271          *
 272          * Suppose we want to search for LZ77-style matches with the string
 273          * beginning at the current window position and extending for @max_len
 274          * bytes.  To do this, we can search for this string in the binary tree
 275          * for this string's hash code.  Each node visited during the search is
 276          * a potential match.  This method will find the matches efficiently
 277          * because they will converge on the current string, due to the nature
 278          * of the binary search.
 279          *
 280          * Naively, when visiting a node that represents a match of length N, we
 281          * must compare N + 1 bytes in order to determine the length of that
 282          * match and the lexicographic ordering of that match relative to the
 283          * current string (which determines whether we need to step left or
 284          * right into the next level of the tree, as per the standard binary
 285          * tree search algorithm).  However, as an optimization, we need not
 286          * explicitly examine the full length of the match at each node.  To see
 287          * that this is true, suppose that we examine a node during the search,
 288          * and we find that the corresponding match is less (alt. greater) than
 289          * the current string.  Then, because of how binary tree search
 290          * operates, the match must be lexicographically greater (alt. lesser)
 291          * than any ancestor node that corresponded to a match lexicographically
 292          * lesser (alt. greater) than the current string.  Therefore, the match
 293          * must be at least as long as the match for any such ancestor node.
 294          * Therefore, the lengths of lexicographically-lesser (alt. greater)
 295          * matches must be non-decreasing as they are encountered by the tree
 296          * search.
 297          *
 298          * Using this observation, we can maintain two variables,
 299          * 'longest_lt_match_len' and 'longest_gt_match_len', that represent the
 300          * length of the longest lexicographically lesser and greater,
 301          * respectively, match that has been examined so far.   Then, when
 302          * examining a new match, we need only start comparing at the index
 303          * min(longest_lt_match_len, longest_gt_match_len) byte.  Note that we
 304          * cannot know beforehand whether the match will be lexicographically
 305          * lesser or greater, hence the need for taking the minimum of these two
 306          * lengths.
 307          *
 308          * As noted earlier, as we descend into the tree, the potential matches
 309          * will have strictly increasing offsets.  To make things faster for
 310          * higher-level parsing / match-choosing code, we do not want to return
 311          * a shorter match that has a larger offset than a longer match.  This
 312          * is because a longer match can always be truncated to a shorter match
 313          * if needed, and smaller offsets usually (depending on the compression
 314          * format) take fewer bits to encode than larger offsets.
 315          * Consequently, we keep a potential match only if it is longer than the
 316          * previous longest match that has been found.  This has the added
 317          * advantage of producing the array of matches sorted by strictly
 318          * increasing lengths as well as strictly decreasing offsets.
 319          *
 320          * In degenerate cases, the binary tree might become severely
 321          * unbalanced.  To prevent excessive running times, we stop immediately
 322          * (and return any matches that happen to have been found so far) if the
 323          * current depth exceeds @max_search_depth.  Note that this cutoff can
 324          * occur before the longest match has been found, which is usually bad
 325          * for the compression ratio.
 326          *
 327          * ---------------------------------------------------------------------
 328          *
 329          *                              Updating
 330          *
 331          * I've explained how to find matches by searching the binary tree of
 332          * the current hash code.  But how do we get the binary tree in the
 333          * first place?  Since the tree is built incrementally, the real
 334          * question is how do we update the tree to "add" the current window
 335          * position.
 336          *
 337          * The tree maintains the invariant that a node's parent always has a
 338          * larger position (a.k.a. smaller match offset) than itself.
 339          * Therefore, the root node must always have the largest position; and
 340          * since the current position is larger than any previous position, the
 341          * current position must become the root of the tree.
 342          *
 343          * A correct, but silly, approach is to simply add the previous root as
 344          * a child of the new root, using either the left or right child pointer
 345          * depending on the lexicographic ordering of the strings.  This works,
 346          * but it really just produces a linked list, so it's not sufficient.
 347          *
 348          * Instead, we can initially mark the new root's left child pointer as
 349          * "pending (less than)" and its right child pointer as "pending
 350          * (greater than)".  Then, during the search, when we examine a match
 351          * that is lexicographically less than the current string, we link the
 352          * "pending (less than)" pointer to the node of that match, then set the
 353          * right child pointer of *that* node as "pending (less than)".
 354          * Similarly, when we examine a match that is lexicographically greater
 355          * than the current string, we link the "pending (greater than)" pointer
 356          * to the node of that match, then set the left child pointer of *that*
 357          * node as "pending (greater than)".
 358          *
 359          * If the search terminates before the current string is found (up to a
 360          * precision of @max_len bytes), then we set "pending (less than)" and
 361          * "pending (greater than)" to point to nothing.  Alternatively, if the
 362          * search terminates due to finding the current string (up to a
 363          * precision of @max_len bytes), then we set "pending (less than)" and
 364          * "pending (greater than)" to point to the appropriate children of that
 365          * match.
 366          *
 367          * Why does this work?  Well, we can think of it this way: the "pending
 368          * (less than)" pointer is reserved for the next match we find that is
 369          * lexicographically *less than* the current string, and the "pending
 370          * (greater than)" pointer is reserved for the next match we find that
 371          * is lexicographically *greater than* the current string.  This
 372          * explains why when we find a match that is lexicographically less than
 373          * the current string, we set the "pending (less than)" pointer to point
 374          * to that match.  And the reason we change "pending (less than)" to the
 375          * right pointer of the match in that case is because we're walking down
 376          * into that subtree, and the next match lexicographically *less than*
 377          * the current string is guaranteed to be lexicographically *greater
 378          * than* that match, so it should be set as the right subtree of that
 379          * match.  But the next match in that subtree that is lexicographically
 380          * *greater than* the current string will need to be moved to the
 381          * "pending (greater than)" pointer farther up the tree.
 382          *
 383          * It's complicated, but it should make sense if you think about it.
 384          * The algorithm basically just moves subtrees into the correct
 385          * locations as it walks down the tree for the search.  But also, if the
 386          * algorithm actually finds a match of length @max_len with the current
 387          * string, it no longer needs that match node and can discard it.  The
 388          * algorithm also will discard nodes if the search terminates due to the
 389          * depth limit.  For these reasons, the binary tree might not, in fact,
 390          * contain all valid positions.
 391          */
 392
 393         u32 num_matches = 0;
 394         u32 longest_lt_match_len = 0;
 395         u32 longest_gt_match_len = 0;
 396         u32 longest_match_len = min_len - 1;
 397         u32 *pending_lt_ptr = &child_tab[cur_window_pos * 2 + 0];
 398         u32 *pending_gt_ptr = &child_tab[cur_window_pos * 2 + 1];
 399         const u8 *strptr = &window[cur_window_pos];
 400         u32 depth_remaining = max_search_depth;
 401         for (;;) {
 402                 const u8 *matchptr;
 403                 u32 len;
 404
 405                 if (depth_remaining-- == 0 || cur_match == 0) {
 406                         *pending_lt_ptr = 0;
 407                         *pending_gt_ptr = 0;
 408                         return num_matches;
 409                 }
 410
 411                 matchptr = &window[cur_match];
 412                 len = min(longest_lt_match_len, longest_gt_match_len);
 413
 414                 if (matchptr[len] == strptr[len]) {
 415
 416                         if (++len != max_len && matchptr[len] == strptr[len])
 417                                 while (++len != max_len)
 418                                         if (matchptr[len] != strptr[len])
 419                                                 break;
 420
 421                         if (len > longest_match_len) {
 422                                 longest_match_len = len;
 423
 424                                 matches[num_matches++] = (struct lz_match) {
 425                                         .len = len,
 426                                         .offset = strptr - matchptr,
 427                                 };
 428
 429                                 if (len == max_len) {
 430                                         *pending_lt_ptr = child_tab[cur_match * 2 + 0];
 431                                         *pending_gt_ptr = child_tab[cur_match * 2 + 1];
 432                                         return num_matches;
 433                                 }
 434                         }
 435                 }
 436
 437                 if (matchptr[len] < strptr[len]) {
 438                         *pending_lt_ptr = cur_match;
 439                         pending_lt_ptr = &child_tab[cur_match * 2 + 1];
 440                         cur_match = *pending_lt_ptr;
 441                         longest_lt_match_len = len;
 442                 } else {
 443                         *pending_gt_ptr = cur_match;
 444                         pending_gt_ptr = &child_tab[cur_match * 2 + 0];
 445                         cur_match = *pending_gt_ptr;
 446                         longest_gt_match_len = len;
 447                 }
 448         }
 449 }
 450
 451 static u32
 452 lz_bt_get_matches(struct lz_mf *_mf, struct lz_match matches[])
 453 {
 454         struct lz_bt *mf = (struct lz_bt *)_mf;
 455         const u32 bytes_remaining = lz_mf_get_bytes_remaining(&mf->base);
 456         u32 hash;
 457         u32 cur_match;
 458         u32 min_len;
 459         u32 num_matches = 0;
 460
 461         if (unlikely(bytes_remaining <= mf->base.params.min_match_len + 1))
 462                 goto out;
 463
 464         if (mf->digram_tab) {
 465                 /* Search the digram table for a length 2 match.  */
 466
 467                 const u16 digram = *(const u16 *)lz_mf_get_window_ptr(&mf->base);
 468                 cur_match = mf->digram_tab[digram];
 469                 mf->digram_tab[digram] = mf->base.cur_window_pos;
 470
 471                 /* We're only interested in matches of length exactly 2, since
 472                  * those won't be found during the binary tree search.
 473                  *
 474                  * Note: it's possible to extend this match as much as possible,
 475                  * then use its length plus 1 as min_len for the binary tree
 476                  * search.  However I found this actually *reduced* performance
 477                  * slightly, evidently because the binary tree still needs to be
 478                  * searched/updated starting from the root in either case.  */
 479                 if (cur_match != 0 &&
 480                     (mf->base.cur_window[cur_match + 2] !=
 481                      mf->base.cur_window[mf->base.cur_window_pos + 2]))
 482                 {
 483                         matches[num_matches++] = (struct lz_match) {
 484                                 .len = 2,
 485                                 .offset = mf->base.cur_window_pos - cur_match,
 486                         };
 487                 }
 488                 min_len = 3;
 489         } else {
 490                 min_len = mf->base.params.min_match_len;
 491         }
 492
 493         hash = mf->next_hash;
 494         mf->next_hash = lz_bt_hash(lz_mf_get_window_ptr(&mf->base) + 1);
 495         prefetch(&mf->hash_tab[mf->next_hash]);
 496         cur_match = mf->hash_tab[hash];
 497         mf->hash_tab[hash] = mf->base.cur_window_pos;
 498
 499         /* Search the binary tree of 'hash' for matches while re-rooting it at
 500          * the current position.  */
 501         num_matches += do_search(mf->base.cur_window,
 502                                  mf->base.cur_window_pos,
 503                                  mf->child_tab,
 504                                  cur_match,
 505                                  min_len,
 506                                  min(bytes_remaining, mf->base.params.nice_match_len),
 507                                  mf->base.params.max_search_depth,
 508                                  &matches[num_matches]);
 509
 510         /* If the longest match is @nice_match_len in length, it may have been
 511          * truncated.  Try extending it up to the maximum match length.  */
 512         if (num_matches != 0 &&
 513             matches[num_matches - 1].len == mf->base.params.nice_match_len)
 514         {
 515                 const u8 * const strptr = lz_mf_get_window_ptr(&mf->base);
 516                 const u8 * const matchptr = strptr - matches[num_matches - 1].offset;
 517                 const u32 len_limit = min(bytes_remaining, mf->base.params.max_match_len);
 518                 u32 len;
 519
 520                 len = matches[num_matches - 1].len;
 521                 while (len < len_limit && strptr[len] == matchptr[len])
 522                         len++;
 523                 matches[num_matches - 1].len = len;
 524         }
 525
 526 out:
 527         /* Advance to the next position.  */
 528         mf->base.cur_window_pos++;
 529
 530         /* Return the number of matches found.  */
 531         return num_matches;
 532 }
 533
 534 /* This is the same as do_search(), but it does not save any matches.
 535  * See do_search() for explanatory comments.  */
 536 static void
 537 do_skip(const u8 window[restrict],
 538         const u32 cur_window_pos,
 539         u32 child_tab[restrict],
 540         u32 cur_match,
 541         const u32 max_len,
 542         const u32 max_search_depth)
 543 {
 544         u32 longest_lt_match_len = 0;
 545         u32 longest_gt_match_len = 0;
 546         u32 *pending_lt_ptr = &child_tab[cur_window_pos * 2 + 0];
 547         u32 *pending_gt_ptr = &child_tab[cur_window_pos * 2 + 1];
 548         const u8 * const strptr = &window[cur_window_pos];
 549         u32 depth_remaining = max_search_depth;
 550         for (;;) {
 551                 const u8 *matchptr;
 552                 u32 len;
 553
 554                 if (depth_remaining-- == 0 || cur_match == 0) {
 555                         *pending_lt_ptr = 0;
 556                         *pending_gt_ptr = 0;
 557                         return;
 558                 }
 559
 560                 matchptr = &window[cur_match];
 561                 len = min(longest_lt_match_len, longest_gt_match_len);
 562
 563                 if (matchptr[len] == strptr[len]) {
 564                         do {
 565                                 if (++len == max_len) {
 566                                         *pending_lt_ptr = child_tab[cur_match * 2 + 0];
 567                                         *pending_gt_ptr = child_tab[cur_match * 2 + 1];
 568                                         return;
 569                                 }
 570                         } while (matchptr[len] == strptr[len]);
 571                 }
 572                 if (matchptr[len] < strptr[len]) {
 573                         *pending_lt_ptr = cur_match;
 574                         pending_lt_ptr = &child_tab[cur_match * 2 + 1];
 575                         cur_match = *pending_lt_ptr;
 576                         longest_lt_match_len = len;
 577                 } else {
 578                         *pending_gt_ptr = cur_match;
 579                         pending_gt_ptr = &child_tab[cur_match * 2 + 0];
 580                         cur_match = *pending_gt_ptr;
 581                         longest_gt_match_len = len;
 582                 }
 583         }
 584 }
 585
 586 static void
 587 lz_bt_skip_position(struct lz_bt *mf)
 588 {
 589         const u32 bytes_remaining = lz_mf_get_bytes_remaining(&mf->base);
 590         u32 hash;
 591         u32 cur_match;
 592
 593         if (unlikely(bytes_remaining <= mf->base.params.min_match_len + 1))
 594                 goto out;
 595
 596         /* Update the digram table.  */
 597         if (mf->digram_tab) {
 598                 const u16 digram = *(const u16 *)lz_mf_get_window_ptr(&mf->base);
 599                 mf->digram_tab[digram] = mf->base.cur_window_pos;
 600         }
 601
 602         /* Update the hash table.  */
 603         hash = mf->next_hash;
 604         mf->next_hash = lz_bt_hash(lz_mf_get_window_ptr(&mf->base) + 1);
 605         prefetch(&mf->hash_tab[mf->next_hash]);
 606         cur_match = mf->hash_tab[hash];
 607         mf->hash_tab[hash] = mf->base.cur_window_pos;
 608
 609         /* Update the binary tree for the appropriate hash code.  */
 610         do_skip(mf->base.cur_window,
 611                 mf->base.cur_window_pos,
 612                 mf->child_tab,
 613                 cur_match,
 614                 min(bytes_remaining, mf->base.params.nice_match_len),
 615                 mf->base.params.max_search_depth);
 616
 617 out:
 618         /* Advance to the next position.  */
 619         mf->base.cur_window_pos++;
 620 }
 621
 622 static void
 623 lz_bt_skip_positions(struct lz_mf *_mf, u32 n)
 624 {
 625         struct lz_bt *mf = (struct lz_bt *)_mf;
 626
 627         do {
 628                 lz_bt_skip_position(mf);
 629         } while (--n);
 630 }
 631
 632 static void
 633 lz_bt_destroy(struct lz_mf *_mf)
 634 {
 635         struct lz_bt *mf = (struct lz_bt *)_mf;
 636
 637         FREE(mf->hash_tab);
 638         /* mf->hash_tab shares storage with mf->digram_tab and mf->child_tab. */
 639 }
 640
 641 const struct lz_mf_ops lz_binary_trees_ops = {
 642         .params_valid      = lz_bt_params_valid,
 643         .get_needed_memory = lz_bt_get_needed_memory,
 644         .init              = lz_bt_init,
 645         .load_window       = lz_bt_load_window,
 646         .get_matches       = lz_bt_get_matches,
 647         .skip_positions    = lz_bt_skip_positions,
 648         .destroy           = lz_bt_destroy,
 649         .struct_size       = sizeof(struct lz_bt),
 650 };