wimlib.net Git - wimlib/blob - src/lz_binary_trees.c

   1 /*
   2  * lz_binary_trees.c
   3  *
   4  * Binary tree match-finder for Lempel-Ziv compression.
   5  *
   6  * Copyright (c) 2014 Eric Biggers.  All rights reserved.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  *
  12  * 1. Redistributions of source code must retain the above copyright
  13  *    notice, this list of conditions and the following disclaimer.
  14  *
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
  20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
  23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  26  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  28  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  29  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30  */
  31
  32 /*
  33  * Note: the binary tree search/update algorithm is based on LzFind.c from
  34  * 7-Zip, which was written by Igor Pavlov and released into the public domain.
  35  */
  36
  37 #ifdef HAVE_CONFIG_H
  38 #  include "config.h"
  39 #endif
  40
  41 #include "wimlib/lz_extend.h"
  42 #include "wimlib/lz_hash3.h"
  43 #include "wimlib/lz_mf.h"
  44 #include "wimlib/util.h"
  45
  46 #include <string.h>
  47
  48 /* log2 of the number of buckets in the hash table.  This can be changed.  */
  49 #define LZ_BT_HASH_ORDER 16
  50
  51 #define LZ_BT_HASH_LEN (1 << LZ_BT_HASH_ORDER)
  52
  53 /* Number of entries in the digram table.
  54  *
  55  * Note:  You rarely get length-2 matches if you use length-3 hashing.  But
  56  * since binary trees are typically used for higher compression ratios than hash
  57  * chains, it is helpful for this match-finder to find length-2 matches as well.
  58  * Therefore this match-finder also uses a digram table to find length-2 matches
  59  * when the minimum match length is 2.  */
  60 #define LZ_BT_DIGRAM_TAB_LEN    (256 * 256)
  61
  62 struct lz_bt {
  63         struct lz_mf base;
  64         u32 *hash_tab;
  65         u32 *digram_tab;
  66         u32 *child_tab;
  67         u32 next_hash;
  68         u16 next_digram;
  69 };
  70
  71 static inline u32
  72 lz_bt_hash(const u8 *p)
  73 {
  74         return lz_hash(p, LZ_BT_HASH_ORDER);
  75 }
  76
  77 static void
  78 lz_bt_set_default_params(struct lz_mf_params *params)
  79 {
  80         if (params->min_match_len == 0)
  81                 params->min_match_len = 2;
  82
  83         if (params->max_match_len == 0)
  84                 params->max_match_len = UINT32_MAX;
  85
  86         if (params->max_search_depth == 0)
  87                 params->max_search_depth = 50;
  88
  89         if (params->nice_match_len == 0)
  90                 params->nice_match_len = 24;
  91
  92         if (params->nice_match_len < params->min_match_len)
  93                 params->nice_match_len = params->min_match_len;
  94
  95         if (params->nice_match_len > params->max_match_len)
  96                 params->nice_match_len = params->max_match_len;
  97 }
  98
  99 static bool
 100 lz_bt_params_valid(const struct lz_mf_params *params)
 101 {
 102         return true;
 103 }
 104
 105 static u64
 106 lz_bt_get_needed_memory(u32 max_window_size)
 107 {
 108         u64 len = 0;
 109
 110         len += LZ_BT_HASH_LEN;           /* hash_tab */
 111         len += LZ_BT_DIGRAM_TAB_LEN;     /* digram_tab */
 112         len += 2 * (u64)max_window_size; /* child_tab */
 113
 114         return len * sizeof(u32);
 115 }
 116
 117 static bool
 118 lz_bt_init(struct lz_mf *_mf)
 119 {
 120         struct lz_bt *mf = (struct lz_bt *)_mf;
 121         struct lz_mf_params *params = &mf->base.params;
 122         size_t len = 0;
 123
 124         lz_bt_set_default_params(params);
 125
 126         /* Allocate space for 'hash_tab', 'digram_tab', and 'child_tab'.  */
 127
 128         len += LZ_BT_HASH_LEN;
 129         if (params->min_match_len == 2)
 130                 len += LZ_BT_DIGRAM_TAB_LEN;
 131         len += 2 * params->max_window_size;
 132
 133         mf->hash_tab = MALLOC(len * sizeof(u32));
 134         if (!mf->hash_tab)
 135                 return false;
 136
 137         if (params->min_match_len == 2) {
 138                 mf->digram_tab = mf->hash_tab + LZ_BT_HASH_LEN;
 139                 mf->child_tab = mf->digram_tab + LZ_BT_DIGRAM_TAB_LEN;
 140         } else {
 141                 mf->child_tab = mf->hash_tab + LZ_BT_HASH_LEN;
 142         }
 143
 144         return true;
 145 }
 146
 147 static void
 148 lz_bt_load_window(struct lz_mf *_mf, const u8 window[], u32 size)
 149 {
 150         struct lz_bt *mf = (struct lz_bt *)_mf;
 151         size_t clear_len;
 152
 153         /* Clear hash_tab and digram_tab.
 154          * Note: child_tab need not be cleared.  */
 155         clear_len = LZ_BT_HASH_LEN;
 156         if (mf->digram_tab)
 157                 clear_len += LZ_BT_DIGRAM_TAB_LEN;
 158         memset(mf->hash_tab, 0, clear_len * sizeof(u32));
 159 }
 160
 161 /*
 162  * Search the binary tree of the current hash code for matches.  At the same
 163  * time, update this tree to add the current position in the window.
 164  *
 165  * @window
 166  *      The window being searched.
 167  * @cur_pos
 168  *      The current position in the window.
 169  * @child_tab
 170  *      Table of child pointers for the binary tree.  The children of the node
 171  *      for position 'i' in the window are child_tab[i * 2] and child_tab[i*2 +
 172  *      1].  Zero is reserved for the 'null' value (no child).  Consequently, we
 173  *      don't recognize matches beginning at position 0.   In fact, the node for
 174  *      position 0 in the window will not be used at all, which is just as well
 175  *      because we use 0-based indices which don't work for position 0.
 176  * @cur_match
 177  *      The position in the window at which the binary tree for the current hash
 178  *      code is rooted.  This can be 0, which indicates that the binary tree for
 179  *      the current hash code is empty.
 180  * @min_len
 181  *      Ignore matches shorter than this length.  This must be at least 1.
 182  * @nice_len
 183  *      Stop searching if a match of this length or longer is found.  This must
 184  *      be less than or equal to @max_len.
 185  * @max_len
 186  *      Maximum length of matches to return.  This can be longer than @nice_len,
 187  *      in which case a match of length @nice_len will still cause the search to
 188  *      be terminated, but the match will be extended up to @max_len bytes
 189  *      first.
 190  * @max_search_depth
 191  *      Stop if we reach this depth in the binary tree.
 192  * @matches
 193  *      The array in which to produce the matches.  The matches will be produced
 194  *      in order of increasing length and increasing offset.  No more than one
 195  *      match shall have any given length, nor shall any match be shorter than
 196  *      @min_len, nor shall any match be longer than @max_len, nor shall any two
 197  *      matches have the same offset.
 198  *
 199  * Returns a pointer to the next free slot in @matches.
 200  */
 201 static struct lz_match *
 202 do_search(const u8 window[restrict],
 203           const u32 cur_pos,
 204           u32 child_tab[restrict],
 205           u32 cur_match,
 206           const u32 min_len,
 207           const u32 nice_len,
 208           const u32 max_len,
 209           const u32 max_search_depth,
 210           struct lz_match *lz_matchptr)
 211 {
 212         /*
 213          * Here's my explanation of how this code actually works.  Beware: this
 214          * algorithm is a *lot* trickier than searching for matches via hash
 215          * chains.  But it can be significantly better, especially when doing
 216          * "optimal" parsing, which is why it gets used, e.g. in LZMA as well as
 217          * here.
 218          *
 219          * ---------------------------------------------------------------------
 220          *
 221          *                              Data structure
 222          *
 223          * Basically, there is not just one binary tree, but rather one binary
 224          * tree per hash code.  For a given hash code, the binary tree indexes
 225          * previous positions in the window that have that same hash code.  The
 226          * key for each node is the "string", or byte sequence, beginning at the
 227          * corresponding position in the window.
 228          *
 229          * Each tree maintains the invariant that if node C is a child of node
 230          * P, then the window position represented by node C is smaller than
 231          * ("left of") the window position represented by node P.  Equivalently,
 232          * while descending into a tree, the match distances ("offsets") from
 233          * the current position are non-decreasing --- actually strictly
 234          * increasing, because each node represents a unique position.
 235          *
 236          * In addition, not all previous positions sharing the same hash code
 237          * will necessarily be represented in each binary tree; see the
 238          * "Updating" section.
 239          *
 240          * ---------------------------------------------------------------------
 241          *
 242          *                                Searching
 243          *
 244          * Suppose we want to search for LZ77-style matches with the string
 245          * beginning at the current window position and extending for @max_len
 246          * bytes.  To do this, we can search for this string in the binary tree
 247          * for this string's hash code.  Each node visited during the search is
 248          * a potential match.  This method will find the matches efficiently
 249          * because they will converge on the current string, due to the nature
 250          * of the binary search.
 251          *
 252          * Naively, when visiting a node that represents a match of length N, we
 253          * must compare N + 1 bytes in order to determine the length of that
 254          * match and the lexicographic ordering of that match relative to the
 255          * current string (which determines whether we need to step left or
 256          * right into the next level of the tree, as per the standard binary
 257          * tree search algorithm).  However, as an optimization, we need not
 258          * explicitly examine the full length of the match at each node.  To see
 259          * that this is true, suppose that we examine a node during the search,
 260          * and we find that the corresponding match is less (alt. greater) than
 261          * the current string.  Then, because of how binary tree search
 262          * operates, the match must be lexicographically greater (alt. lesser)
 263          * than any ancestor node that corresponded to a match lexicographically
 264          * lesser (alt. greater) than the current string.  Therefore, the match
 265          * must be at least as long as the match for any such ancestor node.
 266          * Therefore, the lengths of lexicographically-lesser (alt. greater)
 267          * matches must be non-decreasing as they are encountered by the tree
 268          * search.
 269          *
 270          * Using this observation, we can maintain two variables, 'best_lt_len'
 271          * and 'best_gt_len', that represent the length of the longest
 272          * lexicographically lesser and greater, respectively, match that has
 273          * been examined so far.   Then, when examining a new match, we need
 274          * only start comparing at the index min(best_lt_len, best_gt_len) byte.
 275          * Note that we cannot know beforehand whether the match will be
 276          * lexicographically lesser or greater, hence the need for taking the
 277          * minimum of these two lengths.
 278          *
 279          * As noted earlier, as we descend into the tree, the potential matches
 280          * will have strictly increasing offsets.  To make things faster for
 281          * higher-level parsing / match-choosing code, we do not want to return
 282          * a shorter match that has a larger offset than a longer match.  This
 283          * is because a longer match can always be truncated to a shorter match
 284          * if needed, and smaller offsets usually (depending on the compression
 285          * format) take fewer bits to encode than larger offsets.
 286          * Consequently, we keep a potential match only if it is longer than the
 287          * previous longest match that has been found.  This has the added
 288          * advantage of producing the array of matches sorted by strictly
 289          * increasing lengths as well as strictly decreasing offsets.
 290          *
 291          * In degenerate cases, the binary tree might become severely
 292          * unbalanced.  To prevent excessive running times, we stop immediately
 293          * (and return any matches that happen to have been found so far) if the
 294          * current depth exceeds @max_search_depth.  Note that this cutoff can
 295          * occur before the longest match has been found, which is usually bad
 296          * for the compression ratio.
 297          *
 298          * ---------------------------------------------------------------------
 299          *
 300          *                              Updating
 301          *
 302          * I've explained how to find matches by searching the binary tree of
 303          * the current hash code.  But how do we get the binary tree in the
 304          * first place?  Since the tree is built incrementally, the real
 305          * question is how do we update the tree to "add" the current window
 306          * position.
 307          *
 308          * The tree maintains the invariant that a node's parent always has a
 309          * larger position (a.k.a. smaller match offset) than itself.
 310          * Therefore, the root node must always have the largest position; and
 311          * since the current position is larger than any previous position, the
 312          * current position must become the root of the tree.
 313          *
 314          * A correct, but silly, approach is to simply add the previous root as
 315          * a child of the new root, using either the left or right child pointer
 316          * depending on the lexicographic ordering of the strings.  This works,
 317          * but it really just produces a linked list, so it's not sufficient.
 318          *
 319          * Instead, we can initially mark the new root's left child pointer as
 320          * "pending (less than)" and its right child pointer as "pending
 321          * (greater than)".  Then, during the search, when we examine a match
 322          * that is lexicographically less than the current string, we link the
 323          * "pending (less than)" pointer to the node of that match, then set the
 324          * right child pointer of *that* node as "pending (less than)".
 325          * Similarly, when we examine a match that is lexicographically greater
 326          * than the current string, we link the "pending (greater than)" pointer
 327          * to the node of that match, then set the left child pointer of *that*
 328          * node as "pending (greater than)".
 329          *
 330          * If the search terminates before the current string is found (up to a
 331          * precision of @nice_len bytes), then we set "pending (less than)" and
 332          * "pending (greater than)" to point to nothing.  Alternatively, if the
 333          * search terminates due to finding the current string (up to a
 334          * precision of @nice_len bytes), then we set "pending (less than)" and
 335          * "pending (greater than)" to point to the appropriate children of that
 336          * match.
 337          *
 338          * Why does this work?  Well, we can think of it this way: the "pending
 339          * (less than)" pointer is reserved for the next match we find that is
 340          * lexicographically *less than* the current string, and the "pending
 341          * (greater than)" pointer is reserved for the next match we find that
 342          * is lexicographically *greater than* the current string.  This
 343          * explains why when we find a match that is lexicographically less than
 344          * the current string, we set the "pending (less than)" pointer to point
 345          * to that match.  And the reason we change "pending (less than)" to the
 346          * right pointer of the match in that case is because we're walking down
 347          * into that subtree, and the next match lexicographically *less than*
 348          * the current string is guaranteed to be lexicographically *greater
 349          * than* that match, so it should be set as the right subtree of that
 350          * match.  But the next match in that subtree that is lexicographically
 351          * *greater than* the current string will need to be moved to the
 352          * "pending (greater than)" pointer farther up the tree.
 353          *
 354          * It's complicated, but it should make sense if you think about it.
 355          * The algorithm basically just moves subtrees into the correct
 356          * locations as it walks down the tree for the search.  But also, if the
 357          * algorithm actually finds a match of length @nice_len with the current
 358          * string, it no longer needs that match node and can discard it.  The
 359          * algorithm also will discard nodes if the search terminates due to the
 360          * depth limit.  For these reasons, the binary tree might not, in fact,
 361          * contain all valid positions.
 362          */
 363
 364         u32 best_lt_len = 0;
 365         u32 best_gt_len = 0;
 366         u32 best_len = min_len - 1;
 367         u32 *pending_lt_ptr = &child_tab[cur_pos * 2 + 0];
 368         u32 *pending_gt_ptr = &child_tab[cur_pos * 2 + 1];
 369         const u8 * const strptr = &window[cur_pos];
 370         u32 depth_remaining = max_search_depth;
 371
 372         for (;;) {
 373                 const u8 *matchptr;
 374                 u32 len;
 375
 376                 if (cur_match == 0 || depth_remaining-- == 0) {
 377                         *pending_lt_ptr = 0;
 378                         *pending_gt_ptr = 0;
 379                         return lz_matchptr;
 380                 }
 381
 382                 matchptr = &window[cur_match];
 383                 len = min(best_lt_len, best_gt_len);
 384
 385                 if (matchptr[len] == strptr[len]) {
 386
 387                         len = lz_extend(strptr, matchptr, len + 1, max_len);
 388
 389                         if (len > best_len) {
 390                                 best_len = len;
 391
 392                                 *lz_matchptr++ = (struct lz_match) {
 393                                         .len = len,
 394                                         .offset = strptr - matchptr,
 395                                 };
 396
 397                                 if (len >= nice_len) {
 398                                         *pending_lt_ptr = child_tab[cur_match * 2 + 0];
 399                                         *pending_gt_ptr = child_tab[cur_match * 2 + 1];
 400                                         return lz_matchptr;
 401                                 }
 402                         }
 403                 }
 404
 405                 if (matchptr[len] < strptr[len]) {
 406                         *pending_lt_ptr = cur_match;
 407                         pending_lt_ptr = &child_tab[cur_match * 2 + 1];
 408                         cur_match = *pending_lt_ptr;
 409                         best_lt_len = len;
 410                 } else {
 411                         *pending_gt_ptr = cur_match;
 412                         pending_gt_ptr = &child_tab[cur_match * 2 + 0];
 413                         cur_match = *pending_gt_ptr;
 414                         best_gt_len = len;
 415                 }
 416         }
 417 }
 418
 419 static u32
 420 lz_bt_get_matches(struct lz_mf *_mf, struct lz_match matches[])
 421 {
 422         struct lz_bt *mf = (struct lz_bt *)_mf;
 423         const u8 * const window = mf->base.cur_window;
 424         const u32 cur_pos = mf->base.cur_window_pos++;
 425         const u32 bytes_remaining = mf->base.cur_window_size - cur_pos;
 426         u32 min_len;
 427         const u32 max_len = min(bytes_remaining, mf->base.params.max_match_len);
 428         const u32 nice_len = min(max_len, mf->base.params.nice_match_len);
 429         u32 hash;
 430         u32 cur_match;
 431         struct lz_match *lz_matchptr = matches;
 432
 433         if (unlikely(bytes_remaining < LZ_HASH_REQUIRED_NBYTES + 1))
 434                 return 0;
 435
 436         if (mf->digram_tab) {
 437                 /* Search the digram table for a length 2 match.  */
 438
 439                 const u16 digram = mf->next_digram;
 440                 mf->next_digram = load_u16_unaligned(&window[cur_pos + 1]);
 441                 prefetch(&mf->digram_tab[mf->next_digram]);
 442                 cur_match = mf->digram_tab[digram];
 443                 mf->digram_tab[digram] = cur_pos;
 444
 445                 /* We're only interested in matches of length exactly 2, since
 446                  * those won't be found during the binary tree search.
 447                  *
 448                  * Note: it's possible to extend this match as much as possible,
 449                  * then use its length plus 1 as min_len for the binary tree
 450                  * search.  However I found this actually *reduced* performance
 451                  * slightly, evidently because the binary tree still needs to be
 452                  * searched/updated starting from the root in either case.  */
 453                 if (cur_match != 0 && window[cur_match + 2] != window[cur_pos + 2]) {
 454                         *lz_matchptr++ = (struct lz_match) {
 455                                 .len = 2,
 456                                 .offset = cur_pos - cur_match,
 457                         };
 458                 }
 459                 min_len = 3;
 460         } else {
 461                 min_len = mf->base.params.min_match_len;
 462         }
 463
 464         hash = mf->next_hash;
 465         mf->next_hash = lz_bt_hash(&window[cur_pos + 1]);
 466         prefetch(&mf->hash_tab[mf->next_hash]);
 467         cur_match = mf->hash_tab[hash];
 468         mf->hash_tab[hash] = cur_pos;
 469
 470         /* Search the binary tree of 'hash' for matches while re-rooting it at
 471          * the current position.  */
 472         lz_matchptr = do_search(window,
 473                                 cur_pos,
 474                                 mf->child_tab,
 475                                 cur_match,
 476                                 min_len,
 477                                 nice_len,
 478                                 max_len,
 479                                 mf->base.params.max_search_depth,
 480                                 lz_matchptr);
 481
 482         /* Return the number of matches found.  */
 483         return lz_matchptr - matches;
 484 }
 485
 486 /* This is very similar to do_search(), but it does not save any matches.
 487  * See do_search() for explanatory comments.  */
 488 static void
 489 do_skip(const u8 window[restrict],
 490         const u32 cur_pos,
 491         u32 child_tab[restrict],
 492         u32 cur_match,
 493         const u32 nice_len,
 494         const u32 max_search_depth)
 495 {
 496         u32 best_lt_len = 0;
 497         u32 best_gt_len = 0;
 498         u32 *pending_lt_ptr = &child_tab[cur_pos * 2 + 0];
 499         u32 *pending_gt_ptr = &child_tab[cur_pos * 2 + 1];
 500         const u8 * const strptr = &window[cur_pos];
 501         u32 depth_remaining = max_search_depth;
 502         for (;;) {
 503                 const u8 *matchptr;
 504                 u32 len;
 505
 506                 if (cur_match == 0 || depth_remaining-- == 0) {
 507                         *pending_lt_ptr = 0;
 508                         *pending_gt_ptr = 0;
 509                         return;
 510                 }
 511
 512                 matchptr = &window[cur_match];
 513                 len = min(best_lt_len, best_gt_len);
 514
 515                 if (matchptr[len] == strptr[len]) {
 516                         len = lz_extend(strptr, matchptr, len + 1, nice_len);
 517                         if (len == nice_len) {
 518                                 *pending_lt_ptr = child_tab[cur_match * 2 + 0];
 519                                 *pending_gt_ptr = child_tab[cur_match * 2 + 1];
 520                                 return;
 521                         }
 522                 }
 523                 if (matchptr[len] < strptr[len]) {
 524                         *pending_lt_ptr = cur_match;
 525                         pending_lt_ptr = &child_tab[cur_match * 2 + 1];
 526                         cur_match = *pending_lt_ptr;
 527                         best_lt_len = len;
 528                 } else {
 529                         *pending_gt_ptr = cur_match;
 530                         pending_gt_ptr = &child_tab[cur_match * 2 + 0];
 531                         cur_match = *pending_gt_ptr;
 532                         best_gt_len = len;
 533                 }
 534         }
 535 }
 536
 537 static void
 538 lz_bt_skip_positions(struct lz_mf *_mf, u32 n)
 539 {
 540         struct lz_bt *mf = (struct lz_bt *)_mf;
 541         const u8 * const window = mf->base.cur_window;
 542         u32 cur_pos = mf->base.cur_window_pos;
 543         u32 end_pos = cur_pos + n;
 544         u32 bytes_remaining = mf->base.cur_window_size - cur_pos;
 545         u32 hash;
 546         u32 next_hash;
 547         u32 cur_match;
 548         u16 digram;
 549         u16 next_digram;
 550
 551         mf->base.cur_window_pos = end_pos;
 552
 553         if (unlikely(bytes_remaining < n + (LZ_HASH_REQUIRED_NBYTES + 1) - 1)) {
 554                 /* Nearing end of window.  */
 555                 if (unlikely(bytes_remaining < (LZ_HASH_REQUIRED_NBYTES + 1)))
 556                         return;
 557
 558                 end_pos = cur_pos + bytes_remaining - (LZ_HASH_REQUIRED_NBYTES + 1) + 1;
 559         }
 560
 561         next_hash = mf->next_hash;
 562         next_digram = mf->next_digram;
 563         do {
 564                 if (mf->digram_tab) {
 565                         digram = next_digram;
 566                         next_digram = load_u16_unaligned(&window[cur_pos + 1]);
 567                         mf->digram_tab[digram] = cur_pos;
 568                 }
 569
 570                 hash = next_hash;
 571                 next_hash = lz_bt_hash(&window[cur_pos + 1]);
 572                 cur_match = mf->hash_tab[hash];
 573                 mf->hash_tab[hash] = cur_pos;
 574
 575                 /* Update the binary tree for the appropriate hash code.  */
 576                 do_skip(window,
 577                         cur_pos,
 578                         mf->child_tab,
 579                         cur_match,
 580                         min(bytes_remaining, mf->base.params.nice_match_len),
 581                         mf->base.params.max_search_depth);
 582
 583                 bytes_remaining--;
 584         } while (++cur_pos != end_pos);
 585
 586         if (mf->digram_tab) {
 587                 prefetch(&mf->digram_tab[next_digram]);
 588                 mf->next_digram = next_digram;
 589         }
 590
 591         prefetch(&mf->hash_tab[next_hash]);
 592         mf->next_hash = next_hash;
 593 }
 594
 595 static void
 596 lz_bt_destroy(struct lz_mf *_mf)
 597 {
 598         struct lz_bt *mf = (struct lz_bt *)_mf;
 599
 600         FREE(mf->hash_tab);
 601         /* mf->hash_tab shares storage with mf->digram_tab and mf->child_tab. */
 602 }
 603
 604 const struct lz_mf_ops lz_binary_trees_ops = {
 605         .params_valid      = lz_bt_params_valid,
 606         .get_needed_memory = lz_bt_get_needed_memory,
 607         .init              = lz_bt_init,
 608         .load_window       = lz_bt_load_window,
 609         .get_matches       = lz_bt_get_matches,
 610         .skip_positions    = lz_bt_skip_positions,
 611         .destroy           = lz_bt_destroy,
 612         .struct_size       = sizeof(struct lz_bt),
 613 };