- if (matchptr[best_len] != strptr[best_len] ||
- matchptr[best_len - 1] != strptr[best_len - 1] ||
- matchptr[0] != strptr[0])
+ /* Considering the potential match at 'matchptr': is it longer
+ * than 'best_len'?
+ *
+ * The bytes at index 'best_len' are the most likely to differ,
+ * so check them first. */
+ if (matchptr[best_len] != strptr[best_len])
+ goto next_match;
+
+ #if HAVE_FAST_LZ_EXTEND
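+ /* Check whether the first 3 bytes match, using one masked
+ * unaligned 32-bit load per sequence.  (Masking with 0xFFFFFF
+ * selects the first 3 bytes in memory only on little-endian;
+ * presumably HAVE_FAST_LZ_EXTEND implies a platform where such
+ * unaligned loads are fast and this holds.) */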
+ if ((*(const u32 *)strptr & 0xFFFFFF) !=
+ (*(const u32 *)matchptr & 0xFFFFFF))
+ goto next_match;
+
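+ /* The first 3 bytes are known to match, so extend the match
+ * from length 3, up to a maximum of 'max_len' bytes. */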
+ len = lz_extend(strptr, matchptr, 3, max_len);
+
+ if (len > best_len) {
+ best_len = len;
+
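+ /* This match is the longest found so far; record it. */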
+ *lz_matchptr++ = (struct lz_match) {
+ .len = best_len,
+ .offset = strptr - matchptr,
+ };
+
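+ /* A match of at least 'nice_len' bytes is considered good
+ * enough; stop searching. */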
+ if (best_len >= nice_len)
+ break;
+ }
+
+ #else /* HAVE_FAST_LZ_EXTEND */
+
+ /* The bytes at indices 'best_len - 1' and '0' are less
+ * important to check, but doing so still gives a slight
+ * performance improvement, at least on x86_64, probably
+ * because they create separate branches for the CPU to predict
+ * independently of the branches in the main comparison loops.
+ */
+ if (matchptr[best_len - 1] != strptr[best_len - 1] ||
+ matchptr[0] != strptr[0])