*next_hash = bt_matchfinder_hash_3_bytes(in_next + 1);
cur_node = mf->hash_tab[hash];
mf->hash_tab[hash] = in_next - in_begin;
- prefetch(&mf->hash_tab[*next_hash]);
+ prefetchw(&mf->hash_tab[*next_hash]);
pending_lt_ptr = bt_left_child(mf, in_next - in_begin);
pending_gt_ptr = bt_right_child(mf, in_next - in_begin);
*next_hash = bt_matchfinder_hash_3_bytes(in_next + 1);
cur_node = mf->hash_tab[hash];
mf->hash_tab[hash] = in_next - in_begin;
- prefetch(&mf->hash_tab[*next_hash]);
+ prefetchw(&mf->hash_tab[*next_hash]);
depth_remaining = max_search_depth;
pending_lt_ptr = bt_left_child(mf, in_next - in_begin);
#define _may_alias_attribute __attribute__((may_alias))
#define likely(expr) __builtin_expect(!!(expr), 1)
#define unlikely(expr) __builtin_expect(!!(expr), 0)
-#define prefetch(addr) __builtin_prefetch(addr)
+#define prefetchr(addr) __builtin_prefetch((addr), 0)
+#define prefetchw(addr) __builtin_prefetch((addr), 1)
#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
# define _cold_attribute __attribute__((cold))
#endif
# define unlikely(expr) (expr)
#endif
-#ifndef prefetch
-# define prefetch(addr)
+/* prefetchr() - prefetch into L1 cache for read */
+#ifndef prefetchr
+# define prefetchr(addr)
+#endif
+
+/* prefetchw() - prefetch into L1 cache for write */
+#ifndef prefetchw
+# define prefetchw(addr)
#endif
#ifndef _cold_attribute
u32 h = 0;
for (u32 i = 0; i < n; i++) {
const u32 r = ISA[i];
- prefetch(&SA_and_LCP[ISA[i + PREFETCH_SAFETY]]);
+ prefetchw(&SA_and_LCP[ISA[i + PREFETCH_SAFETY]]);
if (r > 0) {
const u32 j = SA_and_LCP[r - 1] & POS_MASK;
const u32 lim = min(n - i, n - j);
const u32 next_lcp = SA_and_LCP[r] & LCP_MASK;
const u32 top_lcp = *top & LCP_MASK;
- prefetch(&pos_data[SA_and_LCP[r + PREFETCH_SAFETY] & POS_MASK]);
+ prefetchw(&pos_data[SA_and_LCP[r + PREFETCH_SAFETY] & POS_MASK]);
if (next_lcp == top_lcp) {
/* Continuing the deepest open interval */
ref = pos_data[cur_pos];
/* Prefetch the deepest lcp-interval containing the *next* suffix. */
- prefetch(&intervals[pos_data[cur_pos + 1] & POS_MASK]);
+ prefetchw(&intervals[pos_data[cur_pos + 1] & POS_MASK]);
/* There is no "next suffix" after the current one. */
pos_data[cur_pos] = 0;
u32 h = 0;
for (u32 i = 0; i < n; i++) {
const u32 r = ISA[i];
- prefetch(&SA_and_LCP64[ISA[i + PREFETCH_SAFETY]]);
+ prefetchw(&SA_and_LCP64[ISA[i + PREFETCH_SAFETY]]);
if (r > 0) {
const u32 j = SA_and_LCP64[r - 1] & HUGE_POS_MASK;
const u32 lim = min(n - i, n - j);
const u64 next_lcp = SA_and_LCP64[r] & HUGE_LCP_MASK;
const u64 top_lcp = intervals64[*top];
- prefetch(&pos_data[SA_and_LCP64[r + PREFETCH_SAFETY] & HUGE_POS_MASK]);
+ prefetchw(&pos_data[SA_and_LCP64[r + PREFETCH_SAFETY] & HUGE_POS_MASK]);
if (next_lcp == top_lcp) {
/* Continuing the deepest open interval */
struct lz_match *matchptr;
interval_idx = pos_data[cur_pos];
- prefetch(&pos_data[intervals64[pos_data[cur_pos + 1]] & HUGE_POS_MASK]);
- prefetch(&intervals64[pos_data[cur_pos + 2]]);
+ prefetchw(&pos_data[intervals64[pos_data[cur_pos + 1]] & HUGE_POS_MASK]);
+ prefetchw(&intervals64[pos_data[cur_pos + 2]]);
pos_data[cur_pos] = 0;
while ((next = intervals64[interval_idx]) & HUGE_UNVISITED_TAG) {
c->delta_hash_table[hash] =
(power << DELTA_SOURCE_POWER_SHIFT) | pos;
c->next_delta_hashes[power] = next_hash;
- prefetch(&c->delta_hash_table[next_hash]);
+ prefetchw(&c->delta_hash_table[next_hash]);
}
} while (in_next++, pos++, --count);
}
c->delta_hash_table[hash] = (power << DELTA_SOURCE_POWER_SHIFT) | pos;
c->next_delta_hashes[power] = next_hash;
- prefetch(&c->delta_hash_table[next_hash]);
+ prefetchw(&c->delta_hash_table[next_hash]);
if (power != cur_match >> DELTA_SOURCE_POWER_SHIFT)
continue;