From de58d5f57732df8129fbfd71d46ae5968ac59646 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 19 Sep 2015 13:56:28 -0500 Subject: [PATCH 1/1] Split prefetch() into prefetchr() and prefetchw() --- include/wimlib/bt_matchfinder.h | 4 ++-- include/wimlib/compiler-gcc.h | 3 ++- include/wimlib/compiler.h | 10 ++++++++-- src/lcpit_matchfinder.c | 14 +++++++------- src/lzms_compress.c | 4 ++-- 5 files changed, 21 insertions(+), 14 deletions(-) diff --git a/include/wimlib/bt_matchfinder.h b/include/wimlib/bt_matchfinder.h index 536ead6a..4fe754c9 100644 --- a/include/wimlib/bt_matchfinder.h +++ b/include/wimlib/bt_matchfinder.h @@ -178,7 +178,7 @@ bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf, *next_hash = bt_matchfinder_hash_3_bytes(in_next + 1); cur_node = mf->hash_tab[hash]; mf->hash_tab[hash] = in_next - in_begin; - prefetch(&mf->hash_tab[*next_hash]); + prefetchw(&mf->hash_tab[*next_hash]); pending_lt_ptr = bt_left_child(mf, in_next - in_begin); pending_gt_ptr = bt_right_child(mf, in_next - in_begin); @@ -285,7 +285,7 @@ bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf, *next_hash = bt_matchfinder_hash_3_bytes(in_next + 1); cur_node = mf->hash_tab[hash]; mf->hash_tab[hash] = in_next - in_begin; - prefetch(&mf->hash_tab[*next_hash]); + prefetchw(&mf->hash_tab[*next_hash]); depth_remaining = max_search_depth; pending_lt_ptr = bt_left_child(mf, in_next - in_begin); diff --git a/include/wimlib/compiler-gcc.h b/include/wimlib/compiler-gcc.h index cb42c894..3a9d8e7e 100644 --- a/include/wimlib/compiler-gcc.h +++ b/include/wimlib/compiler-gcc.h @@ -19,7 +19,8 @@ #define _may_alias_attribute __attribute__((may_alias)) #define likely(expr) __builtin_expect(!!(expr), 1) #define unlikely(expr) __builtin_expect(!!(expr), 0) -#define prefetch(addr) __builtin_prefetch(addr) +#define prefetchr(addr) __builtin_prefetch((addr), 0) +#define prefetchw(addr) __builtin_prefetch((addr), 1) #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4) # define _cold_attribute __attribute__((cold)) #endif diff --git a/include/wimlib/compiler.h b/include/wimlib/compiler.h index 72c100a2..1ea66968 100644 --- a/include/wimlib/compiler.h +++ b/include/wimlib/compiler.h @@ -38,8 +38,14 @@ # define unlikely(expr) (expr) #endif -#ifndef prefetch -# define prefetch(addr) +/* prefetchr() - prefetch into L1 cache for read */ +#ifndef prefetchr +# define prefetchr(addr) +#endif + +/* prefetchw() - prefetch into L1 cache for write */ +#ifndef prefetchw +# define prefetchw(addr) #endif #ifndef _cold_attribute diff --git a/src/lcpit_matchfinder.c b/src/lcpit_matchfinder.c index 65e391d9..cc1a4480 100644 --- a/src/lcpit_matchfinder.c +++ b/src/lcpit_matchfinder.c @@ -78,7 +78,7 @@ build_LCP(u32 SA_and_LCP[restrict], const u32 ISA[restrict], u32 h = 0; for (u32 i = 0; i < n; i++) { const u32 r = ISA[i]; - prefetch(&SA_and_LCP[ISA[i + PREFETCH_SAFETY]]); + prefetchw(&SA_and_LCP[ISA[i + PREFETCH_SAFETY]]); if (r > 0) { const u32 j = SA_and_LCP[r - 1] & POS_MASK; const u32 lim = min(n - i, n - j); @@ -170,7 +170,7 @@ build_LCPIT(u32 intervals[restrict], u32 pos_data[restrict], const u32 n) const u32 next_lcp = SA_and_LCP[r] & LCP_MASK; const u32 top_lcp = *top & LCP_MASK; - prefetch(&pos_data[SA_and_LCP[r + PREFETCH_SAFETY] & POS_MASK]); + prefetchw(&pos_data[SA_and_LCP[r + PREFETCH_SAFETY] & POS_MASK]); if (next_lcp == top_lcp) { /* Continuing the deepest open interval */ @@ -290,7 +290,7 @@ lcpit_advance_one_byte(const u32 cur_pos, ref = pos_data[cur_pos]; /* Prefetch the deepest lcp-interval containing the *next* suffix. */ - prefetch(&intervals[pos_data[cur_pos + 1] & POS_MASK]); + prefetchw(&intervals[pos_data[cur_pos + 1] & POS_MASK]); /* There is no "next suffix" after the current one. */ pos_data[cur_pos] = 0; @@ -364,7 +364,7 @@ build_LCP_huge(u64 SA_and_LCP64[restrict], const u32 ISA[restrict], u32 h = 0; for (u32 i = 0; i < n; i++) { const u32 r = ISA[i]; - prefetch(&SA_and_LCP64[ISA[i + PREFETCH_SAFETY]]); + prefetchw(&SA_and_LCP64[ISA[i + PREFETCH_SAFETY]]); if (r > 0) { const u32 j = SA_and_LCP64[r - 1] & HUGE_POS_MASK; const u32 lim = min(n - i, n - j); @@ -411,7 +411,7 @@ build_LCPIT_huge(u64 intervals64[restrict], u32 pos_data[restrict], const u32 n) const u64 next_lcp = SA_and_LCP64[r] & HUGE_LCP_MASK; const u64 top_lcp = intervals64[*top]; - prefetch(&pos_data[SA_and_LCP64[r + PREFETCH_SAFETY] & HUGE_POS_MASK]); + prefetchw(&pos_data[SA_and_LCP64[r + PREFETCH_SAFETY] & HUGE_POS_MASK]); if (next_lcp == top_lcp) { /* Continuing the deepest open interval */ @@ -476,8 +476,8 @@ lcpit_advance_one_byte_huge(const u32 cur_pos, struct lz_match *matchptr; interval_idx = pos_data[cur_pos]; - prefetch(&pos_data[intervals64[pos_data[cur_pos + 1]] & HUGE_POS_MASK]); - prefetch(&intervals64[pos_data[cur_pos + 2]]); + prefetchw(&pos_data[intervals64[pos_data[cur_pos + 1]] & HUGE_POS_MASK]); + prefetchw(&intervals64[pos_data[cur_pos + 2]]); pos_data[cur_pos] = 0; while ((next = intervals64[interval_idx]) & HUGE_UNVISITED_TAG) { diff --git a/src/lzms_compress.c b/src/lzms_compress.c index 099a9dbf..ef67edb2 100644 --- a/src/lzms_compress.c +++ b/src/lzms_compress.c @@ -1248,7 +1248,7 @@ lzms_delta_matchfinder_skip_bytes(struct lzms_compressor *c, c->delta_hash_table[hash] = (power << DELTA_SOURCE_POWER_SHIFT) | pos; c->next_delta_hashes[power] = next_hash; - prefetch(&c->delta_hash_table[next_hash]); + prefetchw(&c->delta_hash_table[next_hash]); } } while (in_next++, pos++, --count); } @@ -1726,7 +1726,7 @@ begin: c->delta_hash_table[hash] = (power << DELTA_SOURCE_POWER_SHIFT) | pos; c->next_delta_hashes[power] = next_hash; - prefetch(&c->delta_hash_table[next_hash]); + prefetchw(&c->delta_hash_table[next_hash]); if (power != cur_match >> DELTA_SOURCE_POWER_SHIFT) continue; -- 2.43.0