Split prefetch() into prefetchr() and prefetchw()
author: Eric Biggers <ebiggers3@gmail.com>
Sat, 19 Sep 2015 18:56:28 +0000 (13:56 -0500)
committer: Eric Biggers <ebiggers3@gmail.com>
Fri, 25 Sep 2015 01:41:34 +0000 (20:41 -0500)
include/wimlib/bt_matchfinder.h
include/wimlib/compiler-gcc.h
include/wimlib/compiler.h
src/lcpit_matchfinder.c
src/lzms_compress.c

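diff --git a/include/wimlib/bt_matchfinder.h b/include/wimlib/bt_matchfinder.h
index 536ead6..4fe754c 100644
--- a/include/wimlib/bt_matchfinder.h
+++ b/include/wimlib/bt_matchfinder.h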
@@ -178,7 +178,7 @@ bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf,
        *next_hash = bt_matchfinder_hash_3_bytes(in_next + 1);
        cur_node = mf->hash_tab[hash];
        mf->hash_tab[hash] = in_next - in_begin;
-       prefetch(&mf->hash_tab[*next_hash]);
+       prefetchw(&mf->hash_tab[*next_hash]);
 
        pending_lt_ptr = bt_left_child(mf, in_next - in_begin);
        pending_gt_ptr = bt_right_child(mf, in_next - in_begin);
@@ -285,7 +285,7 @@ bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf,
        *next_hash = bt_matchfinder_hash_3_bytes(in_next + 1);
        cur_node = mf->hash_tab[hash];
        mf->hash_tab[hash] = in_next - in_begin;
-       prefetch(&mf->hash_tab[*next_hash]);
+       prefetchw(&mf->hash_tab[*next_hash]);
 
        depth_remaining = max_search_depth;
        pending_lt_ptr = bt_left_child(mf, in_next - in_begin);
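diff --git a/include/wimlib/compiler-gcc.h b/include/wimlib/compiler-gcc.h
index cb42c89..3a9d8e7 100644
--- a/include/wimlib/compiler-gcc.h
+++ b/include/wimlib/compiler-gcc.h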
@@ -19,7 +19,8 @@
 #define _may_alias_attribute   __attribute__((may_alias))
 #define likely(expr)           __builtin_expect(!!(expr), 1)
 #define unlikely(expr)         __builtin_expect(!!(expr), 0)
-#define prefetch(addr)         __builtin_prefetch(addr)
+#define prefetchr(addr)                __builtin_prefetch((addr), 0)
+#define prefetchw(addr)                __builtin_prefetch((addr), 1)
 #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
 #  define _cold_attribute      __attribute__((cold))
 #endif
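diff --git a/include/wimlib/compiler.h b/include/wimlib/compiler.h
index 72c100a..1ea6696 100644
--- a/include/wimlib/compiler.h
+++ b/include/wimlib/compiler.h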
 #  define unlikely(expr)       (expr)
 #endif
 
-#ifndef prefetch
-#  define prefetch(addr)
+/* prefetchr() - prefetch into L1 cache for read  */
+#ifndef prefetchr
+#  define prefetchr(addr)
+#endif
+
+/* prefetchw() - prefetch into L1 cache for write  */
+#ifndef prefetchw
+#  define prefetchw(addr)
 #endif
 
 #ifndef _cold_attribute
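diff --git a/src/lcpit_matchfinder.c b/src/lcpit_matchfinder.c
index 65e391d..cc1a448 100644
--- a/src/lcpit_matchfinder.c
+++ b/src/lcpit_matchfinder.c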
@@ -78,7 +78,7 @@ build_LCP(u32 SA_and_LCP[restrict], const u32 ISA[restrict],
        u32 h = 0;
        for (u32 i = 0; i < n; i++) {
                const u32 r = ISA[i];
-               prefetch(&SA_and_LCP[ISA[i + PREFETCH_SAFETY]]);
+               prefetchw(&SA_and_LCP[ISA[i + PREFETCH_SAFETY]]);
                if (r > 0) {
                        const u32 j = SA_and_LCP[r - 1] & POS_MASK;
                        const u32 lim = min(n - i, n - j);
@@ -170,7 +170,7 @@ build_LCPIT(u32 intervals[restrict], u32 pos_data[restrict], const u32 n)
                const u32 next_lcp = SA_and_LCP[r] & LCP_MASK;
                const u32 top_lcp = *top & LCP_MASK;
 
-               prefetch(&pos_data[SA_and_LCP[r + PREFETCH_SAFETY] & POS_MASK]);
+               prefetchw(&pos_data[SA_and_LCP[r + PREFETCH_SAFETY] & POS_MASK]);
 
                if (next_lcp == top_lcp) {
                        /* Continuing the deepest open interval  */
@@ -290,7 +290,7 @@ lcpit_advance_one_byte(const u32 cur_pos,
        ref = pos_data[cur_pos];
 
        /* Prefetch the deepest lcp-interval containing the *next* suffix. */
-       prefetch(&intervals[pos_data[cur_pos + 1] & POS_MASK]);
+       prefetchw(&intervals[pos_data[cur_pos + 1] & POS_MASK]);
 
        /* There is no "next suffix" after the current one.  */
        pos_data[cur_pos] = 0;
@@ -364,7 +364,7 @@ build_LCP_huge(u64 SA_and_LCP64[restrict], const u32 ISA[restrict],
        u32 h = 0;
        for (u32 i = 0; i < n; i++) {
                const u32 r = ISA[i];
-               prefetch(&SA_and_LCP64[ISA[i + PREFETCH_SAFETY]]);
+               prefetchw(&SA_and_LCP64[ISA[i + PREFETCH_SAFETY]]);
                if (r > 0) {
                        const u32 j = SA_and_LCP64[r - 1] & HUGE_POS_MASK;
                        const u32 lim = min(n - i, n - j);
@@ -411,7 +411,7 @@ build_LCPIT_huge(u64 intervals64[restrict], u32 pos_data[restrict], const u32 n)
                const u64 next_lcp = SA_and_LCP64[r] & HUGE_LCP_MASK;
                const u64 top_lcp = intervals64[*top];
 
-               prefetch(&pos_data[SA_and_LCP64[r + PREFETCH_SAFETY] & HUGE_POS_MASK]);
+               prefetchw(&pos_data[SA_and_LCP64[r + PREFETCH_SAFETY] & HUGE_POS_MASK]);
 
                if (next_lcp == top_lcp) {
                        /* Continuing the deepest open interval  */
@@ -476,8 +476,8 @@ lcpit_advance_one_byte_huge(const u32 cur_pos,
        struct lz_match *matchptr;
 
        interval_idx = pos_data[cur_pos];
-       prefetch(&pos_data[intervals64[pos_data[cur_pos + 1]] & HUGE_POS_MASK]);
-       prefetch(&intervals64[pos_data[cur_pos + 2]]);
+       prefetchw(&pos_data[intervals64[pos_data[cur_pos + 1]] & HUGE_POS_MASK]);
+       prefetchw(&intervals64[pos_data[cur_pos + 2]]);
        pos_data[cur_pos] = 0;
 
        while ((next = intervals64[interval_idx]) & HUGE_UNVISITED_TAG) {
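diff --git a/src/lzms_compress.c b/src/lzms_compress.c
index 099a9db..ef67edb 100644
--- a/src/lzms_compress.c
+++ b/src/lzms_compress.c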
@@ -1248,7 +1248,7 @@ lzms_delta_matchfinder_skip_bytes(struct lzms_compressor *c,
                        c->delta_hash_table[hash] =
                                (power << DELTA_SOURCE_POWER_SHIFT) | pos;
                        c->next_delta_hashes[power] = next_hash;
-                       prefetch(&c->delta_hash_table[next_hash]);
+                       prefetchw(&c->delta_hash_table[next_hash]);
                }
        } while (in_next++, pos++, --count);
 }
@@ -1726,7 +1726,7 @@ begin:
 
                                c->delta_hash_table[hash] = (power << DELTA_SOURCE_POWER_SHIFT) | pos;
                                c->next_delta_hashes[power] = next_hash;
-                               prefetch(&c->delta_hash_table[next_hash]);
+                               prefetchw(&c->delta_hash_table[next_hash]);
 
                                if (power != cur_match >> DELTA_SOURCE_POWER_SHIFT)
                                        continue;