Module: Mesa
Branch: main
Commit: f98871608cbae6f7fd16561e9c92f5c22334e5f1
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f98871608cbae6f7fd16561e9c92f5c22334e5f1

Author: Marek Olšák <[email protected]>
Date:   Thu Apr 27 03:49:10 2023 -0400

ac/llvm: rewrite and unify how GLC, DLC, SLC are set

Use ACCESS_* flags in call sites instead of GLC/DLC/SLC.

ACCESS_* flags are extended to describe other aspects of memory instructions
like load/store/atomic/smem.

Then add a function that converts the access flags to GLC, DLC, SLC.

The new functions are also usable by ACO.

Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22770>

---

 src/amd/common/ac_shader_util.c                    | 140 +++++++++++++++++++++
 src/amd/common/ac_shader_util.h                    |  40 ++++++
 src/amd/llvm/ac_llvm_build.c                       |  84 ++++++-------
 src/amd/llvm/ac_llvm_build.h                       |  29 ++---
 src/amd/llvm/ac_nir_to_llvm.c                      |  89 +++++--------
 src/gallium/drivers/radeonsi/si_shader_llvm_tess.c |  11 +-
 6 files changed, 270 insertions(+), 123 deletions(-)

diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c
index 29015ec271e..464a2fccd13 100644
--- a/src/amd/common/ac_shader_util.c
+++ b/src/amd/common/ac_shader_util.c
@@ -1014,3 +1014,143 @@ void ac_get_scratch_tmpring_size(const struct radeon_info *info,
    *tmpring_size = S_0286E8_WAVES(max_scratch_waves) |
                    S_0286E8_WAVESIZE(*max_seen_bytes_per_wave >> size_shift);
 }
+
+/* Get chip-agnostic memory instruction access flags (as opposed to 
chip-specific GLC/DLC/SLC)
+ * from a NIR memory intrinsic.
+ */
+enum gl_access_qualifier ac_get_mem_access_flags(const nir_intrinsic_instr 
*instr)
+{
+   enum gl_access_qualifier access =
+      nir_intrinsic_has_access(instr) ? nir_intrinsic_access(instr) : 0;
+
+   /* Determine ACCESS_MAY_STORE_SUBDWORD. (for the GFX6 TC L1 bug workaround) 
*/
+   if (!nir_intrinsic_infos[instr->intrinsic].has_dest) {
+      switch (instr->intrinsic) {
+      case nir_intrinsic_bindless_image_store:
+         access |= ACCESS_MAY_STORE_SUBDWORD;
+         break;
+
+      case nir_intrinsic_store_ssbo:
+      case nir_intrinsic_store_buffer_amd:
+      case nir_intrinsic_store_global:
+      case nir_intrinsic_store_global_amd:
+         if (access & ACCESS_USES_FORMAT_AMD ||
+             (nir_intrinsic_has_align_offset(instr) && 
nir_intrinsic_align(instr) % 4 != 0) ||
+             ((instr->src[0].ssa->bit_size / 8) * 
instr->src[0].ssa->num_components) % 4 != 0)
+            access |= ACCESS_MAY_STORE_SUBDWORD;
+         break;
+
+      default:
+         unreachable("unexpected store instruction");
+      }
+   }
+
+   return access;
+}
+
+/* Convert chip-agnostic memory access flags into hw-specific cache flags.
+ *
+ * "access" must be a result of ac_get_mem_access_flags() with the appropriate 
ACCESS_TYPE_*
+ * flags set.
+ */
+union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
+                                              enum gl_access_qualifier access)
+{
+   union ac_hw_cache_flags result;
+   result.value = 0;
+
+   assert(util_bitcount(access & (ACCESS_TYPE_LOAD | ACCESS_TYPE_STORE |
+                                  ACCESS_TYPE_ATOMIC)) == 1);
+   assert(!(access & ACCESS_TYPE_SMEM) || access & ACCESS_TYPE_LOAD);
+   assert(!(access & ACCESS_IS_SWIZZLED_AMD) || !(access & ACCESS_TYPE_SMEM));
+   assert(!(access & ACCESS_MAY_STORE_SUBDWORD) || access & ACCESS_TYPE_STORE);
+
+   bool scope_is_device = access & (ACCESS_COHERENT | ACCESS_VOLATILE);
+
+   if (gfx_level >= GFX11) {
+      /* GFX11 simplified it and exposes what is actually useful.
+       *
+       * GLC means device scope for loads only. (stores and atomics are always 
device scope)
+       * SLC means non-temporal for GL1 and GL2 caches. (GL1 = hit-evict, GL2 
= stream, unavailable in SMEM)
+       * DLC means non-temporal for MALL. (noalloc, i.e. coherent bypass)
+       *
+       * GL0 doesn't have a non-temporal flag, so you always get LRU caching 
in CU scope.
+       */
+      if (access & ACCESS_TYPE_LOAD && scope_is_device)
+         result.value |= ac_glc;
+
+      if (access & ACCESS_NON_TEMPORAL && !(access & ACCESS_TYPE_SMEM))
+         result.value |= ac_slc;
+   } else if (gfx_level >= GFX10) {
+      /* GFX10-10.3:
+       *
+       * VMEM and SMEM loads (SMEM only supports the first four):
+       * !GLC && !DLC && !SLC means CU scope          <== use for normal loads 
with CU scope
+       *  GLC && !DLC && !SLC means SA scope
+       * !GLC &&  DLC && !SLC means CU scope, GL1 bypass
+       *  GLC &&  DLC && !SLC means device scope      <== use for normal loads 
with device scope
+       * !GLC && !DLC &&  SLC means CU scope, non-temporal (GL0 = GL1 = 
hit-evict, GL2 = stream)  <== use for non-temporal loads with CU scope
+       *  GLC && !DLC &&  SLC means SA scope, non-temporal (GL1 = hit-evict, 
GL2 = stream)
+       * !GLC &&  DLC &&  SLC means CU scope, GL0 non-temporal, GL1-GL2 
coherent bypass (GL0 = hit-evict, GL1 = bypass, GL2 = noalloc)
+       *  GLC &&  DLC &&  SLC means device scope, GL2 coherent bypass 
(noalloc)  <== use for non-temporal loads with device scope
+       *
+       * VMEM stores/atomics (stores are CU scope only if they overwrite the 
whole cache line,
+       * atomics are always device scope, GL1 is always bypassed):
+       * !GLC && !DLC && !SLC means CU scope          <== use for normal 
stores with CU scope
+       *  GLC && !DLC && !SLC means device scope      <== use for normal 
stores with device scope
+       * !GLC &&  DLC && !SLC means CU scope, GL2 non-coherent bypass
+       *  GLC &&  DLC && !SLC means device scope, GL2 non-coherent bypass
+       * !GLC && !DLC &&  SLC means CU scope, GL2 non-temporal (stream)  <== 
use for non-temporal stores with CU scope
+       *  GLC && !DLC &&  SLC means device scope, GL2 non-temporal (stream)  
<== use for non-temporal stores with device scope
+       * !GLC &&  DLC &&  SLC means CU scope, GL2 coherent bypass (noalloc)
+       *  GLC &&  DLC &&  SLC means device scope, GL2 coherent bypass (noalloc)
+       *
+       * "stream" allows write combining in GL2. "coherent bypass" doesn't.
+       * "non-coherent bypass" doesn't guarantee ordering with any coherent 
stores.
+       */
+      if (scope_is_device && !(access & ACCESS_TYPE_ATOMIC))
+         result.value |= ac_glc | (access & ACCESS_TYPE_LOAD ? ac_dlc : 0);
+
+      if (access & ACCESS_NON_TEMPORAL && !(access & ACCESS_TYPE_SMEM))
+         result.value |= ac_slc;
+   } else {
+      /* GFX6-GFX9:
+       *
+       * VMEM loads:
+       * !GLC && !SLC means CU scope
+       *  GLC && !SLC means (GFX6: device scope, GFX7-9: device scope [*])
+       * !GLC &&  SLC means (GFX6: CU scope, GFX7: device scope, GFX8-9: CU 
scope), GL2 non-temporal (stream)
+       *  GLC &&  SLC means device scope, GL2 non-temporal (stream)
+       *
+       * VMEM stores (atomics don't have [*]):
+       * !GLC && !SLC means (GFX6: CU scope, GFX7-9: device scope [*])
+       *  GLC && !SLC means (GFX6-7: device scope, GFX8-9: device scope [*])
+       * !GLC &&  SLC means (GFX6: CU scope, GFX7-9: device scope [*]), GL2 
non-temporal (stream)
+       *  GLC &&  SLC means device scope, GL2 non-temporal (stream)
+       *
+       * [*] data can be cached in GL1 for future CU scope
+       *
+       * SMEM loads:
+       *  GLC means device scope (available on GFX8+)
+       */
+      if (scope_is_device && !(access & ACCESS_TYPE_ATOMIC)) {
+         /* SMEM doesn't support the device scope on GFX6-7. */
+         assert(gfx_level >= GFX8 || !(access & ACCESS_TYPE_SMEM));
+         result.value |= ac_glc;
+      }
+
+      if (access & ACCESS_NON_TEMPORAL && !(access & ACCESS_TYPE_SMEM))
+         result.value |= ac_slc;
+
+      /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. All 
store opcodes not
+       * aligned to a dword are affected.
+       */
+      if (gfx_level == GFX6 && access & ACCESS_MAY_STORE_SUBDWORD)
+         result.value |= ac_glc;
+   }
+
+   if (access & ACCESS_IS_SWIZZLED_AMD)
+      result.value |= ac_swizzled;
+
+   return result;
+}
diff --git a/src/amd/common/ac_shader_util.h b/src/amd/common/ac_shader_util.h
index 7c2b5ea4035..dc36cb373ea 100644
--- a/src/amd/common/ac_shader_util.h
+++ b/src/amd/common/ac_shader_util.h
@@ -46,6 +46,41 @@ extern "C" {
 #define AC_SENDMSG_GS_OP_EMIT     (2 << 4)
 #define AC_SENDMSG_GS_OP_EMIT_CUT (3 << 4)
 
+/* An extension of gl_access_qualifier describing other aspects of memory 
operations
+ * for code generation.
+ */
+enum {
+   /* Only one of LOAD/STORE/ATOMIC can be set. */
+   ACCESS_TYPE_LOAD            = BITFIELD_BIT(27),
+   ACCESS_TYPE_STORE           = BITFIELD_BIT(28),
+   ACCESS_TYPE_ATOMIC          = BITFIELD_BIT(29),
+
+   /* This access is expected to use an SMEM instruction if source operands 
are non-divergent.
+    * Only loads can set this.
+    */
+   ACCESS_TYPE_SMEM            = BITFIELD_BIT(30),
+
+   /* Whether a store offset or size alignment is less than 4. */
+   ACCESS_MAY_STORE_SUBDWORD   = BITFIELD_BIT(31),
+};
+
+/* The meaning of these enums is different between chips. They match LLVM 
definitions,
+ * but they can also be used by ACO. Use ac_get_hw_cache_flags to get these.
+ */
+enum ac_cache_flags
+{
+   ac_glc = BITFIELD_BIT(0),
+   ac_slc = BITFIELD_BIT(1),
+   ac_dlc = BITFIELD_BIT(2),
+   ac_swizzled = BITFIELD_BIT(3),
+};
+
+union ac_hw_cache_flags
+{
+   /* NOTE: This will contain more fields in the future. */
+   enum ac_cache_flags value;
+};
+
 enum ac_image_dim
 {
    ac_image_1d,
@@ -199,6 +234,11 @@ ac_ngg_get_scratch_lds_size(gl_shader_stage stage,
                             bool streamout_enabled,
                             bool can_cull);
 
+enum gl_access_qualifier ac_get_mem_access_flags(const nir_intrinsic_instr 
*instr);
+
+union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
+                                              enum gl_access_qualifier access);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c
index 89c3af49c93..423cedd04b0 100644
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -1221,23 +1221,15 @@ LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
    return ac_build_load_custom(ctx, ptr.t, ptr.v, index, true, true, false);
 }
 
-static unsigned get_load_cache_policy(struct ac_llvm_context *ctx, unsigned 
cache_policy)
+static unsigned get_cache_flags(struct ac_llvm_context *ctx, enum 
gl_access_qualifier access)
 {
-   return cache_policy |
-          (ctx->gfx_level >= GFX10 && ctx->gfx_level < GFX11 && cache_policy & 
ac_glc ? ac_dlc : 0);
-}
-
-static unsigned get_store_cache_policy(struct ac_llvm_context *ctx, unsigned 
cache_policy)
-{
-   if (ctx->gfx_level >= GFX11)
-      cache_policy &= ~ac_glc; /* GLC has no effect on stores */
-   return cache_policy;
+   return ac_get_hw_cache_flags(ctx->gfx_level, access).value;
 }
 
 static void ac_build_buffer_store_common(struct ac_llvm_context *ctx, 
LLVMValueRef rsrc,
                                          LLVMValueRef data, LLVMValueRef 
vindex,
                                          LLVMValueRef voffset, LLVMValueRef 
soffset,
-                                         unsigned cache_policy, bool 
use_format)
+                                         enum gl_access_qualifier access, bool 
use_format)
 {
    LLVMValueRef args[6];
    int idx = 0;
@@ -1247,7 +1239,7 @@ static void ac_build_buffer_store_common(struct ac_llvm_context *ctx, LLVMValueR
       args[idx++] = vindex ? vindex : ctx->i32_0;
    args[idx++] = voffset ? voffset : ctx->i32_0;
    args[idx++] = soffset ? soffset : ctx->i32_0;
-   args[idx++] = LLVMConstInt(ctx->i32, get_store_cache_policy(ctx, 
cache_policy), 0);
+   args[idx++] = LLVMConstInt(ctx->i32, get_cache_flags(ctx, access | 
ACCESS_TYPE_STORE), 0);
    const char *indexing_kind = vindex ? "struct" : "raw";
    char name[256], type_name[8];
 
@@ -1264,15 +1256,15 @@ static void ac_build_buffer_store_common(struct ac_llvm_context *ctx, LLVMValueR
 }
 
 void ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef 
rsrc, LLVMValueRef data,
-                                  LLVMValueRef vindex, LLVMValueRef voffset, 
unsigned cache_policy)
+                                  LLVMValueRef vindex, LLVMValueRef voffset, 
enum gl_access_qualifier access)
 {
-   ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset, NULL, 
cache_policy, true);
+   ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset, NULL, 
access, true);
 }
 
 /* buffer_store_dword(,x2,x3,x4) <- the suffix is selected by the type of 
vdata. */
 void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef 
rsrc, LLVMValueRef vdata,
                                  LLVMValueRef vindex, LLVMValueRef voffset, 
LLVMValueRef soffset,
-                                 unsigned cache_policy)
+                                 enum gl_access_qualifier access)
 {
    unsigned num_channels = ac_get_llvm_num_components(vdata);
 
@@ -1288,19 +1280,19 @@ void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
       voffset2 = LLVMBuildAdd(ctx->builder, voffset ? voffset : ctx->i32_0,
                               LLVMConstInt(ctx->i32, 8, 0), "");
 
-      ac_build_buffer_store_dword(ctx, rsrc, v01, vindex, voffset, soffset, 
cache_policy);
-      ac_build_buffer_store_dword(ctx, rsrc, v[2], vindex, voffset2, soffset, 
cache_policy);
+      ac_build_buffer_store_dword(ctx, rsrc, v01, vindex, voffset, soffset, 
access);
+      ac_build_buffer_store_dword(ctx, rsrc, v[2], vindex, voffset2, soffset, 
access);
       return;
    }
 
    ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata), vindex, 
voffset, soffset,
-                                cache_policy, false);
+                                access, false);
 }
 
 static LLVMValueRef ac_build_buffer_load_common(struct ac_llvm_context *ctx, 
LLVMValueRef rsrc,
                                                 LLVMValueRef vindex, 
LLVMValueRef voffset,
                                                 LLVMValueRef soffset, unsigned 
num_channels,
-                                                LLVMTypeRef channel_type, 
unsigned cache_policy,
+                                                LLVMTypeRef channel_type, enum 
gl_access_qualifier access,
                                                 bool can_speculate, bool 
use_format)
 {
    LLVMValueRef args[5];
@@ -1310,7 +1302,7 @@ static LLVMValueRef ac_build_buffer_load_common(struct ac_llvm_context *ctx, LLV
       args[idx++] = vindex;
    args[idx++] = voffset ? voffset : ctx->i32_0;
    args[idx++] = soffset ? soffset : ctx->i32_0;
-   args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, 
cache_policy), 0);
+   args[idx++] = LLVMConstInt(ctx->i32, get_cache_flags(ctx, access | 
ACCESS_TYPE_LOAD), 0);
    unsigned func =
       !ac_has_vec3_support(ctx->gfx_level, use_format) && num_channels == 3 ? 
4 : num_channels;
    const char *indexing_kind = vindex ? "struct" : "raw";
@@ -1339,11 +1331,10 @@ static LLVMValueRef ac_build_buffer_load_common(struct ac_llvm_context *ctx, LLV
 
 LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef 
rsrc, int num_channels,
                                   LLVMValueRef vindex, LLVMValueRef voffset, 
LLVMValueRef soffset,
-                                  LLVMTypeRef channel_type, unsigned 
cache_policy,
+                                  LLVMTypeRef channel_type, enum 
gl_access_qualifier access,
                                   bool can_speculate, bool allow_smem)
 {
-   if (allow_smem && !(cache_policy & ac_slc) &&
-       (!(cache_policy & ac_glc) || ctx->gfx_level >= GFX8)) {
+   if (allow_smem && (!(access & ACCESS_COHERENT) || ctx->gfx_level >= GFX8)) {
       assert(vindex == NULL);
 
       LLVMValueRef result[32];
@@ -1365,7 +1356,8 @@ LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc
          LLVMValueRef args[3] = {
             rsrc,
             offset,
-            LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 
0),
+            LLVMConstInt(ctx->i32, get_cache_flags(ctx, access | 
ACCESS_TYPE_LOAD |
+                                                        ACCESS_TYPE_SMEM), 0),
          };
          result[i] = ac_build_intrinsic(ctx, name, channel_type, args, 3, 
AC_ATTR_INVARIANT_LOAD);
       }
@@ -1386,7 +1378,7 @@ LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc
                          LLVMConstInt(ctx->i32, i * 
ac_get_type_size(channel_type), 0), "");
       LLVMValueRef item =
          ac_build_buffer_load_common(ctx, rsrc, vindex, fetch_voffset, 
soffset, fetch_num_channels,
-                                     channel_type, cache_policy, 
can_speculate, false);
+                                     channel_type, access, can_speculate, 
false);
       result = ac_build_concat(ctx, result, item);
    }
 
@@ -1395,13 +1387,13 @@ LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc
 
 LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, 
LLVMValueRef rsrc,
                                          LLVMValueRef vindex, LLVMValueRef 
voffset,
-                                         unsigned num_channels, unsigned 
cache_policy,
+                                         unsigned num_channels, enum 
gl_access_qualifier access,
                                          bool can_speculate, bool d16, bool 
tfe)
 {
    if (tfe) {
       assert(!d16);
 
-      cache_policy = get_load_cache_policy(ctx, cache_policy);
+      unsigned cache_flags = get_cache_flags(ctx, access | ACCESS_TYPE_LOAD);
 
       char code[256];
       /* The definition in the assembly and the one in the constraint string
@@ -1415,9 +1407,9 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, LLVMValueR
                "v_mov_b32 v4, 0\n"
                "buffer_load_format_xyzw v[0:3], $1, $2, 0, idxen offen %s %s 
tfe %s\n"
                "s_waitcnt vmcnt(0)",
-               cache_policy & ac_glc ? "glc" : "",
-               cache_policy & ac_slc ? "slc" : "",
-               cache_policy & ac_dlc ? "dlc" : "");
+               cache_flags & ac_glc ? "glc" : "",
+               cache_flags & ac_slc ? "slc" : "",
+               cache_flags & ac_dlc ? "dlc" : "");
 
       LLVMTypeRef param_types[] = {ctx->v2i32, ctx->v4i32};
       LLVMTypeRef calltype = LLVMFunctionType(LLVMVectorType(ctx->f32, 5), 
param_types, 2, false);
@@ -1435,7 +1427,7 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, LLVMValueR
    }
 
    return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0,
-                                      num_channels, d16 ? ctx->f16 : ctx->f32, 
cache_policy,
+                                      num_channels, d16 ? ctx->f16 : ctx->f32, 
access,
                                       can_speculate, true);
 }
 
@@ -1443,7 +1435,7 @@ static LLVMValueRef ac_build_tbuffer_load(struct ac_llvm_context *ctx, LLVMValue
                                           LLVMValueRef vindex, LLVMValueRef 
voffset,
                                           LLVMValueRef soffset, unsigned 
num_channels,
                                           unsigned tbuffer_format, LLVMTypeRef 
channel_type,
-                                          unsigned cache_policy, bool 
can_speculate)
+                                          enum gl_access_qualifier access, 
bool can_speculate)
 {
    LLVMValueRef args[6];
    int idx = 0;
@@ -1453,7 +1445,7 @@ static LLVMValueRef ac_build_tbuffer_load(struct ac_llvm_context *ctx, LLVMValue
    args[idx++] = voffset ? voffset : ctx->i32_0;
    args[idx++] = soffset ? soffset : ctx->i32_0;
    args[idx++] = LLVMConstInt(ctx->i32, tbuffer_format, 0);
-   args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, 
cache_policy), 0);
+   args[idx++] = LLVMConstInt(ctx->i32, get_cache_flags(ctx, access | 
ACCESS_TYPE_LOAD), 0);
    const char *indexing_kind = vindex ? "struct" : "raw";
    char name[256], type_name[8];
 
@@ -1474,7 +1466,7 @@ LLVMValueRef ac_build_safe_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRe
                                         unsigned align_offset,
                                         unsigned align_mul,
                                         unsigned num_channels,
-                                        unsigned cache_policy,
+                                        enum gl_access_qualifier access,
                                         bool can_speculate)
 {
    const unsigned max_channels = vtx_info->num_channels;
@@ -1503,7 +1495,7 @@ LLVMValueRef ac_build_safe_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRe
       LLVMValueRef item =
          ac_build_tbuffer_load(ctx, rsrc, vidx, fetch_voffset, soffset,
                                fetch_num_channels, fetch_format, channel_type,
-                               cache_policy, can_speculate);
+                               access, can_speculate);
       result = ac_build_concat(ctx, result, item);
    }
 
@@ -1513,35 +1505,35 @@ LLVMValueRef ac_build_safe_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRe
 
 LLVMValueRef ac_build_buffer_load_short(struct ac_llvm_context *ctx, 
LLVMValueRef rsrc,
                                         LLVMValueRef voffset, LLVMValueRef 
soffset,
-                                        unsigned cache_policy)
+                                        enum gl_access_qualifier access)
 {
    return ac_build_buffer_load_common(ctx, rsrc, NULL, voffset, soffset, 1, 
ctx->i16,
-                                      cache_policy, false, false);
+                                      access, false, false);
 }
 
 LLVMValueRef ac_build_buffer_load_byte(struct ac_llvm_context *ctx, 
LLVMValueRef rsrc,
                                        LLVMValueRef voffset, LLVMValueRef 
soffset,
-                                       unsigned cache_policy)
+                                       enum gl_access_qualifier access)
 {
-   return ac_build_buffer_load_common(ctx, rsrc, NULL, voffset, soffset, 1, 
ctx->i8, cache_policy,
+   return ac_build_buffer_load_common(ctx, rsrc, NULL, voffset, soffset, 1, 
ctx->i8, access,
                                       false, false);
 }
 
 void ac_build_buffer_store_short(struct ac_llvm_context *ctx, LLVMValueRef 
rsrc,
                                  LLVMValueRef vdata, LLVMValueRef voffset, 
LLVMValueRef soffset,
-                                 unsigned cache_policy)
+                                 enum gl_access_qualifier access)
 {
    vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, "");
 
-   ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, 
cache_policy, false);
+   ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, 
access, false);
 }
 
 void ac_build_buffer_store_byte(struct ac_llvm_context *ctx, LLVMValueRef 
rsrc, LLVMValueRef vdata,
-                                LLVMValueRef voffset, LLVMValueRef soffset, 
unsigned cache_policy)
+                                LLVMValueRef voffset, LLVMValueRef soffset, 
enum gl_access_qualifier access)
 {
    vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i8, "");
 
-   ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, 
cache_policy, false);
+   ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, 
access, false);
 }
 
 /**
@@ -2025,7 +2017,11 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_
 
    args[num_args++] = a->tfe ? ctx->i32_1 : ctx->i32_0; /* texfailctrl */
    args[num_args++] = LLVMConstInt(
-      ctx->i32, load ? get_load_cache_policy(ctx, a->cache_policy) : 
a->cache_policy, false);
+      ctx->i32, get_cache_flags(ctx,
+                                a->access |
+                                (atomic ? ACCESS_TYPE_ATOMIC :
+                                 load ? ACCESS_TYPE_LOAD : ACCESS_TYPE_STORE)),
+      false);
 
    const char *name;
    const char *atomic_subop = "";
diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h
index b80e86e5af0..840ab74b172 100644
--- a/src/amd/llvm/ac_llvm_build.h
+++ b/src/amd/llvm/ac_llvm_build.h
@@ -281,28 +281,28 @@ LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
 
 void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef 
rsrc, LLVMValueRef vdata,
                                  LLVMValueRef vindex, LLVMValueRef voffset, 
LLVMValueRef soffset,
-                                 unsigned cache_policy);
+                                 enum gl_access_qualifier access);
 
 void ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef 
rsrc, LLVMValueRef data,
-                                  LLVMValueRef vindex, LLVMValueRef voffset, 
unsigned cache_policy);
+                                  LLVMValueRef vindex, LLVMValueRef voffset, 
enum gl_access_qualifier access);
 
 LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef 
rsrc, int num_channels,
                                   LLVMValueRef vindex, LLVMValueRef voffset, 
LLVMValueRef soffset,
-                                  LLVMTypeRef channel_type, unsigned 
cache_policy,
+                                  LLVMTypeRef channel_type, enum 
gl_access_qualifier access,
                                   bool can_speculate, bool allow_smem);
 
 LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, 
LLVMValueRef rsrc,
                                          LLVMValueRef vindex, LLVMValueRef 
voffset,
-                                         unsigned num_channels, unsigned 
cache_policy,
+                                         unsigned num_channels, enum 
gl_access_qualifier access,
                                          bool can_speculate, bool d16, bool 
tfe);
 
 LLVMValueRef ac_build_buffer_load_short(struct ac_llvm_context *ctx, 
LLVMValueRef rsrc,
                                         LLVMValueRef voffset, LLVMValueRef 
soffset,
-                                        unsigned cache_policy);
+                                        enum gl_access_qualifier access);
 
 LLVMValueRef ac_build_buffer_load_byte(struct ac_llvm_context *ctx, 
LLVMValueRef rsrc,
                                        LLVMValueRef voffset, LLVMValueRef 
soffset,
-                                       unsigned cache_policy);
+                                       enum gl_access_qualifier access);
 
 LLVMValueRef ac_build_safe_tbuffer_load(struct ac_llvm_context *ctx, 
LLVMValueRef rsrc,
                                         LLVMValueRef vindex, LLVMValueRef 
voffset,
@@ -312,15 +312,15 @@ LLVMValueRef ac_build_safe_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRe
                                         unsigned align_offset,
                                         unsigned align_mul,
                                         unsigned num_channels,
-                                        unsigned cache_policy,
+                                        enum gl_access_qualifier access,
                                         bool can_speculate);
 
 void ac_build_buffer_store_short(struct ac_llvm_context *ctx, LLVMValueRef 
rsrc,
                                  LLVMValueRef vdata, LLVMValueRef voffset, 
LLVMValueRef soffset,
-                                 unsigned cache_policy);
+                                 enum gl_access_qualifier access);
 
 void ac_build_buffer_store_byte(struct ac_llvm_context *ctx, LLVMValueRef 
rsrc, LLVMValueRef vdata,
-                                LLVMValueRef voffset, LLVMValueRef soffset, 
unsigned cache_policy);
+                                LLVMValueRef voffset, LLVMValueRef soffset, 
enum gl_access_qualifier access);
 
 void ac_set_range_metadata(struct ac_llvm_context *ctx, LLVMValueRef value, 
unsigned lo,
                            unsigned hi);
@@ -391,21 +391,12 @@ enum ac_atomic_op
    ac_atomic_fmax,
 };
 
-/* These cache policy bits match the definitions used by the LLVM intrinsics. 
*/
-enum ac_image_cache_policy
-{
-   ac_glc = 1 << 0,      /* per-CU cache control */
-   ac_slc = 1 << 1,      /* global L2 cache control */
-   ac_dlc = 1 << 2,      /* per-shader-array cache control */
-   ac_swizzled = 1 << 3, /* the access is swizzled, disabling load/store 
merging */
-};
-
 struct ac_image_args {
    enum ac_image_opcode opcode;
    enum ac_atomic_op atomic; /* for the ac_image_atomic opcode */
    enum ac_image_dim dim;
+   enum gl_access_qualifier access;
    unsigned dmask : 4;
-   unsigned cache_policy : 3;
    bool unorm : 1;
    bool level_zero : 1;
    bool d16 : 1;        /* GFX8+: data and return values are 16-bit */
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index fe887798ea4..97373ee5c62 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -1804,26 +1804,6 @@ static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueR
    }
 }
 
-static unsigned get_cache_policy(struct ac_nir_context *ctx, enum 
gl_access_qualifier access,
-                                 bool may_store_unaligned)
-{
-   unsigned cache_policy = 0;
-
-   /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores.  All
-    * store opcodes not aligned to a dword are affected. The only way to
-    * get unaligned stores is through shader images.
-    */
-   if (((may_store_unaligned && ctx->ac.gfx_level == GFX6) ||
-        access & (ACCESS_COHERENT | ACCESS_VOLATILE))) {
-      cache_policy |= ac_glc;
-   }
-
-   if (access & ACCESS_NON_TEMPORAL)
-      cache_policy |= ac_slc | ac_glc;
-
-   return cache_policy;
-}
-
 static LLVMValueRef enter_waterfall_ssbo(struct ac_nir_context *ctx, struct 
waterfall_context *wctx,
                                          const nir_intrinsic_instr *instr, 
nir_src src)
 {
@@ -1841,8 +1821,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in
    LLVMValueRef src_data = get_src(ctx, instr->src[0]);
    int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
    unsigned writemask = nir_intrinsic_write_mask(instr);
-   enum gl_access_qualifier access = nir_intrinsic_access(instr);
-   unsigned cache_policy = get_cache_policy(ctx, access, false);
+   enum gl_access_qualifier access = ac_get_mem_access_flags(instr);
 
    struct waterfall_context wctx;
    LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, 
instr->src[1]);
@@ -1897,9 +1876,9 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in
                             LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, 
false), "");
 
       if (num_bytes == 1) {
-         ac_build_buffer_store_byte(&ctx->ac, rsrc, data, offset, 
ctx->ac.i32_0, cache_policy);
+         ac_build_buffer_store_byte(&ctx->ac, rsrc, data, offset, 
ctx->ac.i32_0, access);
       } else if (num_bytes == 2) {
-         ac_build_buffer_store_short(&ctx->ac, rsrc, data, offset, 
ctx->ac.i32_0, cache_policy);
+         ac_build_buffer_store_short(&ctx->ac, rsrc, data, offset, 
ctx->ac.i32_0, access);
       } else {
          switch (num_bytes) {
          case 16: /* v4f32 */
@@ -1920,7 +1899,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in
          data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, "");
 
          ac_build_buffer_store_dword(&ctx->ac, rsrc, data, NULL, offset,
-                                     ctx->ac.i32_0, cache_policy);
+                                     ctx->ac.i32_0, access);
       }
    }
 
@@ -2066,11 +2045,16 @@ static LLVMValueRef visit_atomic_ssbo(struct 
ac_nir_context *ctx, nir_intrinsic_
          data = ac_to_float(&ctx->ac, data);
          return_type = LLVMTypeOf(data);
       }
+
+      unsigned cache_flags =
+         ac_get_hw_cache_flags(ctx->ac.gfx_level,
+                              ac_get_mem_access_flags(instr) | 
ACCESS_TYPE_ATOMIC).value;
+
       params[arg_count++] = data;
       params[arg_count++] = descriptor;
       params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
       params[arg_count++] = ctx->ac.i32_0;               /* soffset */
-      params[arg_count++] = ctx->ac.i32_0;               /* slc */
+      params[arg_count++] = LLVMConstInt(ctx->ac.i32, cache_flags, 0);
 
       ac_build_type_name_for_intr(return_type, type, sizeof(type));
       snprintf(name, sizeof(name), "llvm.amdgcn.raw.buffer.atomic.%s.%s", op, 
type);
@@ -2095,8 +2079,7 @@ static LLVMValueRef visit_load_buffer(struct 
ac_nir_context *ctx, nir_intrinsic_
 
    int elem_size_bytes = instr->dest.ssa.bit_size / 8;
    int num_components = instr->num_components;
-   enum gl_access_qualifier access = nir_intrinsic_access(instr);
-   unsigned cache_policy = get_cache_policy(ctx, access, false);
+   enum gl_access_qualifier access = ac_get_mem_access_flags(instr);
 
    LLVMValueRef offset = get_src(ctx, instr->src[1]);
    LLVMValueRef rsrc = ctx->abi->load_ssbo ?
@@ -2122,16 +2105,16 @@ static LLVMValueRef visit_load_buffer(struct 
ac_nir_context *ctx, nir_intrinsic_
 
       if (load_bytes == 1) {
          ret = ac_build_buffer_load_byte(&ctx->ac, rsrc, voffset, 
ctx->ac.i32_0,
-                                          cache_policy);
+                                          access);
       } else if (load_bytes == 2) {
          ret = ac_build_buffer_load_short(&ctx->ac, rsrc, voffset, 
ctx->ac.i32_0,
-                                           cache_policy);
+                                           access);
       } else {
          int num_channels = util_next_power_of_two(load_bytes) / 4;
          bool can_speculate = access & ACCESS_CAN_REORDER;
 
          ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels, vindex, 
voffset, ctx->ac.i32_0,
-                                    ctx->ac.f32, cache_policy, can_speculate, 
false);
+                                    ctx->ac.f32, access, can_speculate, false);
       }
 
       LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, 
ac_get_type_size(LLVMTypeOf(ret)));
@@ -2507,7 +2490,7 @@ static LLVMValueRef visit_image_load(struct 
ac_nir_context *ctx, const nir_intri
 
    struct ac_image_args args = {0};
 
-   args.cache_policy = get_cache_policy(ctx, access, false);
+   args.access = ac_get_mem_access_flags(instr);
    args.tfe = instr->intrinsic == nir_intrinsic_bindless_image_sparse_load;
 
    if (dim == GLSL_SAMPLER_DIM_BUF) {
@@ -2523,7 +2506,7 @@ static LLVMValueRef visit_image_load(struct 
ac_nir_context *ctx, const nir_intri
       assert(instr->dest.is_ssa);
       bool can_speculate = access & ACCESS_CAN_REORDER;
       res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex, ctx->ac.i32_0, 
num_channels,
-                                        args.cache_policy, can_speculate,
+                                        args.access, can_speculate,
                                         instr->dest.ssa.bit_size == 16,
                                         args.tfe);
       res = ac_build_expand(&ctx->ac, res, num_channels, args.tfe ? 5 : 4);
@@ -2588,14 +2571,13 @@ static void visit_image_store(struct ac_nir_context 
*ctx, const nir_intrinsic_in
    }
 
    enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
-   enum gl_access_qualifier access = nir_intrinsic_access(instr);
    bool is_array = nir_intrinsic_image_array(instr);
 
    struct waterfall_context wctx;
    LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
 
    struct ac_image_args args = {0};
-   args.cache_policy = get_cache_policy(ctx, access, true);
+   args.access = ac_get_mem_access_flags(instr);
 
    LLVMValueRef src = get_src(ctx, instr->src[3]);
    if (instr->src[3].ssa->bit_size == 64) {
@@ -2617,7 +2599,7 @@ static void visit_image_store(struct ac_nir_context *ctx, 
const nir_intrinsic_in
       vindex =
          LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), 
ctx->ac.i32_0, "");
 
-      ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex, ctx->ac.i32_0, 
args.cache_policy);
+      ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex, ctx->ac.i32_0, 
args.access);
    } else {
       bool level_zero = nir_src_is_const(instr->src[4]) && 
nir_src_as_uint(instr->src[4]) == 0;
 
@@ -2730,9 +2712,12 @@ static LLVMValueRef visit_image_atomic(struct 
ac_nir_context *ctx, const nir_int
       } else {
          LLVMTypeRef data_type = LLVMTypeOf(params[0]);
          char type[8];
+         unsigned cache_flags =
+            ac_get_hw_cache_flags(ctx->ac.gfx_level,
+                                 ac_get_mem_access_flags(instr) | 
ACCESS_TYPE_ATOMIC).value;
 
          params[param_count++] = ctx->ac.i32_0; /* soffset */
-         params[param_count++] = ctx->ac.i32_0; /* slc */
+         params[param_count++] = LLVMConstInt(ctx->ac.i32, cache_flags, 0);
 
          ac_build_type_name_for_intr(data_type, type, sizeof(type));
          length = snprintf(intrinsic_name, sizeof(intrinsic_name),
@@ -2752,6 +2737,7 @@ static LLVMValueRef visit_image_atomic(struct 
ac_nir_context *ctx, const nir_int
       args.resource = ctx->abi->load_sampler_desc(ctx->abi, dynamic_index, 
AC_DESC_IMAGE);
       get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
       args.dim = ac_get_image_dim(ctx->ac.gfx_level, dim, is_array);
+      args.access = ac_get_mem_access_flags(instr);
 
       result = ac_build_image_opcode(&ctx->ac, &args);
    }
@@ -3805,19 +3791,9 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, 
nir_intrinsic_instr *ins
       LLVMValueRef vidx = idxen ? get_src(ctx, instr->src[src_base + 3]) : 
NULL;
       unsigned num_components = instr->dest.ssa.num_components;
       unsigned const_offset = nir_intrinsic_base(instr);
-      bool swizzled = nir_intrinsic_access(instr) & ACCESS_IS_SWIZZLED_AMD;
       bool reorder = nir_intrinsic_can_reorder(instr);
-      bool coherent = nir_intrinsic_access(instr) & ACCESS_COHERENT;
-      bool slc = nir_intrinsic_access(instr) & ACCESS_NON_TEMPORAL;
-      bool uses_format = nir_intrinsic_access(instr) & ACCESS_USES_FORMAT_AMD;
-
-      enum ac_image_cache_policy cache_policy = 0;
-      if (swizzled)
-         cache_policy |= ac_swizzled;
-      if (slc)
-         cache_policy |= ac_slc;
-      if (coherent)
-         cache_policy |= ac_glc;
+      enum gl_access_qualifier access = ac_get_mem_access_flags(instr);
+      bool uses_format = access & ACCESS_USES_FORMAT_AMD;
 
       LLVMValueRef voffset = LLVMBuildAdd(ctx->ac.builder, addr_voffset,
                                           LLVMConstInt(ctx->ac.i32, 
const_offset, 0), "");
@@ -3825,12 +3801,12 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, 
nir_intrinsic_instr *ins
       if (instr->intrinsic == nir_intrinsic_load_buffer_amd && uses_format) {
          assert(instr->dest.ssa.bit_size == 16 || instr->dest.ssa.bit_size == 
32);
          result = ac_build_buffer_load_format(&ctx->ac, descriptor, vidx, 
voffset, num_components,
-                                              cache_policy, reorder,
+                                              access, reorder,
                                               instr->dest.ssa.bit_size == 16, 
false);
          result = ac_to_integer(&ctx->ac, result);
       } else if (instr->intrinsic == nir_intrinsic_store_buffer_amd && 
uses_format) {
          assert(instr->src[0].ssa->bit_size == 16 || 
instr->src[0].ssa->bit_size == 32);
-         ac_build_buffer_store_format(&ctx->ac, descriptor, store_data, vidx, 
voffset, cache_policy);
+         ac_build_buffer_store_format(&ctx->ac, descriptor, store_data, vidx, 
voffset, access);
       } else if (instr->intrinsic == nir_intrinsic_load_buffer_amd ||
                  instr->intrinsic == nir_intrinsic_load_typed_buffer_amd) {
          /* LLVM is unable to select instructions for larger than 32-bit 
channel types.
@@ -3843,7 +3819,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, 
nir_intrinsic_instr *ins
 
          if (instr->intrinsic == nir_intrinsic_load_buffer_amd) {
             result = ac_build_buffer_load(&ctx->ac, descriptor, 
fetch_num_components, vidx, voffset,
-                                          addr_soffset, channel_type, 
cache_policy, reorder, false);
+                                          addr_soffset, channel_type, access, 
reorder, false);
          } else {
             const unsigned align_offset = nir_intrinsic_align_offset(instr);
             const unsigned align_mul = nir_intrinsic_align_mul(instr);
@@ -3854,7 +3830,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, 
nir_intrinsic_instr *ins
             result =
                ac_build_safe_tbuffer_load(&ctx->ac, descriptor, vidx, 
addr_voffset, addr_soffset,
                                           channel_type, vtx_info, 
const_offset, align_offset,
-                                          align_mul, fetch_num_components, 
cache_policy, reorder);
+                                          align_mul, fetch_num_components, 
access, reorder);
          }
 
          /* Trim to needed vector components. */
@@ -3884,7 +3860,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, 
nir_intrinsic_instr *ins
 
             LLVMValueRef data = extract_vector_range(&ctx->ac, store_data, 
start, count);
             ac_build_buffer_store_dword(&ctx->ac, descriptor, data, vidx, 
voffset, addr_soffset,
-                                        cache_policy);
+                                        access);
          }
       }
       break;
@@ -3933,12 +3909,15 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, 
nir_intrinsic_instr *ins
       LLVMValueRef data = get_src(ctx, instr->src[1]);
       unsigned base = nir_intrinsic_base(instr);
       LLVMTypeRef return_type = LLVMTypeOf(data);
+      unsigned cache_flags =
+         ac_get_hw_cache_flags(ctx->ac.gfx_level,
+                              ac_get_mem_access_flags(instr) | 
ACCESS_TYPE_ATOMIC).value;
 
       LLVMValueRef args[] = {
          data, desc,
          LLVMConstInt(ctx->ac.i32, base, false),
          ctx->ac.i32_0, /* soffset */
-         ctx->ac.i32_0, /* cachepolicy */
+         LLVMConstInt(ctx->ac.i32, cache_flags, 0),
       };
 
       char name[64], type[8];
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c 
b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
index 53fb4991594..a2dad78a78c 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
@@ -372,7 +372,8 @@ static void si_write_tess_factors(struct si_shader_context 
*ctx, union si_shader
       ac_build_ifcc(&ctx->ac,
                     LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, rel_patch_id, 
ctx->ac.i32_0, ""), 6504);
       ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->ac.i32, 
0x80000000, 0),
-                                  NULL, LLVMConstInt(ctx->ac.i32, offset, 0), 
tf_base, ac_glc);
+                                  NULL, LLVMConstInt(ctx->ac.i32, offset, 0), 
tf_base,
+                                  ACCESS_COHERENT);
       ac_build_endif(&ctx->ac, 6504);
       offset += 4;
    }
@@ -381,13 +382,13 @@ static void si_write_tess_factors(struct 
si_shader_context *ctx, union si_shader
    ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, NULL,
                                LLVMBuildAdd(ctx->ac.builder, byteoffset,
                                             LLVMConstInt(ctx->ac.i32, offset, 
0), ""),
-                               tf_base, ac_glc);
+                               tf_base, ACCESS_COHERENT);
    offset += 16;
    if (vec1)
       ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, NULL,
                                   LLVMBuildAdd(ctx->ac.builder, byteoffset,
                                                LLVMConstInt(ctx->ac.i32, 
offset, 0), ""),
-                                  tf_base, ac_glc);
+                                  tf_base, ACCESS_COHERENT);
 
    /* Store the tess factors into the offchip buffer if TES reads them. */
    if (shader->key.ge.part.tcs.epilog.tes_reads_tess_factors) {
@@ -405,7 +406,7 @@ static void si_write_tess_factors(struct si_shader_context 
*ctx, union si_shader
       outer_vec = ac_build_gather_values(&ctx->ac, outer, outer_comps);
 
       ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec, NULL, 
tf_outer_offset,
-                                  base, ac_glc);
+                                  base, ACCESS_COHERENT);
       if (inner_comps) {
          param_inner = 
si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER);
          tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
@@ -413,7 +414,7 @@ static void si_write_tess_factors(struct si_shader_context 
*ctx, union si_shader
 
          inner_vec = ac_build_gather_values(&ctx->ac, inner, inner_comps);
          ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec, NULL,
-                                     tf_inner_offset, base, ac_glc);
+                                     tf_inner_offset, base, ACCESS_COHERENT);
       }
    }
 


Reply via email to