Module: Mesa Branch: main Commit: 66b3d3463316444210f62cf923e48f605116bad1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=66b3d3463316444210f62cf923e48f605116bad1
Author: Iago Toral Quiroga <[email protected]> Date: Wed May 10 09:11:06 2023 +0200 broadcom/compiler: use unified atomics Reviewed-by: Alyssa Rosenzweig <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22939> --- src/broadcom/compiler/nir_to_vir.c | 129 +++++++-------------- src/broadcom/compiler/v3d40_tex.c | 54 +++++---- .../compiler/v3d_nir_lower_robust_access.c | 35 +----- src/broadcom/compiler/vir.c | 3 + 4 files changed, 82 insertions(+), 139 deletions(-) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index d2badf0a74a..53973e52e5f 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -186,6 +186,28 @@ v3d_get_op_for_atomic_add(nir_intrinsic_instr *instr, unsigned src) return V3D_TMU_OP_WRITE_ADD_READ_PREFETCH; } +static uint32_t +v3d_general_tmu_op_for_atomic(nir_intrinsic_instr *instr) +{ + nir_atomic_op atomic_op = nir_intrinsic_atomic_op(instr); + switch (atomic_op) { + case nir_atomic_op_iadd: + return instr->intrinsic == nir_intrinsic_ssbo_atomic ? + v3d_get_op_for_atomic_add(instr, 2) : + v3d_get_op_for_atomic_add(instr, 1); + case nir_atomic_op_imin: return V3D_TMU_OP_WRITE_SMIN; + case nir_atomic_op_umin: return V3D_TMU_OP_WRITE_UMIN_FULL_L1_CLEAR; + case nir_atomic_op_imax: return V3D_TMU_OP_WRITE_SMAX; + case nir_atomic_op_umax: return V3D_TMU_OP_WRITE_UMAX; + case nir_atomic_op_iand: return V3D_TMU_OP_WRITE_AND_READ_INC; + case nir_atomic_op_ior: return V3D_TMU_OP_WRITE_OR_READ_DEC; + case nir_atomic_op_ixor: return V3D_TMU_OP_WRITE_XOR_READ_NOT; + case nir_atomic_op_xchg: return V3D_TMU_OP_WRITE_XCHG_READ_FLUSH; + case nir_atomic_op_cmpxchg: return V3D_TMU_OP_WRITE_CMPXCHG_READ_FLUSH; + default: unreachable("unknown atomic op"); + } +} + static uint32_t v3d_general_tmu_op(nir_intrinsic_instr *instr) { @@ -201,47 +223,15 @@ v3d_general_tmu_op(nir_intrinsic_instr *instr) case nir_intrinsic_store_scratch: case nir_intrinsic_store_global_2x32: return V3D_TMU_OP_REGULAR; - case nir_intrinsic_ssbo_atomic_add: - return v3d_get_op_for_atomic_add(instr, 2); - case nir_intrinsic_shared_atomic_add: - case nir_intrinsic_global_atomic_add_2x32: - return v3d_get_op_for_atomic_add(instr, 1); - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_global_atomic_imin_2x32: - case nir_intrinsic_shared_atomic_imin: - return V3D_TMU_OP_WRITE_SMIN; - case nir_intrinsic_ssbo_atomic_umin: - case nir_intrinsic_global_atomic_umin_2x32: - case nir_intrinsic_shared_atomic_umin: - return V3D_TMU_OP_WRITE_UMIN_FULL_L1_CLEAR; - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_global_atomic_imax_2x32: - case nir_intrinsic_shared_atomic_imax: - return V3D_TMU_OP_WRITE_SMAX; - case nir_intrinsic_ssbo_atomic_umax: - case nir_intrinsic_global_atomic_umax_2x32: - case nir_intrinsic_shared_atomic_umax: - return V3D_TMU_OP_WRITE_UMAX; - case nir_intrinsic_ssbo_atomic_and: - case nir_intrinsic_global_atomic_and_2x32: - case nir_intrinsic_shared_atomic_and: - return V3D_TMU_OP_WRITE_AND_READ_INC; - case nir_intrinsic_ssbo_atomic_or: - case nir_intrinsic_global_atomic_or_2x32: - case nir_intrinsic_shared_atomic_or: - return V3D_TMU_OP_WRITE_OR_READ_DEC; - case nir_intrinsic_ssbo_atomic_xor: - case nir_intrinsic_global_atomic_xor_2x32: - case nir_intrinsic_shared_atomic_xor: - return V3D_TMU_OP_WRITE_XOR_READ_NOT; - case nir_intrinsic_ssbo_atomic_exchange: - case nir_intrinsic_global_atomic_exchange_2x32: - case nir_intrinsic_shared_atomic_exchange: - return V3D_TMU_OP_WRITE_XCHG_READ_FLUSH; - case nir_intrinsic_ssbo_atomic_comp_swap: - case nir_intrinsic_global_atomic_comp_swap_2x32: - case nir_intrinsic_shared_atomic_comp_swap: - return V3D_TMU_OP_WRITE_CMPXCHG_READ_FLUSH; + + case nir_intrinsic_ssbo_atomic: + case nir_intrinsic_ssbo_atomic_swap: + case nir_intrinsic_shared_atomic: + case nir_intrinsic_shared_atomic_swap: + case nir_intrinsic_global_atomic_2x32: + case nir_intrinsic_global_atomic_swap_2x32: + return v3d_general_tmu_op_for_atomic(instr); + default: unreachable("unknown intrinsic op"); } @@ -514,11 +504,12 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr, * amount to add/sub, as that is implicit. */ bool atomic_add_replaced = - ((instr->intrinsic == nir_intrinsic_ssbo_atomic_add || - instr->intrinsic == nir_intrinsic_shared_atomic_add || - instr->intrinsic == nir_intrinsic_global_atomic_add_2x32) && + (instr->intrinsic == nir_intrinsic_ssbo_atomic || + instr->intrinsic == nir_intrinsic_shared_atomic || + instr->intrinsic == nir_intrinsic_global_atomic_2x32) && + nir_intrinsic_atomic_op(instr) == nir_atomic_op_iadd && (tmu_op == V3D_TMU_OP_WRITE_AND_READ_INC || - tmu_op == V3D_TMU_OP_WRITE_OR_READ_DEC)); + tmu_op == V3D_TMU_OP_WRITE_OR_READ_DEC); bool is_store = (instr->intrinsic == nir_intrinsic_store_ssbo || instr->intrinsic == nir_intrinsic_store_scratch || @@ -3330,44 +3321,20 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) } break; - case nir_intrinsic_ssbo_atomic_add: - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_ssbo_atomic_umin: - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_ssbo_atomic_umax: - case nir_intrinsic_ssbo_atomic_and: - case nir_intrinsic_ssbo_atomic_or: - case nir_intrinsic_ssbo_atomic_xor: - case nir_intrinsic_ssbo_atomic_exchange: - case nir_intrinsic_ssbo_atomic_comp_swap: case nir_intrinsic_store_ssbo: + case nir_intrinsic_ssbo_atomic: + case nir_intrinsic_ssbo_atomic_swap: ntq_emit_tmu_general(c, instr, false, false); break; - case nir_intrinsic_global_atomic_add_2x32: - case nir_intrinsic_global_atomic_imin_2x32: - case nir_intrinsic_global_atomic_umin_2x32: - case nir_intrinsic_global_atomic_imax_2x32: - case nir_intrinsic_global_atomic_umax_2x32: - case nir_intrinsic_global_atomic_and_2x32: - case nir_intrinsic_global_atomic_or_2x32: - case nir_intrinsic_global_atomic_xor_2x32: - case nir_intrinsic_global_atomic_exchange_2x32: - case nir_intrinsic_global_atomic_comp_swap_2x32: case nir_intrinsic_store_global_2x32: + case nir_intrinsic_global_atomic_2x32: + case nir_intrinsic_global_atomic_swap_2x32: ntq_emit_tmu_general(c, instr, false, true); break; - case nir_intrinsic_shared_atomic_add: - case nir_intrinsic_shared_atomic_imin: - case nir_intrinsic_shared_atomic_umin: - case nir_intrinsic_shared_atomic_imax: - case nir_intrinsic_shared_atomic_umax: - case nir_intrinsic_shared_atomic_and: - case nir_intrinsic_shared_atomic_or: - case nir_intrinsic_shared_atomic_xor: - case nir_intrinsic_shared_atomic_exchange: - case nir_intrinsic_shared_atomic_comp_swap: + case nir_intrinsic_shared_atomic: + case nir_intrinsic_shared_atomic_swap: case nir_intrinsic_store_shared: case nir_intrinsic_store_scratch: ntq_emit_tmu_general(c, instr, true, false); @@ -3380,16 +3347,8 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) break; case nir_intrinsic_image_store: - case nir_intrinsic_image_atomic_add: - case nir_intrinsic_image_atomic_imin: - case nir_intrinsic_image_atomic_umin: - case nir_intrinsic_image_atomic_imax: - case nir_intrinsic_image_atomic_umax: - case nir_intrinsic_image_atomic_and: - case nir_intrinsic_image_atomic_or: - case nir_intrinsic_image_atomic_xor: - case nir_intrinsic_image_atomic_exchange: - case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_atomic: + case nir_intrinsic_image_atomic_swap: v3d40_vir_emit_image_load_store(c, instr); break; diff --git a/src/broadcom/compiler/v3d40_tex.c b/src/broadcom/compiler/v3d40_tex.c index dab7e477204..a20ee110a23 100644 --- a/src/broadcom/compiler/v3d40_tex.c +++ b/src/broadcom/compiler/v3d40_tex.c @@ -399,6 +399,25 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) p0_unpacked.return_words_of_texture_data); } +static uint32_t +v3d40_image_atomic_tmu_op(nir_intrinsic_instr *instr) +{ + nir_atomic_op atomic_op = nir_intrinsic_atomic_op(instr); + switch (atomic_op) { + case nir_atomic_op_iadd: return v3d_get_op_for_atomic_add(instr, 3); + case nir_atomic_op_imin: return V3D_TMU_OP_WRITE_SMIN; + case nir_atomic_op_umin: return V3D_TMU_OP_WRITE_UMIN_FULL_L1_CLEAR; + case nir_atomic_op_imax: return V3D_TMU_OP_WRITE_SMAX; + case nir_atomic_op_umax: return V3D_TMU_OP_WRITE_UMAX; + case nir_atomic_op_iand: return V3D_TMU_OP_WRITE_AND_READ_INC; + case nir_atomic_op_ior: return V3D_TMU_OP_WRITE_OR_READ_DEC; + case nir_atomic_op_ixor: return V3D_TMU_OP_WRITE_XOR_READ_NOT; + case nir_atomic_op_xchg: return V3D_TMU_OP_WRITE_XCHG_READ_FLUSH; + case nir_atomic_op_cmpxchg: return V3D_TMU_OP_WRITE_CMPXCHG_READ_FLUSH; + default: unreachable("unknown atomic op"); + } +} + static uint32_t v3d40_image_load_store_tmu_op(nir_intrinsic_instr *instr) { @@ -406,26 +425,11 @@ v3d40_image_load_store_tmu_op(nir_intrinsic_instr *instr) case nir_intrinsic_image_load: case nir_intrinsic_image_store: return V3D_TMU_OP_REGULAR; - case nir_intrinsic_image_atomic_add: - return v3d_get_op_for_atomic_add(instr, 3); - case nir_intrinsic_image_atomic_imin: - return V3D_TMU_OP_WRITE_SMIN; - case nir_intrinsic_image_atomic_umin: - return V3D_TMU_OP_WRITE_UMIN_FULL_L1_CLEAR; - case nir_intrinsic_image_atomic_imax: - return V3D_TMU_OP_WRITE_SMAX; - case nir_intrinsic_image_atomic_umax: - return V3D_TMU_OP_WRITE_UMAX; - case nir_intrinsic_image_atomic_and: - return V3D_TMU_OP_WRITE_AND_READ_INC; - case nir_intrinsic_image_atomic_or: - return V3D_TMU_OP_WRITE_OR_READ_DEC; - case nir_intrinsic_image_atomic_xor: - return V3D_TMU_OP_WRITE_XOR_READ_NOT; - case nir_intrinsic_image_atomic_exchange: - return V3D_TMU_OP_WRITE_XCHG_READ_FLUSH; - case nir_intrinsic_image_atomic_comp_swap: - return V3D_TMU_OP_WRITE_CMPXCHG_READ_FLUSH; + + case nir_intrinsic_image_atomic: + case nir_intrinsic_image_atomic_swap: + return v3d40_image_atomic_tmu_op(instr); + default: unreachable("unknown image intrinsic"); }; @@ -496,7 +500,8 @@ vir_image_emit_register_writes(struct v3d_compile *c, } /* Second atomic argument */ - if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap) { + if (instr->intrinsic == nir_intrinsic_image_atomic_swap && + nir_intrinsic_atomic_op(instr) == nir_atomic_op_cmpxchg) { struct qreg src_4_0 = ntq_get_src(c, instr->src[4], 0); vir_TMU_WRITE_or_count(c, V3D_QPU_WADDR_TMUD, src_4_0, tmu_writes); @@ -568,9 +573,10 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c, * amount to add/sub, as that is implicit. */ bool atomic_add_replaced = - (instr->intrinsic == nir_intrinsic_image_atomic_add && - (p2_unpacked.op == V3D_TMU_OP_WRITE_AND_READ_INC || - p2_unpacked.op == V3D_TMU_OP_WRITE_OR_READ_DEC)); + instr->intrinsic == nir_intrinsic_image_atomic && + nir_intrinsic_atomic_op(instr) == nir_atomic_op_iadd && + (p2_unpacked.op == V3D_TMU_OP_WRITE_AND_READ_INC || + p2_unpacked.op == V3D_TMU_OP_WRITE_OR_READ_DEC); uint32_t p0_packed; V3D41_TMU_CONFIG_PARAMETER_0_pack(NULL, diff --git a/src/broadcom/compiler/v3d_nir_lower_robust_access.c b/src/broadcom/compiler/v3d_nir_lower_robust_access.c index e4455574a60..50fd8196304 100644 --- a/src/broadcom/compiler/v3d_nir_lower_robust_access.c +++ b/src/broadcom/compiler/v3d_nir_lower_robust_access.c @@ -165,16 +165,7 @@ lower_buffer_instr(nir_builder *b, nir_instr *instr, void *_state) return true; } return false; - case nir_intrinsic_ssbo_atomic_add: - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_ssbo_atomic_umin: - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_ssbo_atomic_umax: - case nir_intrinsic_ssbo_atomic_and: - case nir_intrinsic_ssbo_atomic_or: - case nir_intrinsic_ssbo_atomic_xor: - case nir_intrinsic_ssbo_atomic_exchange: - case nir_intrinsic_ssbo_atomic_comp_swap: + case nir_intrinsic_ssbo_atomic: if (c->key->robust_storage_access) { lower_buffer_atomic(c, b, intr); return true; @@ -182,16 +173,8 @@ lower_buffer_instr(nir_builder *b, nir_instr *instr, void *_state) return false; case nir_intrinsic_store_shared: case nir_intrinsic_load_shared: - case nir_intrinsic_shared_atomic_add: - case nir_intrinsic_shared_atomic_imin: - case nir_intrinsic_shared_atomic_umin: - case nir_intrinsic_shared_atomic_imax: - case nir_intrinsic_shared_atomic_umax: - case nir_intrinsic_shared_atomic_and: - case nir_intrinsic_shared_atomic_or: - case nir_intrinsic_shared_atomic_xor: - case nir_intrinsic_shared_atomic_exchange: - case nir_intrinsic_shared_atomic_comp_swap: + case nir_intrinsic_shared_atomic: + case nir_intrinsic_shared_atomic_swap: if (robust_shared_enabled) { lower_buffer_shared(c, b, intr); return true; @@ -295,16 +278,8 @@ lower_image_instr(nir_builder *b, nir_instr *instr, void *_state) switch (intr->intrinsic) { case nir_intrinsic_image_load: case nir_intrinsic_image_store: - case nir_intrinsic_image_atomic_add: - case nir_intrinsic_image_atomic_imin: - case nir_intrinsic_image_atomic_umin: - case nir_intrinsic_image_atomic_imax: - case nir_intrinsic_image_atomic_umax: - case nir_intrinsic_image_atomic_and: - case nir_intrinsic_image_atomic_or: - case nir_intrinsic_image_atomic_xor: - case nir_intrinsic_image_atomic_exchange: - case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_atomic: + case nir_intrinsic_image_atomic_swap: lower_image(c, b, intr); return true; default: diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 5355645085d..08ac97f6f48 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -617,6 +617,9 @@ type_size_vec4(const struct glsl_type *type, bool bindless) static void v3d_lower_nir(struct v3d_compile *c) { + /* FIXME: drop once GLSL/SPIR-V produce the new intrinsics. */ + NIR_PASS(_, c->s, nir_lower_legacy_atomics); + struct nir_lower_tex_options tex_options = { .lower_txd = true, .lower_tg4_broadcom_swizzle = true,
