Module: Mesa Branch: main Commit: 2f0bb39e1621ab610cd6ca788635c64320917404 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2f0bb39e1621ab610cd6ca788635c64320917404
Author: Rhys Perry <[email protected]> Date: Fri Apr 22 16:46:21 2022 +0100 aco: ensure that definitions fixed to operands have matching regclasses If the operand is not killed, the definition needs to be large enough so that the new location for the operand does not intersect with the old location. Fixes with zink: KHR-GL45.shader_image_load_store.basic-allTargets-atomicCS KHR-GL45.shader_image_load_store.basic-allTargets-atomicGS KHR-GL45.shader_image_load_store.basic-allTargets-atomicVS Signed-off-by: Rhys Perry <[email protected]> Reviewed-by: Daniel Schürmann <[email protected]> Gitlab: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6276 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16106> --- src/amd/compiler/aco_instruction_selection.cpp | 23 ++++++++++++++++++----- src/amd/compiler/aco_register_allocation.cpp | 12 +++++++++++- src/gallium/drivers/zink/ci/zink-radv-fails.txt | 3 --- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 1fe28c82657..540886ba2eb 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -6184,10 +6184,11 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr) Builder bld(ctx->program, ctx->block); Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[3].ssa)); + bool cmpswap = instr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap; bool is_64bit = data.bytes() == 8; assert((data.bytes() == 4 || data.bytes() == 8) && "only 32/64-bit image atomics implemented."); - if (instr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap) + if (cmpswap) data = bld.pseudo(aco_opcode::p_create_vector, bld.def(is_64bit ? v4 : v2), get_ssa_temp(ctx, instr->src[4].ssa), data); @@ -6272,8 +6273,10 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr) mubuf->operands[1] = Operand(vindex); mubuf->operands[2] = Operand::c32(0); mubuf->operands[3] = Operand(data); + Definition def = + return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition(); if (return_previous) - mubuf->definitions[0] = Definition(dst); + mubuf->definitions[0] = def; mubuf->offset = 0; mubuf->idxen = true; mubuf->glc = return_previous; @@ -6282,12 +6285,15 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr) mubuf->sync = sync; ctx->program->needs_exact = true; ctx->block->instructions.emplace_back(std::move(mubuf)); + if (return_previous && cmpswap) + bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero()); return; } std::vector<Temp> coords = get_image_coords(ctx, instr); Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); - Definition def = return_previous ? Definition(dst) : Definition(); + Definition def = + return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition(); MIMG_instruction* mimg = emit_mimg(bld, image_op, def, resource, Operand(s4), coords, 0, Operand(data)); mimg->glc = return_previous; @@ -6299,6 +6305,8 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr) mimg->disable_wqm = true; mimg->sync = sync; ctx->program->needs_exact = true; + if (return_previous && cmpswap) + bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero()); return; } @@ -6481,8 +6489,9 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr) Builder bld(ctx->program, ctx->block); bool return_previous = !nir_ssa_def_is_unused(&instr->dest.ssa); Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa)); + bool cmpswap = instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap; - if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) + if (cmpswap) data = bld.pseudo(aco_opcode::p_create_vector, bld.def(RegType::vgpr, data.size() * 2), get_ssa_temp(ctx, instr->src[3].ssa), data); @@ -6552,8 +6561,10 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr) mubuf->operands[1] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1); mubuf->operands[2] = offset.type() == RegType::sgpr ? Operand(offset) : Operand::c32(0); mubuf->operands[3] = Operand(data); + Definition def = + return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition(); if (return_previous) - mubuf->definitions[0] = Definition(dst); + mubuf->definitions[0] = def; mubuf->offset = 0; mubuf->offen = (offset.type() == RegType::vgpr); mubuf->glc = return_previous; @@ -6562,6 +6573,8 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr) mubuf->sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw); ctx->program->needs_exact = true; ctx->block->instructions.emplace_back(std::move(mubuf)); + if (return_previous && cmpswap) + bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero()); } void diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 61e384dbdaa..108dfd04b8c 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2711,7 +2711,12 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra } } - /* handle definitions which must have the same register as an operand */ + /* Handle definitions which must have the same register as an operand. + * We expect that the definition has the same size as the operand, otherwise the new + * location for the operand (if it's not killed) might intersect with the old one. + * We can't read from the old location because it's corrupted, and we can't write the new + * location because that's used by a live-through operand. + */ if (instr->opcode == aco_opcode::v_interp_p2_f32 || instr->opcode == aco_opcode::v_mac_f32 || instr->opcode == aco_opcode::v_fmac_f32 || instr->opcode == aco_opcode::v_mac_f16 || instr->opcode == aco_opcode::v_fmac_f16 || @@ -2721,15 +2726,20 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra instr->opcode == aco_opcode::v_writelane_b32 || instr->opcode == aco_opcode::v_writelane_b32_e64 || instr->opcode == aco_opcode::v_dot4c_i32_i8) { + assert(instr->definitions[0].bytes() == instr->operands[2].bytes() || + instr->operands[2].regClass() == v1); instr->definitions[0].setFixed(instr->operands[2].physReg()); } else if (instr->opcode == aco_opcode::s_addk_i32 || instr->opcode == aco_opcode::s_mulk_i32) { + assert(instr->definitions[0].bytes() == instr->operands[0].bytes()); instr->definitions[0].setFixed(instr->operands[0].physReg()); } else if (instr->isMUBUF() && instr->definitions.size() == 1 && instr->operands.size() == 4) { + assert(instr->definitions[0].bytes() == instr->operands[3].bytes()); instr->definitions[0].setFixed(instr->operands[3].physReg()); } else if (instr->isMIMG() && instr->definitions.size() == 1 && !instr->operands[2].isUndefined()) { + assert(instr->definitions[0].bytes() == instr->operands[2].bytes()); instr->definitions[0].setFixed(instr->operands[2].physReg()); } diff --git a/src/gallium/drivers/zink/ci/zink-radv-fails.txt b/src/gallium/drivers/zink/ci/zink-radv-fails.txt index 33c55926244..fe90384874d 100644 --- a/src/gallium/drivers/zink/ci/zink-radv-fails.txt +++ b/src/gallium/drivers/zink/ci/zink-radv-fails.txt @@ -1,7 +1,4 @@ # probable ACO bug: #6276 -KHR-GL46.shader_image_load_store.basic-allTargets-atomicCS,Fail -KHR-GL46.shader_image_load_store.basic-allTargets-atomicGS,Fail -KHR-GL46.shader_image_load_store.basic-allTargets-atomicVS,Fail KHR-GL46.shader_storage_buffer_object.basic-operations-case1-cs,Fail KHR-GL46.shader_storage_buffer_object.basic-operations-case1-vs,Fail
