https://gcc.gnu.org/g:3760f09cafa8db5c42a8dc3193839c25a4994a46
commit 3760f09cafa8db5c42a8dc3193839c25a4994a46 Author: Andrew Stubbs <a...@codesourcery.com> Date: Mon Mar 6 12:42:44 2023 +0000 amdgcn: gather/scatter with DImode offsets The GPU architecture requires SImode offsets on gather/scatter instructions, but they can also take a vector of absolute addresses, so this allows gather/scatter in more situations. gcc/ChangeLog: * config/gcn/gcn-valu.md (gather_load<mode><vndi>): New. (scatter_store<mode><vndi>): New. (mask_gather_load<mode><vndi>): New. (mask_scatter_store<mode><vndi>): New. Diff: --- gcc/ChangeLog.omp | 7 +++ gcc/config/gcn/gcn-valu.md | 123 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+) diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp index 66d27e6ab7c..3f6b3e44e02 100644 --- a/gcc/ChangeLog.omp +++ b/gcc/ChangeLog.omp @@ -1,3 +1,10 @@ +2023-03-17 Andrew Stubbs <a...@codesourcery.com> + + * config/gcn/gcn-valu.md (gather_load<mode><vndi>): New. + (scatter_store<mode><vndi>): New. + (mask_gather_load<mode><vndi>): New. + (mask_scatter_store<mode><vndi>): New. 
+
 2024-05-10  Thomas Schwinge  <tschwi...@baylibre.com>
 
 	* config/nvptx/nvptx.md (nvptx_uniform_warp_check): Make fit for
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index e8381d28c1b..15fdeb7d1ea 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -1111,6 +1111,34 @@
 ;;
 ;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on
 
+(define_expand "gather_load<mode><vndi>"  ; Gather using DImode offsets.
+  [(match_operand:V_ALL 0 "register_operand")   ; Loaded vector.
+   (match_operand:DI 1 "register_operand")      ; Scalar base address.
+   (match_operand:<VnDI> 2 "register_operand")  ; Per-lane offsets.
+   (match_operand 3 "immediate_operand")        ; Not used here.
+   (match_operand:SI 4 "gcn_alu_operand")]      ; Offset scale.
+  ""
+  { /* addr = base + offsets * scale; the gather then uses addr alone.  */
+    rtx vec_base = gen_reg_rtx (<VnDI>mode);    /* Base in every lane.  */
+    rtx addr = gen_reg_rtx (<VnDI>mode);        /* Base + offsets.  */
+    rtx multiplier = gen_reg_rtx (<VnDI>mode);  /* Scale in every lane.  */
+    rtx offsets = gen_reg_rtx (<VnDI>mode);     /* Scaled offsets.  */
+
+    if (CONST_INT_P (operands[4]) && INTVAL (operands[4]) != 1)
+      {
+        emit_insn (gen_vec_duplicate<vndi> (multiplier, operands[4]));
+        emit_insn (gen_mul<vndi>3 (offsets, operands[2], multiplier));
+      }
+    else  /* Scale is 1 or not a CONST_INT: use the offsets as given.  */
+      offsets = operands[2];
+    emit_insn (gen_vec_duplicate<vndi> (vec_base, operands[1]));
+    emit_insn (gen_add<vndi>3 (addr, vec_base, offsets));
+
+    emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
+                                              const0_rtx, const0_rtx));
+    DONE;
+  })
+
 (define_expand "gather_load<mode><vnsi>"
   [(match_operand:V_MOV 0 "register_operand")
    (match_operand:DI 1 "register_operand")
@@ -1244,6 +1272,34 @@
    (set_attr "gcn_version" "*,cdna2,*,cdna2")
    (set_attr "xnack" "off,off,on,on")])
 
+(define_expand "scatter_store<mode><vndi>"  ; Scatter using DImode offsets.
+  [(match_operand:DI 0 "register_operand")      ; Scalar base address.
+   (match_operand:<VnDI> 1 "register_operand")  ; Per-lane offsets.
+   (match_operand 2 "immediate_operand")        ; Not used here.
+   (match_operand:SI 3 "gcn_alu_operand")       ; Offset scale.
+   (match_operand:V_ALL 4 "register_operand")]  ; Vector to store.
+  ""
+  { /* addr = base + offsets * scale; the scatter then uses addr alone.  */
+    rtx vec_base = gen_reg_rtx (<VnDI>mode);    /* Base in every lane.  */
+    rtx addr = gen_reg_rtx (<VnDI>mode);        /* Base + offsets.  */
+    rtx multiplier = gen_reg_rtx (<VnDI>mode);  /* Scale in every lane.  */
+    rtx offsets = gen_reg_rtx (<VnDI>mode);     /* Scaled offsets.  */
+
+    if (CONST_INT_P (operands[3]) && INTVAL (operands[3]) != 1)
+      {
+        emit_insn (gen_vec_duplicate<vndi> (multiplier, operands[3]));
+        emit_insn (gen_mul<vndi>3 (offsets, operands[1], multiplier));
+      }
+    else  /* Scale is 1 or not a CONST_INT: use the offsets as given.  */
+      offsets = operands[1];
+    emit_insn (gen_vec_duplicate<vndi> (vec_base, operands[0]));
+    emit_insn (gen_add<vndi>3 (addr, vec_base, offsets));
+
+    emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
+                                               const0_rtx, const0_rtx));
+    DONE;
+  })
+
 (define_expand "scatter_store<mode><vnsi>"
   [(match_operand:DI 0 "register_operand")
    (match_operand:<VnSI> 1 "register_operand")
@@ -4034,6 +4090,41 @@
     DONE;
   })
 
+(define_expand "mask_gather_load<mode><vndi>"  ; Masked DImode-offset gather.
+  [(match_operand:V_ALL 0 "register_operand")   ; Loaded vector.
+   (match_operand:DI 1 "register_operand")      ; Scalar base address.
+   (match_operand:<VnDI> 2 "register_operand")  ; Per-lane offsets.
+   (match_operand 3 "immediate_operand")        ; Not used here.
+   (match_operand:SI 4 "gcn_alu_operand")       ; Offset scale.
+   (match_operand:DI 5 "")]                     ; Lane mask.
+  ""
+  { /* addr = base + offsets * scale; the gather then uses addr alone.  */
+    rtx vec_base = gen_reg_rtx (<VnDI>mode);    /* Base in every lane.  */
+    rtx addr = gen_reg_rtx (<VnDI>mode);        /* Base + offsets.  */
+    rtx multiplier = gen_reg_rtx (<VnDI>mode);  /* Scale in every lane.  */
+    rtx offsets = gen_reg_rtx (<VnDI>mode);     /* Scaled offsets.  */
+    rtx exec = force_reg (DImode, operands[5]); /* Mask in a register.  */
+
+    if (CONST_INT_P (operands[4]) && INTVAL (operands[4]) != 1)
+      {
+        emit_insn (gen_vec_duplicate<vndi> (multiplier, operands[4]));
+        emit_insn (gen_mul<vndi>3 (offsets, operands[2], multiplier));
+      }
+    else  /* Scale is 1 or not a CONST_INT: use the offsets as given.  */
+      offsets = operands[2];
+    emit_insn (gen_vec_duplicate<vndi> (vec_base, operands[1]));
+    emit_insn (gen_add<vndi>3 (addr, vec_base, offsets));
+
+    /* Masked lanes are required to hold zero.  */
+    emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
+
+    emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
+                                                   const0_rtx, const0_rtx,
+                                                   const0_rtx, operands[0],
+                                                   exec));
+    DONE;
+  })
+
 (define_expand "mask_gather_load<mode><vnsi>"
   [(match_operand:V_MOV 0 "register_operand")
    (match_operand:DI 1 "register_operand")
@@ -4065,6 +4156,38 @@
     DONE;
   })
 
+(define_expand "mask_scatter_store<mode><vndi>"  ; Masked DImode-offset scatter.
+  [(match_operand:DI 0 "register_operand")      ; Scalar base address.
+   (match_operand:<VnDI> 1 "register_operand")  ; Per-lane offsets.
+   (match_operand 2 "immediate_operand")        ; Not used here.
+   (match_operand:SI 3 "gcn_alu_operand")       ; Offset scale.
+   (match_operand:V_ALL 4 "register_operand")   ; Vector to store.
+   (match_operand:DI 5 "")]                     ; Lane mask.
+  ""
+  { /* addr = base + offsets * scale; the scatter then uses addr alone.  */
+    rtx vec_base = gen_reg_rtx (<VnDI>mode);    /* Base in every lane.  */
+    rtx addr = gen_reg_rtx (<VnDI>mode);        /* Base + offsets.  */
+    rtx multiplier = gen_reg_rtx (<VnDI>mode);  /* Scale in every lane.  */
+    rtx offsets = gen_reg_rtx (<VnDI>mode);     /* Scaled offsets.  */
+    rtx exec = force_reg (DImode, operands[5]); /* Mask in a register.  */
+
+    if (CONST_INT_P (operands[3]) && INTVAL (operands[3]) != 1)
+      {
+        emit_insn (gen_vec_duplicate<vndi> (multiplier, operands[3]));
+        emit_insn (gen_mul<vndi>3 (offsets, operands[1], multiplier));
+      }
+    else  /* Scale is 1 or not a CONST_INT: use the offsets as given.  */
+      offsets = operands[1];
+    emit_insn (gen_vec_duplicate<vndi> (vec_base, operands[0]));
+    emit_insn (gen_add<vndi>3 (addr, vec_base, offsets));
+
+    emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
+                                                    operands[4], const0_rtx,
+                                                    const0_rtx,
+                                                    exec));
+    DONE;
+  })
+
 (define_expand "mask_scatter_store<mode><vnsi>"
   [(match_operand:DI 0 "register_operand")
    (match_operand:<VnSI> 1 "register_operand")