https://gcc.gnu.org/g:3760f09cafa8db5c42a8dc3193839c25a4994a46

commit 3760f09cafa8db5c42a8dc3193839c25a4994a46
Author: Andrew Stubbs <a...@codesourcery.com>
Date:   Mon Mar 6 12:42:44 2023 +0000

    amdgcn: gather/scatter with DImode offsets
    
    The GPU architecture requires SImode offsets on gather/scatter instructions,
    but they can also take a vector of absolute addresses, so this allows
    gather/scatter in more situations.
    
    gcc/ChangeLog:
    
            * config/gcn/gcn-valu.md (gather_load<mode><vndi>): New.
            (scatter_store<mode><vndi>): New.
            (mask_gather_load<mode><vndi>): New.
            (mask_scatter_store<mode><vndi>): New.

Diff:
---
 gcc/ChangeLog.omp          |   7 +++
 gcc/config/gcn/gcn-valu.md | 123 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 130 insertions(+)

diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index 66d27e6ab7c..3f6b3e44e02 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,3 +1,10 @@
+2023-03-17  Andrew Stubbs  <a...@codesourcery.com>
+
+       * config/gcn/gcn-valu.md (gather_load<mode><vndi>): New.
+       (scatter_store<mode><vndi>): New.
+       (mask_gather_load<mode><vndi>): New.
+       (mask_scatter_store<mode><vndi>): New.
+
 2024-05-10  Thomas Schwinge  <tschwi...@baylibre.com>
 
        * config/nvptx/nvptx.md (nvptx_uniform_warp_check): Make fit for
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index e8381d28c1b..15fdeb7d1ea 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -1111,6 +1111,34 @@
 ;;
 ;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on
 
+(define_expand "gather_load<mode><vndi>" ; gather using 64-bit lane offsets
+  [(match_operand:V_ALL 0 "register_operand")   ; vector receiving the load
+   (match_operand:DI 1 "register_operand")      ; scalar base address
+   (match_operand:<VnDI> 2 "register_operand")  ; per-lane DImode offsets
+   (match_operand 3 "immediate_operand")        ; not referenced in the body
+   (match_operand:SI 4 "gcn_alu_operand")]      ; scale for the offsets
+  ""
+  {
+    rtx vec_base = gen_reg_rtx (<VnDI>mode);     /* Base in every lane.  */
+    rtx addr = gen_reg_rtx (<VnDI>mode);         /* vec_base + offsets.  */
+    rtx multiplier = gen_reg_rtx (<VnDI>mode);   /* Scale in every lane.  */
+    rtx offsets = gen_reg_rtx (<VnDI>mode);      /* Scaled offsets.  */
+
+    if (CONST_INT_P (operands[4]) && INTVAL (operands[4]) != 1)
+      {  /* Multiply each offset by the constant scale.  */
+       emit_insn (gen_vec_duplicate<vndi> (multiplier, operands[4]));
+       emit_insn (gen_mul<vndi>3 (offsets, operands[2], multiplier));
+      }
+    else  /* Scale is 1, or is not a CONST_INT: offsets are used as-is.  */
+      offsets = operands[2];
+    emit_insn (gen_vec_duplicate<vndi> (vec_base, operands[1]));
+    emit_insn (gen_add<vndi>3 (addr, vec_base, offsets));
+    /* ADDR holds complete addresses, so the other insn operands are 0.  */
+    emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
+                                             const0_rtx, const0_rtx));
+    DONE;
+  })
+
 (define_expand "gather_load<mode><vnsi>"
   [(match_operand:V_MOV 0 "register_operand")
    (match_operand:DI 1 "register_operand")
@@ -1244,6 +1272,34 @@
    (set_attr "gcn_version" "*,cdna2,*,cdna2")
    (set_attr "xnack" "off,off,on,on")])
 
+(define_expand "scatter_store<mode><vndi>" ; scatter using 64-bit lane offsets
+  [(match_operand:DI 0 "register_operand")      ; scalar base address
+   (match_operand:<VnDI> 1 "register_operand")  ; per-lane DImode offsets
+   (match_operand 2 "immediate_operand")        ; not referenced in the body
+   (match_operand:SI 3 "gcn_alu_operand")       ; scale for the offsets
+   (match_operand:V_ALL 4 "register_operand")]  ; vector of values to store
+  ""
+  {
+    rtx vec_base = gen_reg_rtx (<VnDI>mode);     /* Base in every lane.  */
+    rtx addr = gen_reg_rtx (<VnDI>mode);         /* vec_base + offsets.  */
+    rtx multiplier = gen_reg_rtx (<VnDI>mode);   /* Scale in every lane.  */
+    rtx offsets = gen_reg_rtx (<VnDI>mode);      /* Scaled offsets.  */
+
+    if (CONST_INT_P (operands[3]) && INTVAL (operands[3]) != 1)
+      {  /* Multiply each offset by the constant scale.  */
+       emit_insn (gen_vec_duplicate<vndi> (multiplier, operands[3]));
+       emit_insn (gen_mul<vndi>3 (offsets, operands[1], multiplier));
+      }
+    else  /* Scale is 1, or is not a CONST_INT: offsets are used as-is.  */
+      offsets = operands[1];
+    emit_insn (gen_vec_duplicate<vndi> (vec_base, operands[0]));
+    emit_insn (gen_add<vndi>3 (addr, vec_base, offsets));
+    /* ADDR holds complete addresses, so the other insn operands are 0.  */
+    emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
+                                              const0_rtx, const0_rtx));
+    DONE;
+  })
+
 (define_expand "scatter_store<mode><vnsi>"
   [(match_operand:DI 0 "register_operand")
    (match_operand:<VnSI> 1 "register_operand")
@@ -4034,6 +4090,41 @@
     DONE;
   })
 
+(define_expand "mask_gather_load<mode><vndi>" ; masked gather, 64-bit offsets
+  [(match_operand:V_ALL 0 "register_operand")   ; vector receiving the load
+   (match_operand:DI 1 "register_operand")      ; scalar base address
+   (match_operand:<VnDI> 2 "register_operand")  ; per-lane DImode offsets
+   (match_operand 3 "immediate_operand")        ; not referenced in the body
+   (match_operand:SI 4 "gcn_alu_operand")       ; scale for the offsets
+   (match_operand:DI 5 "")]                     ; lane mask, any operand form
+  ""
+  {
+    rtx vec_base = gen_reg_rtx (<VnDI>mode);     /* Base in every lane.  */
+    rtx addr = gen_reg_rtx (<VnDI>mode);         /* vec_base + offsets.  */
+    rtx multiplier = gen_reg_rtx (<VnDI>mode);   /* Scale in every lane.  */
+    rtx offsets = gen_reg_rtx (<VnDI>mode);      /* Scaled offsets.  */
+    rtx exec = force_reg (DImode, operands[5]);  /* Mask in a register.  */
+
+    if (CONST_INT_P (operands[4]) && INTVAL (operands[4]) != 1)
+      {  /* Multiply each offset by the constant scale.  */
+       emit_insn (gen_vec_duplicate<vndi> (multiplier, operands[4]));
+       emit_insn (gen_mul<vndi>3 (offsets, operands[2], multiplier));
+      }
+    else  /* Scale is 1, or is not a CONST_INT: offsets are used as-is.  */
+      offsets = operands[2];
+    emit_insn (gen_vec_duplicate<vndi> (vec_base, operands[1]));
+    emit_insn (gen_add<vndi>3 (addr, vec_base, offsets));
+
+    /* Masked lanes are required to hold zero.  */
+    emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
+    /* The zeroed operands[0] is also passed as an input to the insn.  */
+    emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
+                                                  const0_rtx, const0_rtx,
+                                                  const0_rtx, operands[0],
+                                                  exec));
+    DONE;
+  })
+
 (define_expand "mask_gather_load<mode><vnsi>"
   [(match_operand:V_MOV 0 "register_operand")
    (match_operand:DI 1 "register_operand")
@@ -4065,6 +4156,38 @@
     DONE;
   })
 
+(define_expand "mask_scatter_store<mode><vndi>" ; masked scatter, 64-bit offsets
+  [(match_operand:DI 0 "register_operand")      ; scalar base address
+   (match_operand:<VnDI> 1 "register_operand")  ; per-lane DImode offsets
+   (match_operand 2 "immediate_operand")        ; not referenced in the body
+   (match_operand:SI 3 "gcn_alu_operand")       ; scale; SI as in sibling expanders
+   (match_operand:V_ALL 4 "register_operand")   ; vector of values to store
+   (match_operand:DI 5 "")]                     ; lane mask, any operand form
+  ""
+  {
+    rtx vec_base = gen_reg_rtx (<VnDI>mode);     /* Base in every lane.  */
+    rtx addr = gen_reg_rtx (<VnDI>mode);         /* vec_base + offsets.  */
+    rtx multiplier = gen_reg_rtx (<VnDI>mode);   /* Scale in every lane.  */
+    rtx offsets = gen_reg_rtx (<VnDI>mode);      /* Scaled offsets.  */
+    rtx exec = force_reg (DImode, operands[5]);  /* Mask in a register.  */
+
+    if (CONST_INT_P (operands[3]) && INTVAL (operands[3]) != 1)
+      {  /* Multiply each offset by the constant scale.  */
+       emit_insn (gen_vec_duplicate<vndi> (multiplier, operands[3]));
+       emit_insn (gen_mul<vndi>3 (offsets, operands[1], multiplier));
+      }
+    else  /* Scale is 1, or is not a CONST_INT: offsets are used as-is.  */
+      offsets = operands[1];
+    emit_insn (gen_vec_duplicate<vndi> (vec_base, operands[0]));
+    emit_insn (gen_add<vndi>3 (addr, vec_base, offsets));
+    /* ADDR holds complete addresses, so the other insn operands are 0.  */
+    emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
+                                                   operands[4], const0_rtx,
+                                                   const0_rtx,
+                                                   exec));
+    DONE;
+  })
+
 (define_expand "mask_scatter_store<mode><vnsi>"
   [(match_operand:DI 0 "register_operand")
    (match_operand:<VnSI> 1 "register_operand")

Reply via email to