Add new variants of the gather_load and scatter_store instructions that take
the offsets in DImode.  This is not the natural width for offsets in the
instruction set, but we can use them to compute a vector of absolute addresses,
which does work.

This enables the autovectorizer to use gather/scatter in a number of additional
scenarios (one of which shows up in the SPEC HPC lbm benchmark).

gcc/ChangeLog:

        * config/gcn/gcn-valu.md (gather_load<mode><vndi>): New.
        (scatter_store<mode><vndi>): New.
        (mask_gather_load<mode><vndi>): New.
        (mask_scatter_store<mode><vndi>): New.
        * config/gcn/gcn.cc (gcn_expand_scaled_offsets): Support DImode.
---
 gcc/config/gcn/gcn-valu.md | 81 ++++++++++++++++++++++++++++++++++++++
 gcc/config/gcn/gcn.cc      | 34 ++++++++++------
 2 files changed, 103 insertions(+), 12 deletions(-)

diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index dfa6b1523bd..3899117f271 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -1133,6 +1133,23 @@ (define_expand "gather_load<mode><vnsi>"
     DONE;
   })
 
+(define_expand "gather_load<mode><vndi>"
+  [(match_operand:V_MOV 0 "register_operand")
+   (match_operand:DI 1 "register_operand")
+   (match_operand:<VnDI> 2 "register_operand")
+   (match_operand 3 "immediate_operand")
+   (match_operand:SI 4 "gcn_alu_operand")]
+  ""
+  {
+    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
+                                         operands[2], operands[4],
+                                         INTVAL (operands[3]), NULL);
+
+    emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
+                                             const0_rtx, const0_rtx));
+    DONE;
+  })
+
 ; Allow any address expression
 (define_expand "gather<mode>_expr<exec>"
   [(set (match_operand:V_MOV 0 "register_operand")
@@ -1259,6 +1276,23 @@ (define_expand "scatter_store<mode><vnsi>"
     DONE;
   })
 
+(define_expand "scatter_store<mode><vndi>"
+  [(match_operand:DI 0 "register_operand")
+   (match_operand:<VnDI> 1 "register_operand")
+   (match_operand 2 "immediate_operand")
+   (match_operand:SI 3 "gcn_alu_operand")
+   (match_operand:V_MOV 4 "register_operand")]
+  ""
+  {
+    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
+                                         operands[1], operands[3],
+                                         INTVAL (operands[2]), NULL);
+
+    emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
+                                              const0_rtx, const0_rtx));
+    DONE;
+  })
+
 ; Allow any address expression
 (define_expand "scatter<mode>_expr<exec_scatter>"
   [(set (mem:BLK (scratch))
@@ -4222,6 +4256,32 @@ (define_expand "mask_gather_load<mode><vnsi>"
     DONE;
   })
 
+(define_expand "mask_gather_load<mode><vndi>"
+  [(set:V_MOV (match_operand:V_MOV 0 "register_operand")
+             (unspec:V_MOV
+               [(match_operand:DI 1 "register_operand")
+                (match_operand:<VnDI> 2 "register_operand")
+                (match_operand 3 "immediate_operand")
+                (match_operand:SI 4 "gcn_alu_operand")
+                (match_operand:DI 5 "")
+                (match_operand:V_MOV 6 "maskload_else_operand")]
+               UNSPEC_GATHER))]
+  ""
+  {
+    rtx exec = force_reg (DImode, operands[5]);
+
+    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
+                                         operands[2], operands[4],
+                                         INTVAL (operands[3]), exec);
+
+    emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
+                                                  const0_rtx, const0_rtx,
+                                                  const0_rtx,
+                                                  gcn_gen_undef (<MODE>mode),
+                                                  exec));
+    DONE;
+  })
+
 (define_expand "mask_scatter_store<mode><vnsi>"
   [(match_operand:DI 0 "register_operand")
    (match_operand:<VnSI> 1 "register_operand")
@@ -4250,6 +4310,27 @@ (define_expand "mask_scatter_store<mode><vnsi>"
     DONE;
   })
 
+(define_expand "mask_scatter_store<mode><vndi>"
+  [(match_operand:DI 0 "register_operand")
+   (match_operand:<VnDI> 1 "register_operand")
+   (match_operand 2 "immediate_operand")
+   (match_operand:SI 3 "gcn_alu_operand")
+   (match_operand:V_MOV 4 "register_operand")
+   (match_operand:DI 5 "")]
+  ""
+  {
+    rtx exec = force_reg (DImode, operands[5]);
+
+    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
+                                         operands[1], operands[3],
+                                         INTVAL (operands[2]), exec);
+
+    emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
+                                                   operands[4], const0_rtx,
+                                                   const0_rtx, exec));
+    DONE;
+  })
+
 (define_code_iterator cond_op [plus minus mult])
 
 (define_expand "cond_<expander><mode>"
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 81a8578cf5d..3b26d5c6a58 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -2307,36 +2307,46 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
 
    Return values.
      ADDR_SPACE_FLAT   - return VnDImode vector of absolute addresses.
-     ADDR_SPACE_GLOBAL - return VnSImode vector of offsets.  */
+     ADDR_SPACE_GLOBAL - return VnSImode vector of offsets.
+     64-bit offsets    - return VnDImode vector of absolute addresses. */
 
 rtx
 gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale,
                           bool unsigned_p, rtx exec)
 {
   int vf = GET_MODE_NUNITS (GET_MODE (offsets));
-  rtx tmpsi = gen_reg_rtx (VnMODE (vf, SImode));
-  rtx tmpdi = gen_reg_rtx (VnMODE (vf, DImode));
+  rtx scaled_offsets = gen_reg_rtx (GET_MODE (offsets));
+  rtx abs_addr = gen_reg_rtx (VnMODE (vf, DImode));
+  bool use_di = GET_MODE_INNER (GET_MODE (scaled_offsets)) == DImode;
 
   if (CONST_INT_P (scale)
       && INTVAL (scale) > 0
       && exact_log2 (INTVAL (scale)) >= 0)
-    emit_insn (gen_ashlvNsi3 (tmpsi, offsets,
-                             GEN_INT (exact_log2 (INTVAL (scale))),
-                             NULL, exec));
+    emit_insn (gen_ashlvNm3 (scaled_offsets, offsets,
+                            GEN_INT (exact_log2 (INTVAL (scale))),
+                            NULL, exec));
   else
-     emit_insn (gen_mulvNsi3_dup (tmpsi, offsets, scale, NULL, exec));
+     emit_insn (gen_mulvNm3_dup (scaled_offsets, scale, offsets, NULL, exec));
 
+  /* No instructions support DImode offsets.  */
+  if (use_di)
+    {
+      emit_insn (gen_addvNdi3_dup (abs_addr, base, scaled_offsets, NULL, exec));
+      return abs_addr;
+    }
   /* "Global" instructions do not support negative register offsets.  */
-  if (as == ADDR_SPACE_FLAT || !unsigned_p)
+  else if (as == ADDR_SPACE_FLAT || !unsigned_p)
     {
       if (unsigned_p)
-        emit_insn (gen_addvNdi3_zext_dup2 (tmpdi, tmpsi, base, NULL, exec));
+       emit_insn (gen_addvNdi3_zext_dup2 (abs_addr, scaled_offsets, base,
+                                          NULL, exec));
       else
-        emit_insn (gen_addvNdi3_sext_dup2 (tmpdi, tmpsi, base, NULL, exec));
-      return tmpdi;
+       emit_insn (gen_addvNdi3_sext_dup2 (abs_addr, scaled_offsets, base,
+                                          NULL, exec));
+      return abs_addr;
     }
   else if (as == ADDR_SPACE_GLOBAL)
-    return tmpsi;
+    return scaled_offsets;
 
   gcc_unreachable ();
 }
-- 
2.50.0

Reply via email to