Hi Peter,

On 7/1/21 2:48 PM, Peter Bergner via Gcc-patches wrote:
gcc/
        * config/rs6000/rs6000-builtin.def (BU_MMA_PAIR_LD, BU_MMA_PAIR_ST):
        New macros.
        (__builtin_vsx_lxvp, __builtin_vsx_stxvp): New built-ins.
        * config/rs6000/rs6000-call.c (rs6000_gimple_fold_mma_builtin): Expand
        lxvp and stxvp built-ins.
        (mma_init_builtins): Handle lxvp and stxvp built-ins.
        (builtin_function_type): Likewise.
        * doc/extend.texi (__builtin_vsx_lxvp, __builtin_mma_stxvp): Document.

gcc/testsuite/
        * gcc.target/powerpc/mma-builtin-7.c: New test.
        * gcc.target/powerpc/mma-builtin-8.c: New test.


diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index d7ce4de421e..6270444ef70 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -484,6 +484,25 @@
                     | RS6000_BTC_SENARY),                              \
                    CODE_FOR_ ## ICODE)                 /* ICODE */

+#define BU_MMA_PAIR_LD(ENUM, NAME, ATTR)                               \
+  RS6000_BUILTIN_M (VSX_BUILTIN_ ## ENUM,              /* ENUM */      \
+                   "__builtin_vsx_" NAME,            /* NAME */      \
+                   RS6000_BTM_MMA,                     /* MASK */      \
+                   (RS6000_BTC_ ## ATTR                /* ATTR */      \
+                    | RS6000_BTC_BINARY                                \
+                    | RS6000_BTC_GIMPLE),                              \
+                   CODE_FOR_nothing)                   /* ICODE */
+
+#define BU_MMA_PAIR_ST(ENUM, NAME, ATTR)                               \
+  RS6000_BUILTIN_M (VSX_BUILTIN_ ## ENUM,              /* ENUM */      \
+                   "__builtin_vsx_" NAME,            /* NAME */      \
+                   RS6000_BTM_MMA,                     /* MASK */      \
+                   (RS6000_BTC_ ## ATTR                /* ATTR */      \
+                    | RS6000_BTC_TERNARY                               \
+                    | RS6000_BTC_VOID                                  \
+                    | RS6000_BTC_GIMPLE),                              \
+                   CODE_FOR_nothing)                   /* ICODE */
+
  /* ISA 2.05 (power6) convenience macros. */
  /* For functions that depend on the CMPB instruction */
  #define BU_P6_2(ENUM, NAME, ATTR, ICODE)                              \
@@ -3253,6 +3272,9 @@ BU_SPECIAL_X (RS6000_BUILTIN_CFSTRING, 
"__builtin_cfstring", RS6000_BTM_ALWAYS,
  BU_P10V_VSX_1 (XVCVBF16SPN,    "xvcvbf16spn",       MISC, vsx_xvcvbf16spn)
  BU_P10V_VSX_1 (XVCVSPBF16,        "xvcvspbf16",     MISC, vsx_xvcvspbf16)

+BU_MMA_PAIR_LD (LXVP,      "lxvp",           MISC)
+BU_MMA_PAIR_ST (STXVP,     "stxvp",          PAIR)
+
  BU_MMA_1 (XXMFACC,        "xxmfacc",                QUAD, mma_xxmfacc)
  BU_MMA_1 (XXMTACC,        "xxmtacc",                QUAD, mma_xxmtacc)
  BU_MMA_1 (XXSETACCZ,      "xxsetaccz",      MISC, mma_xxsetaccz)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index b67789845a5..6115e3b34d9 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -11913,6 +11913,32 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator 
*gsi)
        gsi_replace_with_seq (gsi, new_seq, true);
        return true;
      }
+  else if (fncode == VSX_BUILTIN_LXVP)
+    {
+      push_gimplify_context (true);
+      tree offset = gimple_call_arg (stmt, 0);
+      tree ptr = gimple_call_arg (stmt, 1);
+      tree lhs = gimple_call_lhs (stmt);
+      tree mem = build_simple_mem_ref (build2 (POINTER_PLUS_EXPR,
+                                              TREE_TYPE (ptr), ptr, offset));
+      gimplify_assign (lhs, mem, &new_seq);
+      pop_gimplify_context (NULL);
+      gsi_replace_with_seq (gsi, new_seq, true);
+      return true;
+    }
+  else if (fncode == VSX_BUILTIN_STXVP)
+    {
+      push_gimplify_context (true);
+      tree src = gimple_call_arg (stmt, 0);
+      tree offset = gimple_call_arg (stmt, 1);
+      tree ptr = gimple_call_arg (stmt, 2);
+      tree mem = build_simple_mem_ref (build2 (POINTER_PLUS_EXPR,
+                                              TREE_TYPE (ptr), ptr, offset));
+      gimplify_assign (mem, src, &new_seq);
+      pop_gimplify_context (NULL);
+      gsi_replace_with_seq (gsi, new_seq, true);
+      return true;
+    }

    /* Convert this built-in into an internal version that uses pass-by-value
       arguments.  The internal built-in follows immediately after this one.  */
@@ -14264,11 +14290,14 @@ mma_init_builtins (void)
        if (gimple_func)
        {
          gcc_assert (icode == CODE_FOR_nothing);
-         op[nopnds++] = void_type_node;
          /* Some MMA built-ins that are expanded into gimple are converted
             into internal MMA built-ins that are expanded into rtl.
             The internal built-in follows immediately after this built-in.  */
-         icode = d[1].icode;
+         if (d[1].icode != CODE_FOR_nothing)
+           {
+             op[nopnds++] = void_type_node;
+             icode = d[1].icode;
+           }

This hunk bothers me.  The new macros BU_MMA_PAIR_LD and BU_MMA_PAIR_ST define only one builtin, not two.  They are both flagged as RS6000_BTC_GIMPLE.  The use of d[1] in this case is suspect and fragile.  It appears you're relying on the built-in following each of __builtin_vsx_lxvp and __builtin_vsx_stxvp to exist, as otherwise d[1] will be out of bounds.  It's otherwise rather meaningless because you later handle VSX_BUILTIN_LXVP and VSX_BUILTIN_STXVP separately.  Can you please replace this with something less fragile?  Perhaps leave the gimple_func leg alone, but first test for these two builtins and do the right thing for them instead.

Otherwise this LGTM.

Thanks,
Bill

        }
        else
        {
@@ -14291,6 +14320,19 @@ mma_init_builtins (void)
          else
            op[nopnds++] = build_pointer_type (vector_pair_type_node);
        }
+      else if (d->code == VSX_BUILTIN_LXVP)
+       {
+         op[nopnds++] = vector_pair_type_node;
+         op[nopnds++] = sizetype;
+         op[nopnds++] = build_pointer_type (vector_pair_type_node);
+       }
+      else if (d->code == VSX_BUILTIN_STXVP)
+       {
+         op[nopnds++] = void_type_node;
+         op[nopnds++] = vector_pair_type_node;
+         op[nopnds++] = sizetype;
+         op[nopnds++] = build_pointer_type (vector_pair_type_node);
+       }
        else
        {
          /* This is a normal MMA built-in function.  */
@@ -14781,6 +14823,16 @@ builtin_function_type (machine_mode mode_ret, 
machine_mode mode_arg0,
        h.uns_p[2] = 1;
        break;

+    case VSX_BUILTIN_LXVP:
+      h.uns_p[0] = 1;
+      h.uns_p[2] = 1;
+      break;
+
+    case VSX_BUILTIN_STXVP:
+      h.uns_p[1] = 1;
+      h.uns_p[3] = 1;
+      break;
+
      default:
        break;
      }
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 8fc66d626d8..b83cd4919bb 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -20731,6 +20731,9 @@ void __builtin_vsx_disassemble_pair (void *, 
__vector_pair *);

  vec_t __builtin_vsx_xvcvspbf16 (vec_t);
  vec_t __builtin_vsx_xvcvbf16spn (vec_t);
+
+__vector_pair __builtin_vsx_lxvp (size_t, __vector_pair *);
+void __builtin_vsx_stxvp (__vector_pair, size_t, __vector_pair *);
  @end smallexample

  @node PRU Built-in Functions
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-7.c 
b/gcc/testsuite/gcc.target/powerpc/mma-builtin-7.c
new file mode 100644
index 00000000000..c661a4b84bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-7.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+void
+foo (__vector_pair *dst, __vector_pair *src, long idx)
+{
+  dst[0] = __builtin_vsx_lxvp (0, src);
+  dst[2] = __builtin_vsx_lxvp (32, src);
+  dst[4] = __builtin_vsx_lxvp (64, src);
+  /* Non-constant offset should generate a lxvpx.  */
+  dst[6] = __builtin_vsx_lxvp (idx, src);
+  /* Non-aligned offset should generate a plxvp.  */
+  dst[8] = __builtin_vsx_lxvp (257, src);
+}
+
+#if !__has_builtin (__builtin_vsx_lxvp)
+#  error "__has_builtin (__builtin_vsx_lxvp) failed"
+#endif
+
+/* { dg-final { scan-assembler-not {\mlxv\M} } } */
+/* { dg-final { scan-assembler-not {\mstxv\M} } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mlxvpx\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mplxvp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 5 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-8.c 
b/gcc/testsuite/gcc.target/powerpc/mma-builtin-8.c
new file mode 100644
index 00000000000..af29e479f83
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-8.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+void
+foo (__vector_pair *dst, __vector_pair *src, long idx)
+{
+  __vector_pair pair = *src;
+  __builtin_vsx_stxvp (pair, 0, dst);
+  __builtin_vsx_stxvp (pair, 32, dst);
+  __builtin_vsx_stxvp (pair, 64, dst);
+  /* Non-constant offset should generate a stxvpx.  */
+  __builtin_vsx_stxvp (pair, idx, dst);
+  /* Non-aligned offset should generate a pstxvp.  */
+  __builtin_vsx_stxvp (pair, 257, dst);
+}
+
+#if !__has_builtin (__builtin_vsx_stxvp)
+#  error "__has_builtin (__builtin_vsx_stxvp) failed"
+#endif
+
+/* { dg-final { scan-assembler-not {\mlxv\M} } } */
+/* { dg-final { scan-assembler-not {\mstxv\M} } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mstxvpx\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpstxvp\M} 1 } } */

Reply via email to