Hi!

The following gcc.dg/pr101384.c testcase is miscompiled on
powerpc64le-linux.
easy_altivec_constant has code to try construct vector constants with
different element sizes, perhaps different from CONST_VECTOR's mode.  But as
written, that works fine for vspltis[bhw] cases, but not for the vspltisw
x,-1; vsl[bhw] x,x,x case, because that creates always a V16QImode, V8HImode
or V4SImode constant containing broadcasted constant with just the MSB set. 
The vspltis_constant function etc. expects the vspltis[bhw] instructions
where the small [-16..15] or even [-32..30] constant is sign-extended to the
remaining step bytes, but that is not the case for the 0x80...00 constants,
with step > 1 we can't handle e.g.
{ 0x80, 0xff, 0xff, 0xff, 0x80, 0xff, 0xff, 0xff, 0x80, 0xff, 0xff, 0xff, 0x80, 
0xff, 0xff, 0xff }
vectors but do want to handle e.g.
{ 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80 }
and similarly with copies > 1 we do want to handle e.g.
{ 0x80808080, 0x80808080, 0x80808080, 0x80808080 }.

Bootstrapped/regtested on powerpc64le-linux and powerpc64-linux (the latter
regtested with -m32/-m64), ok for trunk?

Perhaps for backports it would be best to limit the EASY_VECTOR_MSB case
matching to step == 1 && copies == 1, because that is the only case the
splitter handled correctly, but as can be seen in the gcc.target tests, the
patch tries to handle it for all the cases.  Do you want that other patch
or prefer this patch for the backports too?

2021-07-13  Jakub Jelinek  <ja...@redhat.com>

        PR target/101384
        * config/rs6000/rs6000-protos.h (easy_altivec_constant): Change return
        type from bool to int.
        * config/rs6000/rs6000.c (vspltis_constant): Fix up handling the
        EASY_VECTOR_MSB case if either step or copies is not 1.
        (vspltis_shifted): Fix comment typo.
        (easy_altivec_constant): Change return type from bool to int, instead
        of returning true return byte size of the element mode that should be
        used to synthetize the constant.
        * config/rs6000/predicates.md (easy_vector_constant_msb): Require
        that vspltis_shifted is 0, handle the case where easy_altivec_constant
        assumes using different vector mode from CONST_VECTOR's mode.
        * config/rs6000/altivec.md (easy_vector_constant_msb splitter): Use
        easy_altivec_constant to determine mode in which -1 >> -1 should be
        performed, use rs6000_expand_vector_init instead of gen_vec_initv4sisi.

        * gcc.dg/pr101384.c: New test.
        * gcc.target/powerpc/pr101384-1.c: New test.
        * gcc.target/powerpc/pr101384-2.c: New test.

--- gcc/config/rs6000/rs6000-protos.h.jj        2021-07-13 09:07:03.697092286 
+0200
+++ gcc/config/rs6000/rs6000-protos.h   2021-07-13 11:28:54.876243593 +0200
@@ -30,7 +30,7 @@ extern void init_cumulative_args (CUMULA
                                  tree, machine_mode);
 #endif /* TREE_CODE */
 
-extern bool easy_altivec_constant (rtx, machine_mode);
+extern int easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
--- gcc/config/rs6000/rs6000.c.jj       2021-07-13 09:07:03.715092036 +0200
+++ gcc/config/rs6000/rs6000.c  2021-07-13 12:18:08.715706507 +0200
@@ -6134,6 +6134,27 @@ vspltis_constant (rtx op, unsigned step,
   splat_val = val;
   msb_val = val >= 0 ? 0 : -1;
 
+  if (val == 0 && step > 1)
+    {
+      /* Special case for loading most significant bit with step > 1.
+        In that case, match 0s in all but step-1s elements, where match
+        EASY_VECTOR_MSB.  */
+      for (i = 1; i < nunits; ++i)
+       {
+         unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
+         HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
+         if ((i & (step - 1)) == step - 1)
+           {
+             if (!EASY_VECTOR_MSB (elt_val, inner))
+               break;
+           }
+         else if (elt_val)
+           break;
+       }
+      if (i == nunits)
+       return true;
+    }
+
   /* Construct the value to be splatted, if possible.  If not, return 0.  */
   for (i = 2; i <= copies; i *= 2)
     {
@@ -6146,6 +6167,7 @@ vspltis_constant (rtx op, unsigned step,
           | (small_val & mask)))
        return false;
       splat_val = small_val;
+      inner = smallest_int_mode_for_size (bitsize);
     }
 
   /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw].  */
@@ -6160,8 +6182,9 @@ vspltis_constant (rtx op, unsigned step,
     ;
 
   /* Also check if are loading up the most significant bit which can be done by
-     loading up -1 and shifting the value left by -1.  */
-  else if (EASY_VECTOR_MSB (splat_val, inner))
+     loading up -1 and shifting the value left by -1.  Only do this for
+     step 1 here, for larger steps it is done earlier.  */
+  else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1)
     ;
 
   else
@@ -6271,15 +6294,15 @@ vspltis_shifted (rtx op)
        }
     }
 
-  /* If all elements are equal, we don't need to do VLSDOI.  */
+  /* If all elements are equal, we don't need to do VSLDOI.  */
   return 0;
 }
 
 
-/* Return true if OP is of the given MODE and can be synthesized
-   with a vspltisb, vspltish or vspltisw.  */
+/* Return non-zero (element mode byte size) if OP is of the given MODE
+   and can be synthesized with a vspltisb, vspltish or vspltisw.  */
 
-bool
+int
 easy_altivec_constant (rtx op, machine_mode mode)
 {
   unsigned step, copies;
@@ -6287,39 +6310,39 @@ easy_altivec_constant (rtx op, machine_m
   if (mode == VOIDmode)
     mode = GET_MODE (op);
   else if (mode != GET_MODE (op))
-    return false;
+    return 0;
 
   /* V2DI/V2DF was added with VSX.  Only allow 0 and all 1's as easy
      constants.  */
   if (mode == V2DFmode)
-    return zero_constant (op, mode);
+    return zero_constant (op, mode) ? 8 : 0;
 
   else if (mode == V2DImode)
     {
       if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
          || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
-       return false;
+       return 0;
 
       if (zero_constant (op, mode))
-       return true;
+       return 8;
 
       if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
          && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
-       return true;
+       return 8;
 
-      return false;
+      return 0;
     }
 
   /* V1TImode is a special container for TImode.  Ignore for now.  */
   else if (mode == V1TImode)
-    return false;
+    return 0;
 
   /* Start with a vspltisw.  */
   step = GET_MODE_NUNITS (mode) / 4;
   copies = 1;
 
   if (vspltis_constant (op, step, copies))
-    return true;
+    return 4;
 
   /* Then try with a vspltish.  */
   if (step == 1)
@@ -6328,7 +6351,7 @@ easy_altivec_constant (rtx op, machine_m
     step >>= 1;
 
   if (vspltis_constant (op, step, copies))
-    return true;
+    return 2;
 
   /* And finally a vspltisb.  */
   if (step == 1)
@@ -6337,12 +6360,12 @@ easy_altivec_constant (rtx op, machine_m
     step >>= 1;
 
   if (vspltis_constant (op, step, copies))
-    return true;
+    return 1;
 
   if (vspltis_shifted (op) != 0)
-    return true;
+    return GET_MODE_SIZE (GET_MODE_INNER (mode));
 
-  return false;
+  return 0;
 }
 
 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
--- gcc/config/rs6000/predicates.md.jj  2021-05-31 10:11:15.138979068 +0200
+++ gcc/config/rs6000/predicates.md     2021-07-13 12:45:18.480295174 +0200
@@ -683,15 +683,26 @@ (define_predicate "easy_vector_constant_
 (define_predicate "easy_vector_constant_msb"
   (and (match_code "const_vector")
        (and (match_test "TARGET_ALTIVEC")
-           (match_test "easy_altivec_constant (op, mode)")))
+           (match_test "easy_altivec_constant (op, mode)")
+           (match_test "vspltis_shifted (op) == 0")))
 {
   HOST_WIDE_INT val;
-  int elt;
+  int elt, sz = easy_altivec_constant (op, mode);
+  machine_mode inner = GET_MODE_INNER (mode);
+  int isz = GET_MODE_SIZE (inner);
   if (mode == V2DImode || mode == V2DFmode)
     return 0;
   elt = BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 : 0;
+  if (isz < sz)
+    {
+      if (const_vector_elt_as_int (op, elt) != 0)
+       return 0;
+      elt += (BYTES_BIG_ENDIAN ? -1 : 1) * (sz - isz) / isz;
+    }
+  else if (isz > sz)
+    inner = smallest_int_mode_for_size (sz * BITS_PER_UNIT);
   val = const_vector_elt_as_int (op, elt);
-  return EASY_VECTOR_MSB (val, GET_MODE_INNER (mode));
+  return EASY_VECTOR_MSB (val, inner);
 })
 
 ;; Return true if this is an easy altivec constant that we form
--- gcc/config/rs6000/altivec.md.jj     2021-07-13 09:07:03.697092286 +0200
+++ gcc/config/rs6000/altivec.md        2021-07-13 12:54:25.619804641 +0200
@@ -317,22 +317,26 @@ (define_split
   [(const_int 0)]
 {
   rtx dest = operands[0];
-  machine_mode mode = GET_MODE (operands[0]);
+  machine_mode mode;
   rtvec v;
   int i, num_elements;
 
-  if (mode == V4SFmode)
+  switch (easy_altivec_constant (operands[1], <MODE>mode))
     {
-      mode = V4SImode;
-      dest = gen_lowpart (V4SImode, dest);
+    case 1: mode = V16QImode; break;
+    case 2: mode = V8HImode; break;
+    case 4: mode = V4SImode; break;
+    default: gcc_unreachable ();
     }
+  if (mode != <MODE>mode)
+    dest = gen_lowpart (mode, dest);
 
   num_elements = GET_MODE_NUNITS (mode);
   v = rtvec_alloc (num_elements);
   for (i = 0; i < num_elements; i++)
     RTVEC_ELT (v, i) = constm1_rtx;
 
-  emit_insn (gen_vec_initv4sisi (dest, gen_rtx_PARALLEL (mode, v)));
+  rs6000_expand_vector_init (dest, gen_rtx_PARALLEL (mode, v));
   emit_insn (gen_rtx_SET (dest, gen_rtx_ASHIFT (mode, dest, dest)));
   DONE;
 })
--- gcc/testsuite/gcc.dg/pr101384.c.jj  2021-07-13 13:45:42.971992584 +0200
+++ gcc/testsuite/gcc.dg/pr101384.c     2021-07-13 13:45:32.427135184 +0200
@@ -0,0 +1,39 @@
+/* PR target/101384 */
+/* { dg-do run } */
+/* { dg-options "-O2 -Wno-psabi -w" } */
+
+typedef unsigned char __attribute__((__vector_size__ (16))) U;
+typedef unsigned short __attribute__((__vector_size__ (8 * sizeof (short)))) V;
+
+U u;
+V v;
+
+__attribute__((noipa)) U
+foo (void)
+{
+  U y = (U) { 0x80, 0xff, 0xff, 0xff, 0x80, 0xff, 0xff, 0xff,
+              0x80, 0xff, 0xff, 0xff, 0x80, 0xff, 0xff, 0xff } + u;
+  return y;
+}
+
+__attribute__((noipa)) V
+bar (void)
+{
+  V y = (V) { 0x8000, 0xffff, 0x8000, 0xffff,
+              0x8000, 0xffff, 0x8000, 0xffff } + v;
+  return y;
+}
+
+int
+main ()
+{
+  U x = foo ();
+  for (unsigned i = 0; i < 16; i++)
+    if (x[i] != ((i & 3) ? 0xff : 0x80))
+      __builtin_abort ();
+  V y = bar ();
+  for (unsigned i = 0; i < 8; i++)
+    if (y[i] != ((i & 1) ? 0xffff : 0x8000))
+      __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.target/powerpc/pr101384-1.c.jj    2021-07-13 
14:02:57.003030314 +0200
+++ gcc/testsuite/gcc.target/powerpc/pr101384-1.c       2021-07-13 
14:02:40.868247714 +0200
@@ -0,0 +1,79 @@
+/* PR target/101384 */
+/* { dg-do compile { target le } } */
+/* { dg-options "-O2 -maltivec" } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-final { scan-assembler-times {\mvspltis[whb] [^\n\r]*,-1\M} 9 } } */
+/* { dg-final { scan-assembler-times {\mvslw\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mvslh\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mvslb\M} 3 } } */
+
+typedef unsigned char __attribute__((__vector_size__ (16))) U;
+typedef unsigned short __attribute__((__vector_size__ (16))) V;
+typedef unsigned int __attribute__((__vector_size__ (16))) W;
+
+U u;
+V v;
+W w;
+
+U
+f1 (void)
+{
+  U y = (U) { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80 } + u;
+  return y;
+}
+
+U
+f2 (void)
+{
+  U y = (U) { 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 
0, 0x80 } + u;
+  return y;
+}
+
+U
+f3 (void)
+{
+  U y = (U) { 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80 } + u;
+  return y;
+}
+
+V
+f4 (void)
+{
+  V y = (V) { 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080 } 
+ v;
+  return y;
+}
+
+V
+f5 (void)
+{
+  V y = (V) { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 } 
+ v;
+  return y;
+}
+
+V
+f6 (void)
+{
+  V y = (V) { 0, 0x8000, 0, 0x8000, 0, 0x8000, 0, 0x8000 } + v;
+  return y;
+}
+
+W
+f7 (void)
+{
+  W y = (W) { 0x80808080, 0x80808080, 0x80808080, 0x80808080 } + w;
+  return y;
+}
+
+W
+f8 (void)
+{
+  W y = (W) { 0x80008000, 0x80008000, 0x80008000, 0x80008000 } + w;
+  return y;
+}
+
+W
+f9 (void)
+{
+  W y = (W) { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } + w;
+  return y;
+}
--- gcc/testsuite/gcc.target/powerpc/pr101384-2.c.jj    2021-07-13 
14:03:04.282932227 +0200
+++ gcc/testsuite/gcc.target/powerpc/pr101384-2.c       2021-07-13 
14:04:03.463134848 +0200
@@ -0,0 +1,79 @@
+/* PR target/101384 */
+/* { dg-do compile { target be } } */
+/* { dg-options "-O2 -maltivec" } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-final { scan-assembler-times {\mvspltis[whb] [^\n\r]*,-1\M} 9 } } */
+/* { dg-final { scan-assembler-times {\mvslw\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mvslh\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mvslb\M} 3 } } */
+
+typedef unsigned char __attribute__((__vector_size__ (16))) U;
+typedef unsigned short __attribute__((__vector_size__ (16))) V;
+typedef unsigned int __attribute__((__vector_size__ (16))) W;
+
+U u;
+V v;
+W w;
+
+U
+f1 (void)
+{
+  U y = (U) { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80 } + u;
+  return y;
+}
+
+U
+f2 (void)
+{
+  U y = (U) { 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 
0x80, 0 } + u;
+  return y;
+}
+
+U
+f3 (void)
+{
+  U y = (U) { 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0 } + u;
+  return y;
+}
+
+V
+f4 (void)
+{
+  V y = (V) { 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080 } 
+ v;
+  return y;
+}
+
+V
+f5 (void)
+{
+  V y = (V) { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 } 
+ v;
+  return y;
+}
+
+V
+f6 (void)
+{
+  V y = (V) { 0x8000, 0, 0x8000, 0, 0x8000, 0, 0x8000, 0 } + v;
+  return y;
+}
+
+W
+f7 (void)
+{
+  W y = (W) { 0x80808080, 0x80808080, 0x80808080, 0x80808080 } + w;
+  return y;
+}
+
+W
+f8 (void)
+{
+  W y = (W) { 0x80008000, 0x80008000, 0x80008000, 0x80008000 } + w;
+  return y;
+}
+
+W
+f9 (void)
+{
+  W y = (W) { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } + w;
+  return y;
+}

        Jakub

Reply via email to