Hi,

this patch fixes the check of immediate operands to the builtin vec_permi and
adds a new test for this built-in.

Reg-rested and bootstrapped on s390x.

Is it OK for master? Is it OK for backporting to gcc-11?

Regards,
Marius


--8<----------8<---------8<---------

The builtin vec_permi is peculiar in that its immediate operand is
encoded differently than the immediate operand that is backing the
builtin. This fixes the check for the immediate operand, adding a
regression test in the process.

This partially reverts commit 3191c1f4488d1f7563b563d7ae2a102a26f16d82

gcc/ChangeLog:

2021-05-04  Marius Hillenbrand  <mhil...@linux.ibm.com>

        * config/s390/s390-builtins.def (O_M5, O1_M5, ...): Remove unused 
macros.
        (s390_vec_permi_s64, s390_vec_permi_b64, s390_vec_permi_u64)
        (s390_vec_permi_dbl, s390_vpdi): Use the O3_U2 type for the immediate
        operand.
        * config/s390/s390.c (s390_const_operand_ok): Remove unused
        values.

gcc/testsuite/ChangeLog:

        * gcc.target/s390/zvector/imm-range-error-1.c: Fix test for
        __builtin_s390_vpdi.
        * gcc.target/s390/zvector/vec-permi.c: New test for builtin
        vec_permi.
---
 gcc/config/s390/s390-builtins.def             | 44 ++++++---------
 gcc/config/s390/s390.c                        |  7 ++-
 .../s390/zvector/imm-range-error-1.c          |  2 +-
 .../gcc.target/s390/zvector/vec-permi.c       | 54 +++++++++++++++++++
 4 files changed, 76 insertions(+), 31 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/zvector/vec-permi.c

diff --git a/gcc/config/s390/s390-builtins.def 
b/gcc/config/s390/s390-builtins.def
index f77ab750d22..8ca002dc55a 100644
--- a/gcc/config/s390/s390-builtins.def
+++ b/gcc/config/s390/s390-builtins.def
@@ -29,7 +29,6 @@
 #undef O_U16
 #undef O_U32
 
-#undef O_M5
 #undef O_M12
 
 #undef O_S2
@@ -89,11 +88,6 @@
 #undef O3_U32
 #undef O4_U32
 
-#undef O1_M5
-#undef O2_M5
-#undef O3_M5
-#undef O4_M5
-
 #undef O1_M12
 #undef O2_M12
 #undef O3_M12
@@ -164,20 +158,19 @@
 #define O_U16    8 /* unsigned 16 bit literal */
 #define O_U32    9 /* unsigned 32 bit literal */
 
-#define O_M5    10 /* matches bitmask of 5 */
-#define O_M12   11 /* matches bitmask of 12 */
+#define O_M12   10 /* matches bitmask of 12 */
 
-#define O_S2    12 /* signed  2 bit literal */
-#define O_S3    13 /* signed  3 bit literal */
-#define O_S4    14 /* signed  4 bit literal */
-#define O_S5    15 /* signed  5 bit literal */
-#define O_S8    16 /* signed  8 bit literal */
-#define O_S12   17 /* signed 12 bit literal */
-#define O_S16   18 /* signed 16 bit literal */
-#define O_S32   19 /* signed 32 bit literal */
+#define O_S2    11 /* signed  2 bit literal */
+#define O_S3    12 /* signed  3 bit literal */
+#define O_S4    13 /* signed  4 bit literal */
+#define O_S5    14 /* signed  5 bit literal */
+#define O_S8    15 /* signed  8 bit literal */
+#define O_S12   16 /* signed 12 bit literal */
+#define O_S16   17 /* signed 16 bit literal */
+#define O_S32   18 /* signed 32 bit literal */
 
-#define O_ELEM  20 /* Element selector requiring modulo arithmetic. */
-#define O_LIT   21 /* Operand must be a literal fitting the target type.  */
+#define O_ELEM  19 /* Element selector requiring modulo arithmetic. */
+#define O_LIT   20 /* Operand must be a literal fitting the target type.  */
 
 #define O_SHIFT 5
 
@@ -230,11 +223,6 @@
 #define O3_U32 (O_U32 << (2 * O_SHIFT))
 #define O4_U32 (O_U32 << (3 * O_SHIFT))
 
-#define O1_M5 O_M5
-#define O2_M5 (O_M5 << O_SHIFT)
-#define O3_M5 (O_M5 << (2 * O_SHIFT))
-#define O4_M5 (O_M5 << (3 * O_SHIFT))
-
 #define O1_M12 O_M12
 #define O2_M12 (O_M12 << O_SHIFT)
 #define O3_M12 (O_M12 << (2 * O_SHIFT))
@@ -671,12 +659,12 @@ OB_DEF_VAR (s390_vec_perm_dbl,          s390_vperm,       
  0,
 B_DEF      (s390_vperm,                 vec_permv16qi,      0,                 
 B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI_UV16QI)
 
 OB_DEF     (s390_vec_permi,             s390_vec_permi_s64, 
s390_vec_permi_dbl, B_VX,               BT_FN_OV4SI_OV4SI_OV4SI_INT)
-OB_DEF_VAR (s390_vec_permi_s64,         s390_vpdi,          0,                 
 O3_M5,              BT_OV_V2DI_V2DI_V2DI_INT)
-OB_DEF_VAR (s390_vec_permi_b64,         s390_vpdi,          0,                 
 O3_M5,              BT_OV_BV2DI_BV2DI_BV2DI_INT)
-OB_DEF_VAR (s390_vec_permi_u64,         s390_vpdi,          0,                 
 O3_M5,              BT_OV_UV2DI_UV2DI_UV2DI_INT)
-OB_DEF_VAR (s390_vec_permi_dbl,         s390_vpdi,          0,                 
 O3_M5,              BT_OV_V2DF_V2DF_V2DF_INT)
+OB_DEF_VAR (s390_vec_permi_s64,         s390_vpdi,          0,                 
 O3_U2,              BT_OV_V2DI_V2DI_V2DI_INT)
+OB_DEF_VAR (s390_vec_permi_b64,         s390_vpdi,          0,                 
 O3_U2,              BT_OV_BV2DI_BV2DI_BV2DI_INT)
+OB_DEF_VAR (s390_vec_permi_u64,         s390_vpdi,          0,                 
 O3_U2,              BT_OV_UV2DI_UV2DI_UV2DI_INT)
+OB_DEF_VAR (s390_vec_permi_dbl,         s390_vpdi,          0,                 
 O3_U2,              BT_OV_V2DF_V2DF_V2DF_INT)
 
-B_DEF      (s390_vpdi,                  vec_permiv2di,      0,                 
 B_VX,               O3_M5,              BT_FN_UV2DI_UV2DI_UV2DI_INT)
+B_DEF      (s390_vpdi,                  vec_permiv2di,      0,                 
 B_VX,               O3_U2,              BT_FN_UV2DI_UV2DI_UV2DI_INT)
 
 OB_DEF     (s390_vec_splat,             s390_vec_splat2_s8, 
s390_vec_splat2_dbl,B_VX,               BT_FN_OV4SI_OV4SI_UCHAR)
 OB_DEF_VAR (s390_vec_splat2_s8,         s390_vrepb,         0,                 
 O2_U4,              BT_OV_V16QI_V16QI_UCHAR)
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 88361f98c7e..6bbeb640e1f 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -734,11 +734,14 @@ s390_const_operand_ok (tree arg, int argnum, int 
op_flags, tree decl)
 {
   if (O_UIMM_P (op_flags))
     {
-      unsigned HOST_WIDE_INT bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 4,  
4 };
-      unsigned HOST_WIDE_INT bitmasks[]  = { 0, 0, 0, 0, 0, 0,  0,  0,  0, 5, 
12 };
+      unsigned HOST_WIDE_INT bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 4 };
+      unsigned HOST_WIDE_INT bitmasks[]  = { 0, 0, 0, 0, 0, 0,  0,  0,  0, 12 
};
       unsigned HOST_WIDE_INT bitwidth = bitwidths[op_flags - O_U1];
       unsigned HOST_WIDE_INT bitmask = bitmasks[op_flags - O_U1];
 
+      gcc_assert(ARRAY_SIZE(bitwidths) == (O_M12 - O_U1 + 1));
+      gcc_assert(ARRAY_SIZE(bitmasks) == (O_M12 - O_U1 + 1));
+
       if (!tree_fits_uhwi_p (arg)
          || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1
          || (bitmask && tree_to_uhwi (arg) & ~bitmask))
diff --git a/gcc/testsuite/gcc.target/s390/zvector/imm-range-error-1.c 
b/gcc/testsuite/gcc.target/s390/zvector/imm-range-error-1.c
index 1fe68f57e4f..021c8d297d2 100644
--- a/gcc/testsuite/gcc.target/s390/zvector/imm-range-error-1.c
+++ b/gcc/testsuite/gcc.target/s390/zvector/imm-range-error-1.c
@@ -22,5 +22,5 @@ main ()
   __builtin_s390_vrepf (s,  4); /* { dg-error "constant argument 2 for builtin 
'__builtin_s390_vrepf' is out of range \\(0..3\\)" } */
   __builtin_s390_vrepg (d,  2); /* { dg-error "constant argument 2 for builtin 
'__builtin_s390_vrepg' is out of range \\(0..1\\)" } */
 
-  __builtin_s390_vpdi (d, d, 2); /* { dg-error "constant argument 3 for 
builtin '__builtin_s390_vpdi' is invalid \\(0, 1, 4, 5\\)" } */
+  __builtin_s390_vpdi (d, d, 4); /* { dg-error "constant argument 3 for 
builtin '__builtin_s390_vpdi' is out of range \\(0..3\\)" } */
 }
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-permi.c 
b/gcc/testsuite/gcc.target/s390/zvector/vec-permi.c
new file mode 100644
index 00000000000..c0a852b9703
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/zvector/vec-permi.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=z13 -mzarch --save-temps" } */
+/* { dg-do run { target { s390_z13_hw } } } */
+
+/*
+ * The vector intrinsic vec_permi(a, b, c) chooses one of the two eight-byte
+ * vector elements in each of a and b, depending on the value of c. The valid
+ * values for c differ from the encoding for the M4 field in assembly and in 
the
+ * binary instruction.
+ *
+ * selection | c | encoding in assembly
+ * a[0] b[0] | 0 | 0
+ * a[0] b[1] | 1 | 1
+ * a[1] b[0] | 2 | 4
+ * a[1] b[1] | 3 | 5
+ *
+ * (i.e., indices a[i] b[j] are encoded for c as (i<<1) | j, yet for the
+ * M4 field as (i<<2) | j.
+ */
+#include <assert.h>
+#include <vecintrin.h>
+
+typedef unsigned long long uv2di __attribute__((vector_size(16)));
+
+__attribute__ ((noipa)) static uv2di
+do_vec_permi(uv2di a, uv2di b, int c)
+{
+    switch(c) {
+       case 0: return vec_permi(a, b, 0);
+       case 1: return vec_permi(a, b, 1);
+       case 2: return vec_permi(a, b, 2);
+       case 3: return vec_permi(a, b, 3);
+       default: assert(0);
+    }
+}
+
+/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,4\n} 1 } } */
+/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,5\n} 1 } } */
+
+int
+main (void)
+{
+    uv2di a = { 0xa0, 0xa1 };
+    uv2di b = { 0xb0, 0xb1 };
+
+    for (int i = 0; i < 2; i++)
+       for (int j = 0; j < 2; j++) {
+           uv2di res = do_vec_permi(a, b, (i<<1)|j);
+           assert(res[0] == a[i]);
+           assert(res[1] == b[j]);
+       }
+}
-- 
2.26.2

Reply via email to