For reduction operations (e.g. multiply) that don't have such a tree code, or where the target platform doesn't define an optab handler for the tree code, we can perform the reduction using a series of log(N) shifts (where N = #elements in vector), using the VEC_RSHIFT_EXPR (whole-vector shift) tree code, provided the platform handles the vec_shr_optab.

The first stage is to add some tests of non-(min/max/plus) reductions; here, multiplies. The first test is designed to be non-foldable, so we make sure the architectural instructions line up with what the tree codes specify. The second is designed to be easily constant-propagated, to test the (currently endianness-dependent) constant folding code.

In lib/target-supports.exp, I've defined a new check_effective_target_whole_vector_shift, which I intended to return true for platforms with the vec_shr optab. However, I've not managed to make this test pass on PowerPC - even with -maltivec, -fdump-tree-vect-details gives me a message about the target not supporting vector multiplication - so I've omitted PowerPC from the whole_vector_shift check. This doesn't feel right; suggestions from PowerPC maintainers are welcome.

Tests pass on arm-none-eabi and x86_64-none-linux-gnu;
I also verified that the scan-tree-dump part works on ia64-none-linux-gnu (by compiling to assembly only). (Tests are not run on AArch64, because we have no vec_shr_optab at this point; on PowerPC, as above; or on MIPS, as check_effective_target_vect_int_mult yields 0.)

gcc/testsuite/ChangeLog:

        * lib/target-supports.exp (check_effective_target_whole_vector_shift):
        New.

        * gcc.dg/vect/vect-reduc-mul_1.c: New test.
        * gcc.dg/vect/vect-reduc-mul_2.c: New test.
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_1.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..44f026ff9b561bcf314224c44d51bdd19448851b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_1.c
@@ -0,0 +1,36 @@
+/* { dg-require-effective-target vect_int_mult } */
+/* { dg-require-effective-target whole_vector_shift } */
+
+/* Write a reduction loop to be reduced using vector shifts.  */
+
+extern void abort(void);
+
+unsigned char in[16];
+
+int
+main (unsigned char argc, char **argv)
+{
+  unsigned char i = 0;
+  unsigned char sum = 1;  /* Running product, despite the name.  */
+
+  for (i = 0; i < 16; i++)
+    in[i] = i + i + 1;  /* Odd values 1, 3, ..., 31.  */
+
+  /* Prevent constant propagation of the entire loop below.  */
+  asm volatile ("" : : : "memory");
+
+  for (i = 0; i < 16; i++)
+    sum *= in[i];  /* The multiply reduction under test.  */
+
+  if (sum != 33)  /* 1*3*...*31 modulo 256 (8-bit unsigned char) is 33.  */
+    {
+      __builtin_printf("Failed %d\n", sum);
+      abort();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_2.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..414fba7a5c96c4dd89030682492edb57ebba3b16
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_2.c
@@ -0,0 +1,32 @@
+/* { dg-require-effective-target vect_int_mult } */
+/* { dg-require-effective-target whole_vector_shift } */
+
+/* Write a reduction loop to be reduced using vector shifts and folded.  */
+
+extern void abort(void);
+
+int
+main (unsigned char argc, char **argv)
+{
+  unsigned char in[16];  /* Local and unclobbered, so the loops can fold.  */
+  unsigned char i = 0;
+  unsigned char sum = 1;  /* Running product, despite the name.  */
+
+  for (i = 0; i < 16; i++)
+    in[i] = i + i + 1;  /* Odd values 1, 3, ..., 31.  */
+
+  for (i = 0; i < 16; i++)
+    sum *= in[i];  /* The multiply reduction under test.  */
+
+  if (sum != 33)  /* 1*3*...*31 modulo 256 (8-bit unsigned char) is 33.  */
+    {
+      __builtin_printf("Failed %d\n", sum);
+      abort();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index fa5137ea472e1773be60759caad32bbc7ab4c551..0f4bebd533c9268adfcd4ed250f06fca825c92b1 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3320,6 +3320,22 @@ proc check_effective_target_vect_shift { } {
     return $et_vect_shift_saved
 }
 
+# Return 1 if the target supports whole-vector shifts (vec_shr optab).
+
+proc check_effective_target_whole_vector_shift { } {
+    set answer 0
+    if { [istarget x86_64-*-*]
+	 || [istarget ia64-*-*]
+	 || ([check_effective_target_arm32]
+	     && [check_effective_target_arm_little_endian])
+	 || ([istarget mips*-*-*]
+	     && [check_effective_target_mips_loongson]) } {
+	set answer 1
+    }
+    verbose "check_effective_target_whole_vector_shift: returning $answer" 2
+    return $answer
+}
+
 # Return 1 if the target supports vector bswap operations.
 
 proc check_effective_target_vect_bswap { } {

Reply via email to