This patch addresses a code quality regression in GCC 12 by implementing
some constant folding/simplification transformations for REDUC_PLUS_EXPR
in match.pd.  The motivating example is gcc.dg/vect/pr89440.c, which with
-O2 -ffast-math (with vectorization now enabled) currently gets optimized to:

float f (float x)
{
  vector(4) float vect_x_14.11;
  vector(4) float _2;
  float _32;

  _2 = {x_9(D), 0.0, 0.0, 0.0};
  vect_x_14.11_29 = _2 + { 1.0e+1, 2.6e+1, 4.2e+1, 5.8e+1 };
  _32 = .REDUC_PLUS (vect_x_14.11_29); [tail call]
  return _32;
}

With these proposed new transformations, we can simplify the
above code even further, to:

float f (float x)
{
  float _32;
  _32 = x_9(D) + 1.36e+2;
  return _32;
}

[which happens to match what we'd produce with -fno-tree-vectorize,
and with GCC 11].

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check with no new failures.  Ok for mainline?


2022-02-21  Roger Sayle  <ro...@nextmovesoftware.com>

gcc/ChangeLog
        * fold-const.cc (ctor_single_nonzero_element): New function to
        return the single non-zero element of a (vector) constructor.
        * fold-const.h (ctor_single_nonzero_element): Prototype here.
        * match.pd (reduc (constructor@0)): Simplify reductions of a
        constructor containing a single non-zero element.
        (reduc (@0 op VECTOR_CST) ->  (reduc @0) op CONST): Simplify
        reductions of vector operations of the same operator with
        constant vector operands.

gcc/testsuite/ChangeLog
        * gcc.dg/fold-reduc-1.c: New test case.


Thanks in advance,
Roger
--

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 386d573..4283308 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -16792,6 +16792,33 @@ address_compare (tree_code code, tree type, tree op0, 
tree op1,
   return equal;
 }
 
+/* Return the single non-zero element of CONSTRUCTOR T, looking through an
+   SSA_NAME defined by one, or NULL_TREE if there is not exactly one.  */
+tree
+ctor_single_nonzero_element (const_tree t)
+{
+  unsigned HOST_WIDE_INT idx;
+  constructor_elt *ce;
+  tree elt = NULL_TREE;
+
+  /* Look through an SSA_NAME; its definition need not be an assignment.  */
+  if (TREE_CODE (t) == SSA_NAME)
+    if (gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (t)))
+      if (gimple_assign_rhs_code (def) == CONSTRUCTOR)
+        t = gimple_assign_rhs1 (def);
+
+  if (TREE_CODE (t) != CONSTRUCTOR)
+    return NULL_TREE;
+  for (idx = 0; vec_safe_iterate (CONSTRUCTOR_ELTS (t), idx, &ce); idx++)
+    if (!integer_zerop (ce->value) && !real_zerop (ce->value))
+      {
+        if (elt)
+          return NULL_TREE;  /* A second non-zero element.  */
+        elt = ce->value;
+      }
+  return elt;
+}
+
 #if CHECKING_P
 
 namespace selftest {
diff --git a/gcc/fold-const.h b/gcc/fold-const.h
index f217598..b2f0a2f 100644
--- a/gcc/fold-const.h
+++ b/gcc/fold-const.h
@@ -224,6 +224,7 @@ extern const char *c_getstr (tree);
 extern wide_int tree_nonzero_bits (const_tree);
 extern int address_compare (tree_code, tree, tree, tree, tree &, tree &,
                            poly_int64 &, poly_int64 &, bool);
+extern tree ctor_single_nonzero_element (const_tree);
 
 /* Return OFF converted to a pointer offset type suitable as offset for
    POINTER_PLUS_EXPR.  Use location LOC for this conversion.  */
diff --git a/gcc/match.pd b/gcc/match.pd
index d9d8359..047fb50 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -7528,6 +7528,20 @@ and,
        (BIT_FIELD_REF:elt_type @0 { size; } { pos; })
        { elt; })))))))
 
+/* Fold reduction of {x, 0, ...} to x unless signed zeros/sNaNs matter.  */
+(for reduc (IFN_REDUC_PLUS IFN_REDUC_IOR IFN_REDUC_XOR)
+  (simplify (reduc (CONSTRUCTOR@0))
+    (with { tree elt = ctor_single_nonzero_element (@0); }
+      (if (elt && !HONOR_SNANS (type) && !HONOR_SIGNED_ZEROS (type))
+        (non_lvalue { elt; })))))
+
+/* Fold REDUC (@0 op VECTOR_CST) as REDUC (@0) op REDUC (VECTOR_CST).  */
+(for reduc (IFN_REDUC_PLUS IFN_REDUC_MAX IFN_REDUC_MIN IFN_REDUC_FMAX
+            IFN_REDUC_FMIN IFN_REDUC_AND IFN_REDUC_IOR IFN_REDUC_XOR)
+     op (plus max min IFN_FMAX IFN_FMIN bit_and bit_ior bit_xor)
+  (simplify (reduc (op @0 VECTOR_CST@1))
+    (op (reduc:type @0) (reduc:type @1))))
+
 (simplify
  (vec_perm @0 @1 VECTOR_CST@2)
  (with
diff --git a/gcc/testsuite/gcc.dg/fold-reduc-1.c 
b/gcc/testsuite/gcc.dg/fold-reduc-1.c
new file mode 100644
index 0000000..c8360b0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-reduc-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -fdump-tree-optimized" } */
+float foo (float x)  /* Adds a fixed total to X; see the dg-final scan.  */
+{
+ int i;
+ float j;
+ float a = 0;  /* Counts 1..16 over the 4x4 iterations below.  */
+ for (i = 0; i < 4; ++i)
+   {
+     for (j = 0; j < 4; ++j)
+       {
+         a += 1;
+         x += a;  /* In total adds 1 + 2 + ... + 16 = 136 to X.  */
+       }
+   }
+ return x;
+}
+
+/* { dg-final { scan-tree-dump-not "REDUC_PLUS" "optimized"} } */

Reply via email to