Hi!

The following patch canonicalizes fancy ways of writing cond ? A : B
into COND_EXPR, which is the form we expect people to write and therefore
the form we optimize best.  If in some case we don't optimize it better,
the right fix is to improve the COND_EXPR handling, as that is what
people are using in the wild most of the time.
E.g. for the testcase in the patch on x86_64-linux with -O2, the test
function used to be 519 bytes long and is now 223 bytes; with -O2
-march=skylake it was likewise 519 bytes and is now 275 bytes (in
that case the SSE4.1+ min/max instructions are used).
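
To make the transformation concrete, here is a hand-written sketch (not
part of the patch; umax and umax_canonical are just illustration names)
of one of the idioms from the testcase and the ternary form the new
patterns canonicalize it to:

  /* Branchless max as commonly written in the wild.  When a < b the
     comparison yields 1, -(a < b) is an all-ones mask, and the whole
     expression evaluates to a - (a - b) == b; otherwise the mask is 0
     and the result is a.  */
  static inline unsigned
  umax (unsigned a, unsigned b)
  {
    return a - ((a - b) & -(a < b));
  }

  /* With the new match.pd patterns this is treated as if written: */
  static inline unsigned
  umax_canonical (unsigned a, unsigned b)
  {
    return (a < b) ? b : a;  /* later recognized as MAX_EXPR, cf. the
                                scan-tree-dump directives in the test.  */
  }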

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-12-06  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/92834
        * match.pd (A - ((A - B) & -(C cmp D)) -> (C cmp D) ? B : A,
        A + ((B - A) & -(C cmp D)) -> (C cmp D) ? B : A): New simplifications.

        * gcc.dg/tree-ssa/pr92834.c: New test.

--- gcc/match.pd.jj     2019-12-06 14:07:26.877749065 +0100
+++ gcc/match.pd        2019-12-06 15:06:08.042953309 +0100
@@ -2697,6 +2697,31 @@ (define_operator_list COND_TERNARY
   (cmp (minmax @0 INTEGER_CST@1) INTEGER_CST@2)
   (comb (cmp @0 @2) (cmp @1 @2))))
 
+/* Undo fancy way of writing max/min or other ?: expressions,
+   like a - ((a - b) & -(a < b)), in this case into (a < b) ? b : a.
+   People normally use ?: and that is what we actually try to optimize.  */
+(for cmp (simple_comparison)
+ (simplify
+  (minus @0 (bit_and:c (minus @0 @1)
+                      (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
+  (if (INTEGRAL_TYPE_P (type)
+       && INTEGRAL_TYPE_P (TREE_TYPE (@4))
+       && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
+       && INTEGRAL_TYPE_P (TREE_TYPE (@5))
+       && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
+          || !TYPE_UNSIGNED (TREE_TYPE (@4))))
+   (cond (cmp @2 @3) @1 @0)))
+ (simplify
+  (plus:c @0 (bit_and:c (minus @1 @0)
+                       (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
+  (if (INTEGRAL_TYPE_P (type)
+       && INTEGRAL_TYPE_P (TREE_TYPE (@4))
+       && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
+       && INTEGRAL_TYPE_P (TREE_TYPE (@5))
+       && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
+          || !TYPE_UNSIGNED (TREE_TYPE (@4))))
+   (cond (cmp @2 @3) @1 @0))))
+
 /* Simplifications of shift and rotates.  */
 
 (for rotate (lrotate rrotate)
--- gcc/testsuite/gcc.dg/tree-ssa/pr92834.c.jj  2019-12-06 15:24:56.817353747 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr92834.c     2019-12-06 15:24:08.921100518 +0100
@@ -0,0 +1,122 @@
+/* PR tree-optimization/92834 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "MIN_EXPR <" 8 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "MAX_EXPR <" 8 "optimized" } } */
+
+static inline unsigned
+umax1 (unsigned a, unsigned b)
+{
+  return a - ((a - b) & -(a < b));
+}
+
+static inline unsigned
+umin1 (unsigned a, unsigned b)
+{
+  return a - ((a - b) & -(a > b));
+}
+
+static inline int
+smax1 (int a, int b)
+{
+  return a - ((a - b) & -(a < b));
+}
+
+static inline int
+smin1 (int a, int b)
+{
+  return a - ((a - b) & -(a > b));
+}
+
+static inline unsigned long long
+umax2 (unsigned long long a, unsigned long long b)
+{
+  return a - ((a - b) & -(a <= b));
+}
+
+static inline unsigned long long
+umin2 (unsigned long long a, unsigned long long b)
+{
+  return a - ((a - b) & -(a >= b));
+}
+
+static inline long long
+smax2 (long long a, long long b)
+{
+  return a - ((a - b) & -(a <= b));
+}
+
+static inline long long
+smin2 (long long a, long long b)
+{
+  return a - ((a - b) & -(a >= b));
+}
+
+static inline unsigned
+umax3 (unsigned a, unsigned b)
+{
+  return a + ((b - a) & -(a < b));
+}
+
+static inline unsigned
+umin3 (unsigned a, unsigned b)
+{
+  return a + ((b - a) & -(a > b));
+}
+
+static inline int
+smax3 (int a, int b)
+{
+  return a + ((b - a) & -(a < b));
+}
+
+static inline int
+smin3 (int a, int b)
+{
+  return a + ((b - a) & -(a > b));
+}
+
+static inline unsigned long long
+umax4 (unsigned long long a, unsigned long long b)
+{
+  return a + ((b - a) & -(a <= b));
+}
+
+static inline unsigned long long
+umin4 (unsigned long long a, unsigned long long b)
+{
+  return a + ((b - a) & -(a >= b));
+}
+
+static inline long long
+smax4 (long long a, long long b)
+{
+  return a + ((b - a) & -(a <= b));
+}
+
+static inline long long
+smin4 (long long a, long long b)
+{
+  return a + ((b - a) & -(a >= b));
+}
+
+void
+test (unsigned *x, int *y, unsigned long long *z, long long *w)
+{
+  x[2] = umax1 (x[0], x[1]);
+  x[5] = umin1 (x[2], x[3]);
+  y[2] = smax1 (y[0], y[1]);
+  y[5] = smin1 (y[2], y[3]);
+  z[2] = umax2 (z[0], z[1]);
+  z[5] = umin2 (z[2], z[3]);
+  w[2] = smax2 (w[0], w[1]);
+  w[5] = smin2 (w[2], w[3]);
+  x[8] = umax3 (x[6], x[7]);
+  x[11] = umin3 (x[9], x[10]);
+  y[8] = smax3 (y[6], y[7]);
+  y[11] = smin3 (y[9], y[10]);
+  z[8] = umax4 (z[6], z[7]);
+  z[11] = umin4 (z[9], z[10]);
+  w[8] = smax4 (w[6], w[7]);
+  w[11] = smin4 (w[9], w[10]);
+}

        Jakub
