On January 4, 2019 11:49:45 PM GMT+01:00, Jakub Jelinek <ja...@redhat.com> wrote: >Hi! > >The following patch adds an optimization, if we know certain SSA_NAME >has two possible values and have GIMPLE_COND EQ_EXPR/NE_EXPR of that >SSA_NAME with one of the two values and PHI which has two adjacent >constants, we can optimize it into addition or subtraction. > >Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > >The pr15826.c change is just a small change on what is present in >GIMPLE >with this patch, previously (from the bugzilla apparently only on some >targets) we had a (_Bool) cast and now we have & 1, which effectively >results in exactly the same expansion. > >2019-01-04 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/88676 > * tree-ssa-phiopt.c (two_value_replacement): New function. > (tree_ssa_phiopt_worker): Call it. > > * gcc.dg/tree-ssa/pr88676.c: New test. > * gcc.dg/pr88676.c: New test. > * gcc.dg/tree-ssa/pr15826.c: Just verify there is no goto, > allow &. > >--- gcc/tree-ssa-phiopt.c.jj 2019-01-01 12:37:17.716965779 +0100 >+++ gcc/tree-ssa-phiopt.c 2019-01-04 13:55:26.293746657 +0100 >@@ -48,6 +48,8 @@ along with GCC; see the file COPYING3. > #include "case-cfn-macros.h" > > static unsigned int tree_ssa_phiopt_worker (bool, bool, bool); >+static bool two_value_replacement (basic_block, basic_block, edge, >gphi *, >+ tree, tree); > static bool conditional_replacement (basic_block, basic_block, > edge, edge, gphi *, tree, tree); >static gphi *factor_out_conditional_conversion (edge, edge, gphi *, >tree, tree, >@@ -332,8 +334,11 @@ tree_ssa_phiopt_worker (bool do_store_el > } > > /* Do the replacement of conditional if it can be done. */ >- if (!early_p >- && conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) >+ if (two_value_replacement (bb, bb1, e2, phi, arg0, arg1)) >+ cfgchanged = true; >+ else if (!early_p >+ && conditional_replacement (bb, bb1, e1, e2, phi, >+ arg0, arg1)) > cfgchanged = true; > else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) > cfgchanged = true; >@@ -572,6 +577,118 @@ factor_out_conditional_conversion (edge > return newphi; > } > >+/* Optimize >+ # x_5 in range [cst1, cst2] where cst2 = cst1 + 1 >+ if (x_5 op cstN) # where op is == or != and N is 1 or 2 >+ goto bb3; >+ else >+ goto bb4; >+ bb3: >+ bb4: >+ # r_6 = PHI<cst3(2), cst4(3)> # where cst3 == cst4 + 1 or cst4 == >cst3 + 1 >+ >+ to r_6 = x_5 + (min (cst3, cst4) - cst1) or >+ r_6 = (min (cst3, cst4) + cst1) - x_5 depending on op, N and which >+ of cst3 and cst4 is smaller. */ >+ >+static bool >+two_value_replacement (basic_block cond_bb, basic_block middle_bb, >+ edge e1, gphi *phi, tree arg0, tree arg1) >+{ >+ /* Only look for adjacent integer constants. */ >+ if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)) >+ || !INTEGRAL_TYPE_P (TREE_TYPE (arg1)) >+ || TREE_CODE (arg0) != INTEGER_CST >+ || TREE_CODE (arg1) != INTEGER_CST >+ || (tree_int_cst_lt (arg0, arg1) >+ ? wi::to_widest (arg0) + 1 != wi::to_widest (arg1) >+ : wi::to_widest (arg1) + 1 != wi::to_widest (arg1))) >+ return false; >+ >+ if (!empty_block_p (middle_bb)) >+ return false; >+ >+ gimple *stmt = last_stmt (cond_bb); >+ tree lhs = gimple_cond_lhs (stmt); >+ tree rhs = gimple_cond_rhs (stmt); >+ >+ if (TREE_CODE (lhs) != SSA_NAME >+ || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)) >+ || TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE >+ || TREE_CODE (rhs) != INTEGER_CST) >+ return false; >+ >+ switch (gimple_cond_code (stmt)) >+ { >+ case EQ_EXPR: >+ case NE_EXPR: >+ break; >+ default: >+ return false; >+ } >+ >+ wide_int min, max; >+ if (get_range_info (lhs, &min, &max) != VR_RANGE >+ || min + 1 != max >+ || (wi::to_wide (rhs) != min >+ && wi::to_wide (rhs) != max)) >+ return false; >+ >+ /* We need to know which is the true edge and which is the false >+ edge so that we know when to invert the condition below. */ >+ edge true_edge, false_edge; >+ extract_true_false_edges_from_block (cond_bb, &true_edge, >&false_edge); >+ if ((gimple_cond_code (stmt) == EQ_EXPR) >+ ^ (wi::to_wide (rhs) == max) >+ ^ (e1 == false_edge)) >+ std::swap (arg0, arg1); >+ >+ tree type; >+ if (TYPE_PRECISION (TREE_TYPE (lhs)) == TYPE_PRECISION (TREE_TYPE >(arg0))) >+ { >+ if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE >+ || !TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg0))) >+ type = TREE_TYPE (lhs); >+ else >+ type = TREE_TYPE (arg0);
This misses some comments. Presumably you want to avoid arithmetic with bool types (or generally non-mode precision types!?). >+ } >+ else if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION >(TREE_TYPE (arg0))) >+ type = TREE_TYPE (lhs); >+ else >+ type = TREE_TYPE (arg0); >+ if (!TYPE_OVERFLOW_WRAPS (type)) >+ type = unsigned_type_for (type); Would doing this unconditionally simplify things? I think "optimizing" the - fwrapv case isn't worth it. >+ >+ tree m = fold_convert (type, wide_int_to_tree (TREE_TYPE (lhs), >min)); >+ enum tree_code code; >+ if (tree_int_cst_lt (arg0, arg1)) >+ { >+ code = PLUS_EXPR; >+ m = int_const_binop (MINUS_EXPR, fold_convert (type, arg0), m); >+ } >+ else >+ { >+ code = MINUS_EXPR; >+ m = int_const_binop (PLUS_EXPR, fold_convert (type, arg0), m); >+ } Delay wide-int to tree conversion until here. >+ >+ gimple_stmt_iterator gsi = gsi_for_stmt (stmt); >+ if (!useless_type_conversion_p (type, TREE_TYPE (lhs))) >+ lhs = gimplify_build1 (&gsi, NOP_EXPR, type, lhs); >+ tree new_rhs; >+ if (code == PLUS_EXPR) >+ new_rhs = gimplify_build2 (&gsi, PLUS_EXPR, type, lhs, m); >+ else >+ new_rhs = gimplify_build2 (&gsi, MINUS_EXPR, type, m, lhs); >+ if (!useless_type_conversion_p (TREE_TYPE (arg0), type)) >+ new_rhs = gimplify_build1 (&gsi, NOP_EXPR, TREE_TYPE (arg0), >new_rhs); >+ >+ replace_phi_edge_with_variable (cond_bb, e1, phi, new_rhs); >+ >+ /* Note that we optimized this PHI. */ >+ return true; >+} >+ >/* The function conditional_replacement does the main work of doing >the > conditional replacement. Return true if the replacement is done. > Otherwise return false. >--- gcc/testsuite/gcc.dg/tree-ssa/pr88676.c.jj 2019-01-04 >14:20:59.659542587 +0100 >+++ gcc/testsuite/gcc.dg/tree-ssa/pr88676.c 2019-01-04 >14:25:38.122965795 +0100 >@@ -0,0 +1,133 @@ >+/* PR tree-optimization/88676 */ >+/* { dg-do compile } */ >+/* { dg-options "-O2 -fdump-tree-optimized" } */ >+/* { dg-final { scan-tree-dump-not " = PHI <" "optimized" } } */ >+ >+void bar (int, int, int); >+ >+__attribute__((noipa)) int >+f1 (unsigned x) >+{ >+ int r; >+ if (x >= 2) >+ __builtin_unreachable (); >+ switch (x) >+ { >+ case 0: >+ r = 1; >+ break; >+ case 1: >+ r = 2; >+ break; >+ default: >+ r = 0; >+ break; >+ } >+ return r; >+} >+ >+__attribute__((noipa)) void >+f2 (int x) >+{ >+ int y; >+ x = (x & 1) + 98; >+ if (x != 98) >+ y = 115; >+ else >+ y = 116; >+ bar (x, y, 116); >+} >+ >+__attribute__((noipa)) void >+f3 (int x) >+{ >+ int y; >+ x = (x & 1) + 98; >+ if (x == 98) >+ y = 115; >+ else >+ y = 116; >+ bar (x, y, 115); >+} >+ >+__attribute__((noipa)) void >+f4 (int x) >+{ >+ int y; >+ x = (x & 1) + 98; >+ if (x != 99) >+ y = 115; >+ else >+ y = 116; >+ bar (x, y, 115); >+} >+ >+__attribute__((noipa)) void >+f5 (int x) >+{ >+ int y; >+ x = (x & 1) + 98; >+ if (x == 99) >+ y = 115; >+ else >+ y = 116; >+ bar (x, y, 116); >+} >+ >+__attribute__((noipa)) void >+f6 (int x) >+{ >+ int y; >+ x = (x & 1) + 98; >+ if (x != 98) >+ y = 116; >+ else >+ y = 115; >+ bar (x, y, 115); >+} >+ >+__attribute__((noipa)) void >+f7 (int x) >+{ >+ int y; >+ x = (x & 1) + 98; >+ if (x == 98) >+ y = 116; >+ else >+ y = 115; >+ bar (x, y, 116); >+} >+ >+__attribute__((noipa)) void >+f8 (int x) >+{ >+ int y; >+ x = (x & 1) + 98; >+ if (x != 99) >+ y = 116; >+ else >+ y = 115; >+ bar (x, y, 116); >+} >+ >+__attribute__((noipa)) void >+f9 (int x) >+{ >+ int y; >+ x = (x & 1) + 98; >+ if (x == 99) >+ y = 116; >+ else >+ y = 115; >+ bar (x, y, 115); >+} >+ >+__attribute__((noipa)) int >+f10 (int x) >+{ >+ x = (x & 1) + 36; >+ if (x == 36) >+ return 85; >+ else >+ return 84; >+} >--- gcc/testsuite/gcc.dg/pr88676.c.jj 2019-01-04 14:29:55.821731065 >+0100 >+++ gcc/testsuite/gcc.dg/pr88676.c 2019-01-04 14:29:39.048006694 +0100 >@@ -0,0 +1,48 @@ >+/* PR tree-optimization/88676 */ >+/* { dg-do run } */ >+/* { dg-options "-O2" } */ >+ >+#include "tree-ssa/pr88676.c" >+ >+__attribute__((noipa)) void >+bar (int x, int y, int z) >+{ >+ if (z != 115 && z != 116) >+ __builtin_abort (); >+ if (x == 98) >+ { >+ if (y != z) >+ __builtin_abort (); >+ } >+ else if (x != 99) >+ __builtin_abort (); >+ else if (z == 115) >+ { >+ if (y != 116) >+ __builtin_abort (); >+ } >+ else if (y != 115) >+ __builtin_abort (); >+} >+ >+int >+main () >+{ >+ if (f1 (0) != 1 || f1 (1) != 2) >+ __builtin_abort (); >+ int i; >+ for (i = -12; i < 12; i++) >+ { >+ f2 (i); >+ f3 (i); >+ f4 (i); >+ f5 (i); >+ f6 (i); >+ f7 (i); >+ f8 (i); >+ f9 (i); >+ if (f10 (i) != ((i & 1) ? 84 : 85)) >+ __builtin_abort (); >+ } >+ return 0; >+} >--- gcc/testsuite/gcc.dg/tree-ssa/pr15826.c.jj 2016-02-27 >07:41:54.377920386 +0100 >+++ gcc/testsuite/gcc.dg/tree-ssa/pr15826.c 2019-01-04 >23:41:36.300566686 +0100 >@@ -33,4 +33,4 @@ andrew (struct s *p) > return i; > } > >-/* { dg-final { scan-tree-dump-times " & | goto " 0 "optimized" } } */ >+/* { dg-final { scan-tree-dump-not "goto " "optimized" } } */ > > Jakub