Hi,

The very recent addition of the if_to_switch pass has partially disabled the 
optimization I added back in June:

2020-06-26  Eric Botcazou  <ebotca...@adacore.com>

        * tree-ssa-reassoc.c (dump_range_entry): New function.
        (debug_range_entry): New debug function.
        (update_range_test): Invoke dump_range_entry for dumping.
        (optimize_range_tests_to_bit_test): Merge the entry test in the
        bit test when possible and lower the profitability threshold.

as witnessed by the 3 new failures in the gnat.dg testsuite.  It turns out 
that both tree-ssa-reassoc.c and tree-switch-conversion.c can turn chains of 
comparisons into bit tests, so the optimization must also be added to 
bit_test_cluster::emit.
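
For illustration, here is a hand-written C sketch (my own example, not one 
of the failing gnat.dg tests) of the kind of comparison chain that 
if_to_switch now turns into a switch and that bit_test_cluster::emit then 
lowers to a bit test:

  /* Hypothetical example: if_to_switch recognizes this chain as a switch,
     which is then lowered to ((1 << (c - 'a')) & MASK) != 0 plus, when
     range information does not already bound c, an entry test of the form
     (unsigned) (c - 'a') <= 'u' - 'a'.  With the patch, known range
     information lets the entry test be merged into the bit test.  */
  int is_vowel (int c)
  {
    return c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u';
  }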

The patch also contains a secondary optimization, whereby the full bit-test 
sequence is sent to the folder before being gimplified when there is only one 
test, so that the most optimized sequence (bt + jc on x86) can be emitted, as 
optimize_range_tests_to_bit_test already does.
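
To make the single-test case concrete, here is another hypothetical sketch 
(hand-written, not compiler output): folding the whole condition before 
gimplifying lets the folder rewrite the variable shift of 1 into a shift of 
the constant mask, which the x86 back end can then match as bt + jc:

  /* Hypothetical single-test cluster: the emitted condition starts out as
     ((1 << idx) & 0x100401) != 0 and the folder can turn it into
     ((0x100401 >> idx) & 1) != 0, which combines into bt + jc on x86.  */
  int in_set (unsigned int idx)
  {
    if (idx > 20)  /* entry test, when still needed */
      return 0;
    return ((1u << idx) & 0x100401) != 0;  /* bits 0, 10 and 20 set */
  }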

Bootstrapped/regtested on x86-64/Linux, OK for the mainline?


2020-12-07  Eric Botcazou  <ebotca...@adacore.com>

        PR tree-optimization/96344
        * tree-switch-conversion.c (bit_test_cluster::emit): Compute the
        range only if an entry test is necessary.  Merge the entry test in
        the bit test when possible.  Use PREC local variable consistently.
        When there is only one test, do a single gimplification at the end.

-- 
Eric Botcazou
diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c
index a87a2a3cd15..989bd7710d1 100644
--- a/gcc/tree-switch-conversion.c
+++ b/gcc/tree-switch-conversion.c
@@ -1511,7 +1511,6 @@ bit_test_cluster::emit (tree index_expr, tree index_type,
 
   tree minval = get_low ();
   tree maxval = get_high ();
-  tree range = int_const_binop (MINUS_EXPR, maxval, minval);
   unsigned HOST_WIDE_INT bt_range = get_range (minval, maxval);
 
   /* Go through all case labels, and collect the case labels, profile
@@ -1550,11 +1549,38 @@ bit_test_cluster::emit (tree index_expr, tree index_type,
 
   qsort (test, count, sizeof (*test), case_bit_test::cmp);
 
+  /* If every possible relative value of the index expression is a valid shift
+     amount, then we can merge the entry test in the bit test.  */
+  wide_int min, max;
+  bool entry_test_needed;
+  if (TREE_CODE (index_expr) == SSA_NAME
+      && get_range_info (index_expr, &min, &max) == VR_RANGE
+      && wi::leu_p (max - min, prec - 1))
+    {
+      wide_int iminval = wi::to_wide (minval);
+      tree minval_type = TREE_TYPE (minval);
+      if (wi::lt_p (min, iminval, TYPE_SIGN (minval_type)))
+	{
+	  minval = wide_int_to_tree (minval_type, min);
+	  for (i = 0; i < count; i++)
+	    test[i].mask = wi::lshift (test[i].mask, iminval - min);
+	}
+      else if (wi::gt_p (min, iminval, TYPE_SIGN (minval_type)))
+	{
+	  minval = wide_int_to_tree (minval_type, min);
+	  for (i = 0; i < count; i++)
+	    test[i].mask = wi::lrshift (test[i].mask, min - iminval);
+	}
+      maxval = wide_int_to_tree (minval_type, max);
+      entry_test_needed = false;
+    }
+  else
+    entry_test_needed = true;
+
   /* If all values are in the 0 .. BITS_PER_WORD-1 range, we can get rid of
      the minval subtractions, but it might make the mask constants more
      expensive.  So, compare the costs.  */
-  if (compare_tree_int (minval, 0) > 0
-      && compare_tree_int (maxval, GET_MODE_BITSIZE (word_mode)) < 0)
+  if (compare_tree_int (minval, 0) > 0 && compare_tree_int (maxval, prec) < 0)
     {
       int cost_diff;
       HOST_WIDE_INT m = tree_to_uhwi (minval);
@@ -1577,7 +1603,6 @@ bit_test_cluster::emit (tree index_expr, tree index_type,
 	  for (i = 0; i < count; i++)
 	    test[i].mask = wi::lshift (test[i].mask, m);
 	  minval = build_zero_cst (TREE_TYPE (minval));
-	  range = maxval;
 	}
     }
 
@@ -1593,8 +1618,9 @@ bit_test_cluster::emit (tree index_expr, tree index_type,
 				  /*simple=*/true, NULL_TREE,
 				  /*before=*/true, GSI_SAME_STMT);
 
-  if (m_handles_entire_switch)
+  if (m_handles_entire_switch && entry_test_needed)
     {
+      tree range = int_const_binop (MINUS_EXPR, maxval, minval);
       /* if (idx > range) goto default */
       range
 	= force_gimple_operand_gsi (&gsi,
@@ -1608,16 +1634,22 @@ bit_test_cluster::emit (tree index_expr, tree index_type,
       gsi = gsi_last_bb (new_bb);
     }
 
-  /* csui = (1 << (word_mode) idx) */
-  csui = make_ssa_name (word_type_node);
   tmp = fold_build2 (LSHIFT_EXPR, word_type_node, word_mode_one,
 		     fold_convert (word_type_node, idx));
-  tmp = force_gimple_operand_gsi (&gsi, tmp,
-				  /*simple=*/false, NULL_TREE,
-				  /*before=*/true, GSI_SAME_STMT);
-  shift_stmt = gimple_build_assign (csui, tmp);
-  gsi_insert_before (&gsi, shift_stmt, GSI_SAME_STMT);
-  update_stmt (shift_stmt);
+
+  /* csui = (1 << (word_mode) idx) */
+  if (count > 1)
+    {
+      csui = make_ssa_name (word_type_node);
+      tmp = force_gimple_operand_gsi (&gsi, tmp,
+				     /*simple=*/false, NULL_TREE,
+				     /*before=*/true, GSI_SAME_STMT);
+      shift_stmt = gimple_build_assign (csui, tmp);
+      gsi_insert_before (&gsi, shift_stmt, GSI_SAME_STMT);
+      update_stmt (shift_stmt);
+    }
+  else
+    csui = tmp;
 
   profile_probability prob = profile_probability::always ();
 
@@ -1630,10 +1662,10 @@ bit_test_cluster::emit (tree index_expr, tree index_type,
       bt_range -= test[k].bits;
       tmp = wide_int_to_tree (word_type_node, test[k].mask);
       tmp = fold_build2 (BIT_AND_EXPR, word_type_node, csui, tmp);
+      tmp = fold_build2 (NE_EXPR, boolean_type_node, tmp, word_mode_zero);
       tmp = force_gimple_operand_gsi (&gsi, tmp,
 				      /*simple=*/true, NULL_TREE,
 				      /*before=*/true, GSI_SAME_STMT);
-      tmp = fold_build2 (NE_EXPR, boolean_type_node, tmp, word_mode_zero);
       basic_block new_bb
 	= hoist_edge_and_branch_if_true (&gsi, tmp, test[k].target_bb, prob);
       gsi = gsi_last_bb (new_bb);
