Hi,

bootstrapped and regtested on x86_64-linux.  Ok to push?

Cheers,
Filip Kastl


-- 8< --


The gen_pow2p function generates (a & -a) == a as a fallback for
POPCOUNT (a) == 1.  Not only is the bitmagic not equivalent to
POPCOUNT (a) == 1 but it also introduces UB (consider signed
a = INT_MIN).

This patch rewrites gen_pow2p to always use __builtin_popcount instead.
This means that what the end result GIMPLE code is gets decided by an
already existing machinery in a later pass.  That is a cleaner solution
I think.  This existing machinery also uses a ^ (a - 1) > a - 1 which is
the correct bitmagic.

While rewriting gen_pow2p I had to add logic for converting the
operand's type to a type that __builtin_popcount accepts.  I naturally
also added this logic to gen_log2.  Thanks to this, exponential index
transform gains the capability to handle all operand types with
precision at most that of long long int.

            PR tree-optimization/116355

gcc/ChangeLog:

        * tree-switch-conversion.cc (can_log2): Take into account the
        conversion added to gen_log2.
        (gen_log2): Add a conversion to a type compatible with FFS.
        (can_pow2p): New function.
        (gen_pow2p): Rewrite to use __builtin_popcount instead of
        manually inserting an internal fn call or bitmagic.
        (switch_conversion::is_exp_index_transform_viable): Call
        can_pow2p.
        (switch_conversion::exp_index_transform): Params of gen_pow2p
        changed so update its call.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/switch-exp-transform-1.c: Don't test for
        presence of POPCOUNT internal fn after switch conversion.  Test
        for it after __builtin_popcount has had a chance to get
        expanded.
        * gcc.target/i386/switch-exp-transform-3.c: Also test char and
        short.

Signed-off-by: Filip Kastl <fka...@suse.cz>
---
 .../gcc.target/i386/switch-exp-transform-1.c  |   7 +-
 .../gcc.target/i386/switch-exp-transform-3.c  |  98 ++++++++++++++-
 gcc/tree-switch-conversion.cc                 | 117 ++++++++++++++----
 3 files changed, 192 insertions(+), 30 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c 
b/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c
index 53d31460ba3..a8c9e03e515 100644
--- a/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c
+++ b/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c
@@ -1,9 +1,10 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-switchconv -mpopcnt -mbmi" } */
+/* { dg-options "-O2 -fdump-tree-switchconv -fdump-tree-widening_mul -mpopcnt 
-mbmi" } */
 
 /* Checks that exponential index transform enables switch conversion to convert
    this switch into an array lookup.  Also checks that the "index variable is a
-   power of two" check has been generated.  */
+   power of two" check has been generated and that it has been later expanded
+   into an internal function.  */
 
 int foo(unsigned bar)
 {
@@ -29,4 +30,4 @@ int foo(unsigned bar)
 }
 
 /* { dg-final { scan-tree-dump "CSWTCH" "switchconv" } } */
-/* { dg-final { scan-tree-dump "POPCOUNT" "switchconv" } } */
+/* { dg-final { scan-tree-dump "POPCOUNT" "widening_mul" } } */
diff --git a/gcc/testsuite/gcc.target/i386/switch-exp-transform-3.c 
b/gcc/testsuite/gcc.target/i386/switch-exp-transform-3.c
index 64a7b146172..5011d1ebb0e 100644
--- a/gcc/testsuite/gcc.target/i386/switch-exp-transform-3.c
+++ b/gcc/testsuite/gcc.target/i386/switch-exp-transform-3.c
@@ -3,10 +3,104 @@
 
 /* Checks that the exponential index transformation is done for all these types
    of the index variable:
+   - (unsigned) char
+   - (unsigned) short
    - (unsigned) int
    - (unsigned) long
    - (unsigned) long long  */
 
+int unopt_char(char bit_position)
+{
+    switch (bit_position)
+    {
+        case (1 << 0):
+            return 0;
+        case (1 << 1):
+            return 1;
+        case (1 << 2):
+            return 2;
+        case (1 << 3):
+            return 3;
+        case (1 << 4):
+            return 4;
+        case (1 << 5):
+            return 5;
+        case (1 << 6):
+            return 6;
+        default:
+            return 0;
+    }
+}
+
+int unopt_unsigned_char(unsigned char bit_position)
+{
+    switch (bit_position)
+    {
+        case (1 << 0):
+            return 0;
+        case (1 << 1):
+            return 1;
+        case (1 << 2):
+            return 2;
+        case (1 << 3):
+            return 3;
+        case (1 << 4):
+            return 4;
+        case (1 << 5):
+            return 5;
+        case (1 << 6):
+            return 6;
+        default:
+            return 0;
+    }
+}
+
+int unopt_short(short bit_position)
+{
+    switch (bit_position)
+    {
+        case (1 << 0):
+            return 0;
+        case (1 << 1):
+            return 1;
+        case (1 << 2):
+            return 2;
+        case (1 << 3):
+            return 3;
+        case (1 << 4):
+            return 4;
+        case (1 << 5):
+            return 5;
+        case (1 << 6):
+            return 6;
+        default:
+            return 0;
+    }
+}
+
+int unopt_unsigned_short(unsigned short bit_position)
+{
+    switch (bit_position)
+    {
+        case (1 << 0):
+            return 0;
+        case (1 << 1):
+            return 1;
+        case (1 << 2):
+            return 2;
+        case (1 << 3):
+            return 3;
+        case (1 << 4):
+            return 4;
+        case (1 << 5):
+            return 5;
+        case (1 << 6):
+            return 6;
+        default:
+            return 0;
+    }
+}
+
 int unopt_int(int bit_position)
 {
     switch (bit_position)
@@ -149,5 +243,5 @@ int unopt_unsigned_long_long(unsigned long long 
bit_position)
 
 #endif
 
-/* { dg-final { scan-tree-dump-times "Applying exponential index transform" 4 
"switchconv" { target ia32 } } } */
-/* { dg-final { scan-tree-dump-times "Applying exponential index transform" 6 
"switchconv" { target { ! ia32 } } } } */
+/* { dg-final { scan-tree-dump-times "Applying exponential index transform" 8 
"switchconv" { target ia32 } } } */
+/* { dg-final { scan-tree-dump-times "Applying exponential index transform" 10 
"switchconv" { target { ! ia32 } } } } */
diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc
index 4b11c8d25f4..9dc703f737c 100644
--- a/gcc/tree-switch-conversion.cc
+++ b/gcc/tree-switch-conversion.cc
@@ -66,63 +66,131 @@ using namespace tree_switch_conversion;
 /* Does the target have optabs needed to efficiently compute exact base two
    logarithm of a value with type TYPE?
 
-   See gen_log2.  */
+   Also see gen_log2.  */
 
 static bool
 can_log2 (tree type, optimization_type opt_type)
 {
   /* Check if target supports FFS.  */
-  return direct_internal_fn_supported_p (IFN_FFS, type, opt_type);
+  int prec = TYPE_PRECISION (type);
+  int i_prec = TYPE_PRECISION (integer_type_node);
+  int li_prec = TYPE_PRECISION (long_integer_type_node);
+  int lli_prec = TYPE_PRECISION (long_long_integer_type_node);
+  tree new_type;
+  if (prec <= i_prec)
+    new_type = integer_type_node;
+  else if (prec <= li_prec)
+    new_type = long_integer_type_node;
+  else if (prec <= lli_prec)
+    new_type = long_long_integer_type_node;
+  else
+    return false;
+  return direct_internal_fn_supported_p (IFN_FFS, new_type, opt_type);
 }
 
 /* Assume that OP is a power of two.  Build a sequence of gimple statements
    efficiently computing the base two logarithm of OP using special optabs.
    Return the ssa name represeting the result of the logarithm through RESULT.
 
-   Should only be used if target supports the needed optabs.  See can_log2.  */
+   Should only be used if can_log2 returns true for type of OP.  */
 
 static gimple_seq
 gen_log2 (tree op, location_t loc, tree *result)
 {
-  tree type = TREE_TYPE (op);
   gimple_seq stmts = NULL;
   gimple_stmt_iterator gsi = gsi_last (stmts);
-  tree tmp1 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, IFN_FFS, type, op);
-  tree tmp2 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, MINUS_EXPR, type,
-                           tmp1, build_one_cst (type));
-  *result = tmp2;
+
+  tree orig_type = TREE_TYPE (op);
+  int prec = TYPE_PRECISION (orig_type);
+  int i_prec = TYPE_PRECISION (integer_type_node);
+  int li_prec = TYPE_PRECISION (long_integer_type_node);
+
+  tree type;
+  if (prec <= i_prec)
+    type = integer_type_node;
+  else if (prec <= li_prec)
+    type = long_integer_type_node;
+  else
+    type = long_long_integer_type_node;
+  gcc_checking_assert (prec <= TYPE_PRECISION (long_long_integer_type_node));
+
+  /* Convert op to one of the types that FFS accepts.  */
+  tree tmp1 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, CONVERT_EXPR, type,
+                           op);
+  /* Build FFS (op) - 1.  */
+  tree tmp2 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, IFN_FFS, orig_type,
+                           tmp1);
+  tree tmp3 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, MINUS_EXPR,
+                           orig_type, tmp2, build_one_cst (orig_type));
+  *result = tmp3;
   return stmts;
 }
 
+/* Is it possible to efficiently check that a value of TYPE is a power of 2?
+
+   Also see gen_pow2p.  */
+
+static bool
+can_pow2p (tree type)
+{
+  /* Check that we can express "is power of 2" using __builtin_popcount.  */
+  int lli_prec = TYPE_PRECISION (long_long_unsigned_type_node);
+  return TYPE_PRECISION (type) <= lli_prec;
+}
+
 /* Build a sequence of gimple statements checking that OP is a power of 2.  Use
    special optabs if target supports them.  Return the result as a
-   boolen_type_node ssa name through RESULT.  */
+   boolean_type_node ssa name through RESULT.  Assumes that OP's value will
+   be non-negative.  The generated check may give arbitrary answer for negative
+   values.
+
+   Should only be used if can_pow2p returns true for type of OP.  */
 
 static gimple_seq
-gen_pow2p (tree op, location_t loc, optimization_type opt_type, tree *result)
+gen_pow2p (tree op, location_t loc, tree *result)
 {
-  tree type = TREE_TYPE (op);
   gimple_seq stmts = NULL;
   gimple_stmt_iterator gsi = gsi_last (stmts);
-  if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, opt_type))
+
+  int prec = TYPE_PRECISION (TREE_TYPE (op));
+  int i_prec = TYPE_PRECISION (unsigned_type_node);
+  int li_prec = TYPE_PRECISION (long_unsigned_type_node);
+
+  tree fn;
+  tree type;
+  if (prec <= i_prec)
     {
-      tree tmp = gimple_build (&gsi, false, GSI_NEW_STMT, loc, IFN_POPCOUNT,
-                              type, op);
-      *result = gimple_build (&gsi, false, GSI_NEW_STMT, loc, EQ_EXPR,
-                             boolean_type_node, tmp, build_one_cst (type));
+      type = unsigned_type_node;
+      fn = builtin_decl_implicit (BUILT_IN_POPCOUNT);
+    }
+  else if (prec <= li_prec)
+    {
+      type = long_unsigned_type_node;
+      fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL);
     }
   else
     {
-      tree tmp1 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, NEGATE_EXPR,
-                               type, op);
-      tree tmp2 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, BIT_AND_EXPR,
-                               type, op, tmp1);
-      *result = gimple_build (&gsi, false, GSI_NEW_STMT, loc, EQ_EXPR,
-                             boolean_type_node, tmp2, op);
+      type = long_long_unsigned_type_node;
+      fn = builtin_decl_implicit (BUILT_IN_POPCOUNTLL);
     }
+  gcc_checking_assert (prec <= TYPE_PRECISION (long_long_unsigned_type_node));
+
+  /* Convert op to one of the types that __builtin_popcount{l,ll} accepts.  */
+  tree tmp1 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, CONVERT_EXPR, type,
+                           op);
+  /* Build __builtin_popcount{l,ll} (op) == 1.  */
+  gcall *call = gimple_build_call (fn, 1, tmp1);
+  tree tmp2 = make_ssa_name (integer_type_node);
+  gimple_set_lhs (call, tmp2);
+  gsi_insert_after (&gsi, call, GSI_NEW_STMT);
+  *result = gimple_build (&gsi, false, GSI_NEW_STMT, loc, EQ_EXPR,
+                         boolean_type_node, tmp2,
+                         build_one_cst (integer_type_node));
+
   return stmts;
 }
 
+
 /* Constructor.  */
 
 switch_conversion::switch_conversion (): m_final_bb (NULL),
@@ -285,7 +353,7 @@ switch_conversion::is_exp_index_transform_viable (gswitch 
*swtch)
   unsigned num_labels = gimple_switch_num_labels (swtch);
 
   optimization_type opt_type = bb_optimization_type (swtch_bb);
-  if (!can_log2 (index_type, opt_type))
+  if (!can_log2 (index_type, opt_type) || !can_pow2p (index_type))
     return false;
 
   /* Check that each case label corresponds only to one value
@@ -380,8 +448,7 @@ switch_conversion::exp_index_transform (gswitch *swtch)
   new_edge2->probability = profile_probability::even ();
 
   tree tmp;
-  optimization_type opt_type = bb_optimization_type (cond_bb);
-  gimple_seq stmts = gen_pow2p (index, UNKNOWN_LOCATION, opt_type, &tmp);
+  gimple_seq stmts = gen_pow2p (index, UNKNOWN_LOCATION, &tmp);
   gsi = gsi_last_bb (cond_bb);
   gsi_insert_seq_after (&gsi, stmts, GSI_LAST_NEW_STMT);
   gcond *stmt_cond = gimple_build_cond (NE_EXPR, tmp, boolean_false_node,
-- 
2.46.0

Reply via email to