On Tue, Aug 6, 2024 at 4:38 PM Andi Kleen <[email protected]> wrote:
>
> The gimple-if-to-switch pass converts if statements with
> multiple equal checks on the same value to a switch. This breaks
> vectorization which cannot handle switches.
>
> Teach the tree-if-conv pass used by the vectorizer to handle
> simple switch statements, like those created by if-to-switch earlier.
> These are switches that only have a single non default block,
> and no ranges. They are handled similar to if in if conversion.
>
> Some notes:
>
> In theory this handles switches with case ranges, but it seems
> for the simple "one target label" switch case that is supported
> here these are always optimized by the cfg passes to COND,
> so this case is latent.
>
> This makes the vect-bitfield-read-1-not test fail. The test
> checks for a bitfield analysis failing, but it actually
> relied on the ifcvt erroring out early because the test
> is using a switch. The if conversion still does not
> work because the switch is not in a form that this
> patch can handle, but it fails much later and the bitfield
> analysis succeeds, which makes the test fail. I marked
> it xfail because it doesn't seem to be testing what it wants
> to test.
>
> gcc/ChangeLog:
>
> PR tree-opt/115866
> * tree-if-conv.cc (if_convertible_switch_p): New function.
> (if_convertible_stmt_p): Check for switch.
> (get_loop_body_in_if_conv_order): Handle switch.
> (predicate_bbs): Likewise.
> (predicate_statements): Likewise.
> (remove_conditions_and_labels): Likewise.
> (ifcvt_split_critical_edges): Likewise.
> (ifcvt_local_dce): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/vect/vect-switch-ifcvt-1.c: New test.
> * gcc.dg/vect/vect-switch-ifcvt-2.c: New test.
> * gcc.dg/vect/vect-switch-search-line-fast.c: New test.
> * gcc.dg/vect/vect-bitfield-read-1-not.c: Change to xfail.
> ---
> .../gcc.dg/vect/vect-bitfield-read-1-not.c | 2 +-
> .../gcc.dg/vect/vect-switch-ifcvt-1.c | 107 ++++++++++++++++++
> .../gcc.dg/vect/vect-switch-ifcvt-2.c | 28 +++++
> .../vect/vect-switch-search-line-fast.c | 17 +++
> gcc/tree-if-conv.cc | 90 ++++++++++++++-
> 5 files changed, 238 insertions(+), 6 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/vect/vect-switch-ifcvt-1.c
> create mode 100644 gcc/testsuite/gcc.dg/vect/vect-switch-ifcvt-2.c
> create mode 100644 gcc/testsuite/gcc.dg/vect/vect-switch-search-line-fast.c
>
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1-not.c
> b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1-not.c
> index 0d91067ebb2..85f4de8464a 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1-not.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1-not.c
> @@ -55,6 +55,6 @@ int main (void)
> return 0;
> }
>
> -/* { dg-final { scan-tree-dump-not "Bitfield OK to lower." "ifcvt" } } */
> +/* { dg-final { scan-tree-dump-times "Bitfield OK to lower." 0 "ifcvt" {
> xfail *-*-* } } } */
>
>
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-switch-ifcvt-1.c
> b/gcc/testsuite/gcc.dg/vect/vect-switch-ifcvt-1.c
> new file mode 100644
> index 00000000000..0b06d3c84a7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-switch-ifcvt-1.c
> @@ -0,0 +1,107 @@
> +/* { dg-require-effective-target vect_int } */
> +
> +extern void abort (void);
> +
> +int
> +f1 (char *s)
> +{
> + int c = 0;
> + int i;
> + for (i = 0; i < 64; i++)
> + {
> + switch (*s)
> + {
> + case ',':
> + case '|':
> + c++;
> + }
> + s++;
> + }
> + return c;
> +}
> +
> +int
> +f2 (char *s)
> +{
> + int c = 0;
> + int i;
> + for (i = 0; i < 64; i++)
> + {
> + if (*s != '#')
> + {
> + switch (*s)
> + {
> + case ',':
> + case '|':
> + c++;
> + }
> + }
> + s++;
> + }
> + return c;
> +}
> +
> +int
> +f3 (char *s)
> +{
> + int c = 0;
> + int i;
> + for (i = 0; i < 64; i++)
> + {
> + if (*s != '#')
> + if (*s == ',' || *s == '|' || *s == '@' || *s == '*')
> + c++;
> + s++;
> + }
> + return c;
> +}
> +
> +
> +int
> +f4 (char *s)
> +{
> + int c = 0;
> + int i;
> + for (i = 0; i < 64; i++)
> + {
> + if (*s == ',' || *s == '|' || *s == '@' || *s == '*')
> + c++;
> + s++;
> + }
> + return c;
> +}
> +
> +#define CHECK(f, str, res) \
> + __builtin_strcpy(buf, str); n = f(buf); if (n != res) abort();
> +
> +int
> +main ()
> +{
> + int n;
> + char buf[64];
> +
> + CHECK (f1, ",,,,,,,,,,", 10);
> + CHECK (f1, "||||||||||", 10);
> + CHECK (f1, "aaaaaaaaaa", 0);
> + CHECK (f1, "", 0);
> + CHECK (f1, ",|,|xxxxxx", 4);
> +
> + CHECK (f2, ",|,|xxxxxx", 4);
> + CHECK (f2, ",|,|xxxxxx", 4);
> + CHECK (f2, ",|,|xxxxxx", 4);
> + CHECK (f2, ",|,|xxxxxx", 4);
> +
> + CHECK (f3, ",|,|xxxxxx", 4);
> + CHECK (f3, ",|,|xxxxxx", 4);
> + CHECK (f3, ",|,|xxxxxx", 4);
> + CHECK (f3, ",|,|xxxxxx", 4);
> +
> + CHECK (f4, ",|,|xxxxxx", 4);
> + CHECK (f4, ",|,|xxxxxx", 4);
> + CHECK (f4, ",|,|xxxxxx", 4);
> + CHECK (f4, ",|,|xxxxxx", 4);
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-switch-ifcvt-2.c
> b/gcc/testsuite/gcc.dg/vect/vect-switch-ifcvt-2.c
> new file mode 100644
> index 00000000000..c9da6ebb40b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-switch-ifcvt-2.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_int } */
> +
> +/* Check for cases currently not supported by switch tree if conversion. */
> +
> +int
> +f1 (char *s)
> +{
> + int c = 0;
> + int i;
> + for (i = 0; i < 64; i++)
> + {
> + switch (*s)
> + {
> + case ',':
> + case '|':
> + c++;
> + break;
> + case '^':
> + c += 2;
> + break;
> + }
> + s++;
> + }
> + return c;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-switch-search-line-fast.c
> b/gcc/testsuite/gcc.dg/vect/vect-switch-search-line-fast.c
> new file mode 100644
> index 00000000000..15f3a4ef38a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-switch-search-line-fast.c
> @@ -0,0 +1,17 @@
> +/* PR116126 -- once this works use this version in libcpp/lex.c.
> + This also requires working value range propagation for s/end. */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_int } */
> +
> +const unsigned char *search_line_fast2 (const unsigned char *s,
> + const unsigned char *end)
> +{
> + while (s < end) {
> + if (*s == '\n' || *s == '\r' || *s == '\\' || *s == '?')
> + break;
> + s++;
> + }
> + return s;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail
> *-*-* } } } */
> diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> index 57992b6deca..e41ccd421c8 100644
> --- a/gcc/tree-if-conv.cc
> +++ b/gcc/tree-if-conv.cc
> @@ -124,6 +124,7 @@ along with GCC; see the file COPYING3. If not see
> #include "tree-vectorizer.h"
> #include "tree-eh.h"
> #include "cgraph.h"
> +#include "print-tree.h"
>
> /* For lang_hooks.types.type_for_mode. */
> #include "langhooks.h"
> @@ -1082,11 +1083,30 @@ if_convertible_gimple_assign_stmt_p (gimple *stmt,
> return true;
> }
>
> +/* Return true when SW switch statement is equivalent to cond, that
> + all non default labels point to the same label.
> + This is intended for switches created by the if-switch-conversion
> + pass, but can handle some programmer supplied cases too. */
> +
> +static bool
> +if_convertible_switch_p (gswitch *sw)
> +{
> + gcc_checking_assert (gimple_switch_num_labels (sw) > 1);
Defensive programming asks for
if (gimple_switch_num_labels (sw) <= 1)
return false;
alternatively the assert is redundant - the access of label '1' below
will assert the same.
> + tree label = CASE_LABEL (gimple_switch_label (sw, 1));
> + for (unsigned i = 1; i < gimple_switch_num_labels (sw); i++)
> + {
> + if (CASE_LABEL (gimple_switch_label (sw, i)) != label)
> + return false;
> + }
> + return true;
> +}
> +
> /* Return true when STMT is if-convertible.
>
> A statement is if-convertible if:
> - it is an if-convertible GIMPLE_ASSIGN,
> - it is a GIMPLE_LABEL or a GIMPLE_COND,
> + - it is a switch equivalent to COND
> - it is builtins call,
> - it is a call to a function with a SIMD clone. */
>
> @@ -1100,6 +1120,9 @@ if_convertible_stmt_p (gimple *stmt,
> vec<data_reference_p> refs)
> case GIMPLE_COND:
> return true;
>
> + case GIMPLE_SWITCH:
> + return if_convertible_switch_p (as_a <gswitch *> (stmt));
> +
> case GIMPLE_ASSIGN:
> return if_convertible_gimple_assign_stmt_p (stmt, refs);
>
> @@ -1327,6 +1350,7 @@ get_loop_body_in_if_conv_order (const class loop *loop)
> case GIMPLE_CALL:
> case GIMPLE_DEBUG:
> case GIMPLE_COND:
> + case GIMPLE_SWITCH:
> gimple_set_uid (gsi_stmt (gsi), 0);
> break;
> default:
> @@ -1426,6 +1450,60 @@ predicate_bbs (loop_p loop)
> cond = NULL_TREE;
> }
>
> + /* Assumes the limited COND like switches checked for earlier. */
> + if (gswitch *sw = safe_dyn_cast <gswitch *> (*gsi_last_bb (bb)))
else if
> + {
> + location_t loc = gimple_location (*gsi_last_bb (bb));
> +
> + tree default_label = CASE_LABEL (gimple_switch_label (sw, 0));
gimple_switch_default_label (sw)
> + tree cond_label = CASE_LABEL (gimple_switch_label (sw, 1));
> +
> + edge false_edge = find_edge (bb, label_to_block (cfun,
> default_label));
> + edge true_edge = find_edge (bb, label_to_block (cfun, cond_label));
> +
> + /* Create chain of switch tests for each case. */
> + tree switch_cond = NULL_TREE;
> + tree index = gimple_switch_index (sw);
> + for (unsigned i = 1; i < gimple_switch_num_labels (sw); i++)
> + {
> + tree label = gimple_switch_label (sw, i);
> + tree case_cond;
> + /* This currently cannot happen because tree-cfg lowers range
> + switches with a single destination to COND. */
But it should also lower non-range switches with a single destination ...?
See convert_single_case_switch. You say
switch (i)
{
case 1:
case 5 ... 7:
return 42;
default:
return 0;
}
doesn't hit here with a CASE_HIGH for the 5 ... 7 CASE_LABEL?
Otherwise the patch looks Ok to me.
Thanks,
Richard.
> + if (CASE_HIGH (label))
> + {
> + tree low = build2_loc (loc, GE_EXPR,
> + boolean_type_node,
> + index, CASE_LOW (label));
> + tree high = build2_loc (loc, LE_EXPR,
> + boolean_type_node,
> + index, CASE_HIGH (label));
> + case_cond = build2_loc (loc, TRUTH_AND_EXPR,
> + boolean_type_node,
> + low, high);
> + }
> + else
> + case_cond = build2_loc (loc, EQ_EXPR,
> + boolean_type_node,
> + index,
> + CASE_LOW (gimple_switch_label (sw,
> i)));
> + if (i > 1)
> + switch_cond = build2_loc (loc, TRUTH_OR_EXPR,
> + boolean_type_node,
> + case_cond, switch_cond);
> + else
> + switch_cond = case_cond;
> + }
> +
> + add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
> + unshare_expr (switch_cond));
> + switch_cond = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
> + unshare_expr (switch_cond));
> + add_to_dst_predicate_list (loop, false_edge,
> + unshare_expr (cond), switch_cond);
> + cond = NULL_TREE;
> + }
> +
> /* If current bb has only one successor, then consider it as an
> unconditional goto. */
> if (single_succ_p (bb))
> @@ -2852,9 +2930,9 @@ predicate_statements (loop_p loop)
> }
> }
>
> -/* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
> - other than the exit and latch of the LOOP. Also resets the
> - GIMPLE_DEBUG information. */
> +/* Remove all GIMPLE_CONDs and GIMPLE_LABELs and GIMPLE_SWITCH of all
> + the basic blocks other than the exit and latch of the LOOP. Also
> + resets the GIMPLE_DEBUG information. */
>
> static void
> remove_conditions_and_labels (loop_p loop)
> @@ -2875,6 +2953,7 @@ remove_conditions_and_labels (loop_p loop)
> {
> case GIMPLE_COND:
> case GIMPLE_LABEL:
> + case GIMPLE_SWITCH:
> gsi_remove (&gsi, true);
> break;
>
> @@ -3265,7 +3344,8 @@ ifcvt_split_critical_edges (class loop *loop, bool
> aggressive_if_conv)
> continue;
>
> /* Skip basic blocks not ending with conditional branch. */
> - if (!safe_is_a <gcond *> (*gsi_last_bb (bb)))
> + if (!safe_is_a <gcond *> (*gsi_last_bb (bb))
> + && !safe_is_a <gswitch *> (*gsi_last_bb (bb)))
> continue;
>
> FOR_EACH_EDGE (e, ei, bb->succs)
> @@ -3330,7 +3410,7 @@ ifcvt_local_dce (class loop *loop)
> continue;
> }
> code = gimple_code (stmt);
> - if (code == GIMPLE_COND || code == GIMPLE_CALL)
> + if (code == GIMPLE_COND || code == GIMPLE_CALL || code ==
> GIMPLE_SWITCH)
> {
> gimple_set_plf (stmt, GF_PLF_2, true);
> worklist.safe_push (stmt);
> --
> 2.45.2
>