Manolis Tsamis <manolis.tsa...@vrull.eu> writes:
> Currently the operations allowed for if conversion of a basic block with
> multiple sets are few, namely REG, SUBREG and CONST_INT (as controlled by
> bb_ok_for_noce_convert_multiple_sets).
>
> This commit allows more operations (arithmetic, compare, etc) to participate
> in if conversion. The target's profitability hook and ifcvt's costing is
> expected to reject sequences that are unprofitable.
>
> This is especially useful for targets which provide a rich selection of
> conditional instructions (like aarch64 which has cinc, csneg, csinv, ccmp, 
> ...)
> which are currently not used in basic blocks with more than a single set.
>
> gcc/ChangeLog:
>
>       * ifcvt.cc (try_emit_cmove_seq): Modify comments.
>       (noce_convert_multiple_sets_1): Modify comments.
>       (bb_ok_for_noce_convert_multiple_sets): Allow more operations.
>
> gcc/testsuite/ChangeLog:
>
>       * gcc.target/aarch64/ifcvt_multiple_sets_arithm.c: New test.
>
> Signed-off-by: Manolis Tsamis <manolis.tsa...@vrull.eu>
> ---
>
> Changes in v3:
>         - Add SCALAR_INT_MODE_P check in bb_ok_for_noce_convert_multiple_sets.
>         - Allow rewiring of multiple regs.
>         - Refactor code with noce_multiple_sets_info.
>         - Remove old code for subregs.
>
>  gcc/ifcvt.cc                                  | 63 ++++++++++-----
>  .../aarch64/ifcvt_multiple_sets_arithm.c      | 79 +++++++++++++++++++
>  2 files changed, 123 insertions(+), 19 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
>
> diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
> index 3273aeca125..efe8ab1577a 100644
> --- a/gcc/ifcvt.cc
> +++ b/gcc/ifcvt.cc
> @@ -3215,13 +3215,13 @@ try_emit_cmove_seq (struct noce_if_info *if_info, rtx 
> temp,
>  /* We have something like:
>  
>       if (x > y)
> -       { i = a; j = b; k = c; }
> +       { i = EXPR_A; j = EXPR_B; k = EXPR_C; }
>  
>     Make it:
>  
> -     tmp_i = (x > y) ? a : i;
> -     tmp_j = (x > y) ? b : j;
> -     tmp_k = (x > y) ? c : k;
> +     tmp_i = (x > y) ? EXPR_A : i;
> +     tmp_j = (x > y) ? EXPR_B : j;
> +     tmp_k = (x > y) ? EXPR_C : k;
>       i = tmp_i;
>       j = tmp_j;
>       k = tmp_k;
> @@ -3637,11 +3637,10 @@ noce_convert_multiple_sets_1 (struct noce_if_info 
> *if_info,
>  
>  
>  
> -/* Return true iff basic block TEST_BB is comprised of only
> -   (SET (REG) (REG)) insns suitable for conversion to a series
> -   of conditional moves.  Also check that we have more than one set
> -   (other routines can handle a single set better than we would), and
> -   fewer than PARAM_MAX_RTL_IF_CONVERSION_INSNS sets.  While going
> +/* Return true iff basic block TEST_BB is suitable for conversion to a
> +   series of conditional moves.  Also check that we have more than one
> +   set (other routines can handle a single set better than we would),
> +   and fewer than PARAM_MAX_RTL_IF_CONVERSION_INSNS sets.  While going
>     through the insns store the sum of their potential costs in COST.  */
>  
>  static bool
> @@ -3667,20 +3666,46 @@ bb_ok_for_noce_convert_multiple_sets (basic_block 
> test_bb, unsigned *cost)
>        rtx dest = SET_DEST (set);
>        rtx src = SET_SRC (set);
>  
> -      /* We can possibly relax this, but for now only handle REG to REG
> -      (including subreg) moves.  This avoids any issues that might come
> -      from introducing loads/stores that might violate data-race-freedom
> -      guarantees.  */
> -      if (!REG_P (dest))
> +      /* Do not handle anything involving memory loads/stores since it might
> +      violate data-race-freedom guarantees.  */
> +      if (!REG_P (dest) || contains_mem_rtx_p (src))
> +     return false;
> +
> +      if (!SCALAR_INT_MODE_P (GET_MODE (src)))
>       return false;
>  
> -      if (!((REG_P (src) || CONSTANT_P (src))
> -         || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src))
> -           && subreg_lowpart_p (src))))
> +      /* Allow a wide range of operations and let the costing function decide
> +      if the conversion is worth it later.  */
> +      enum rtx_code code = GET_CODE (src);
> +      if (!(CONSTANT_P (src)
> +         || code == REG
> +         || code == SUBREG
> +         || code == ZERO_EXTEND
> +         || code == SIGN_EXTEND
> +         || code == NOT
> +         || code == NEG
> +         || code == PLUS
> +         || code == MINUS
> +         || code == AND
> +         || code == IOR
> +         || code == MULT
> +         || code == ASHIFT
> +         || code == ASHIFTRT
> +         || code == NE
> +         || code == EQ
> +         || code == GE
> +         || code == GT
> +         || code == LE
> +         || code == LT
> +         || code == GEU
> +         || code == GTU
> +         || code == LEU
> +         || code == LTU
> +         || code == COMPARE))
>       return false;

I'm nervous about lists of operations like these, for two reasons:

(1) It isn't obvious what criteria are used to select the codes.

(2) It requires the top-level code to belong to a given set, but it
    allows subrtxes of src to be arbitrarily complex.  E.g. (to pick
    a silly example) a toplevel (popcount ...) would be rejected, but
    (plus (popcount ...) (const_int 1)) would be OK.

Could we just remove this condition instead?

Thanks,
Richard

>  
> -      /* Destination must be appropriate for a conditional write.  */
> -      if (!noce_operand_ok (dest))
> +      /* Destination and source must be appropriate.  */
> +      if (!noce_operand_ok (dest) || !noce_operand_ok (src))
>       return false;
>  
>        /* We must be able to conditionally move in this mode.  */
> diff --git a/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c 
> b/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
> new file mode 100644
> index 00000000000..d977f4d62ec
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
> @@ -0,0 +1,79 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-rtl-ce1" } */
> +
> +void sink2(int, int);
> +void sink3(int, int, int);
> +
> +void cond1(int cond, int x, int y)
> +{
> +  if (cond)
> +    {
> +      x = x << 4;
> +      y = 1;
> +    }
> +
> +  sink2(x, y);
> +}
> +
> +void cond2(int cond, int x, int y)
> +{
> +  if (cond)
> +    {
> +      x++;
> +      y++;
> +    }
> +
> +  sink2(x, y);
> +}
> +
> +void cond3(int cond, int x1, int x2, int x3)
> +{
> +  if (cond)
> +    {
> +      x1++;
> +      x2++;
> +      x3++;
> +    }
> +
> +  sink3(x1, x2, x3);
> +}
> +
> +void cond4(int cond, int x, int y)
> +{
> +  if (cond)
> +    {
> +      x += 2;
> +      y += 3;
> +    }
> +
> +  sink2(x, y);
> +}
> +
> +void cond5(int cond, int x, int y, int r1, int r2)
> +{
> +  if (cond)
> +    {
> +      x = r1 + 2;
> +      y = r2 - 34;
> +    }
> +
> +  sink2(x, y);
> +}
> +
> +void cond6(int cond, int x, int y)
> +{
> +  if (cond)
> +    {
> +      x = -x;
> +      y = ~y;
> +    }
> +
> +  sink2(x, y);
> +}
> +
> +/* { dg-final { scan-assembler-times "cinc\t" 5 } } */
> +/* { dg-final { scan-assembler-times "csneg\t" 1 } } */
> +/* { dg-final { scan-assembler-times "csinv\t" 1 } } */
> +/* { dg-final { scan-assembler "csel\t" } } */
> +
> +/* { dg-final { scan-rtl-dump-times "if-conversion succeeded through 
> noce_convert_multiple_sets" 6 "ce1" } } */
> \ No newline at end of file

Reply via email to