As mentioned in PR65768, ARM GCC generates suboptimal code for constants used in loops. Part of the reason is that the ARM back-end splits constants during RTL expansion, making it hard for the RTL optimization passes to optimize them. Zhenqiang posted a patch at https://gcc.gnu.org/ml/gcc-patches/2014-08/msg00325.html to fix this.
I tried it with a few more test cases and enhanced it. Regression tested on arm-none-linux-gnu with no new regressions. Is this OK for trunk? Thanks, Kugan gcc/ChangeLog: 2015-04-15 Kugan Vivekanandarajah <kug...@linaro.org> Zhenqiang Chen <zhenqiang.c...@linaro.org> PR target/65768 * config/arm/arm-protos.h (const_ok_for_split): New definition. * config/arm/arm.c (const_ok_for_split): New function. * config/arm/arm.md (subsi3, andsi3, iorsi3, xorsi3, movsi): Keep some large constants in register instead of splitting them. gcc/testsuite/ChangeLog: 2015-04-15 Kugan Vivekanandarajah <kug...@linaro.org> Zhenqiang Chen <zhenqiang.c...@linaro.org> PR target/65768 * gcc.target/arm/maskdata.c: New test.
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 16eb854..1b131a9 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -58,6 +58,7 @@ extern bool arm_modes_tieable_p (machine_mode, machine_mode); extern int const_ok_for_arm (HOST_WIDE_INT); extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code); extern int const_ok_for_dimode_op (HOST_WIDE_INT, enum rtx_code); +extern int const_ok_for_split (HOST_WIDE_INT, enum rtx_code); extern int arm_split_constant (RTX_CODE, machine_mode, rtx, HOST_WIDE_INT, rtx, rtx, int); extern int legitimate_pic_operand_p (rtx); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 8fd1388..0c13666 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -3745,6 +3745,41 @@ const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code) } } +/* Return true if I is a valid constant for split with the operation CODE. + The condition should align with the constrain of the corresponding + define_insn_and_split pattern to make sure later pass can optimize + the constants. */ +int +const_ok_for_split (HOST_WIDE_INT i, enum rtx_code code) +{ + if (optimize < 2 + || !can_create_pseudo_p () + || const_ok_for_arm (i) + /* Since expand pass always uses "sign-extend" to get the value + (trunc_int_for_mode called from immed_wide_int_const) for rtl, + and logs show most negative values are UNSIGNED when they are + TREE node. And combine pass is smart enough to recover the + negative value to positive value. */ + || ((i < 0) && const_ok_for_arm (-i))) + return 1; + + switch (code) + { + case AND: + /* zero_extendhi instruction is efficient. */ + return const_ok_for_arm (~i) || (i == 0xffff); + + case IOR: + return TARGET_THUMB2 && const_ok_for_arm (~i); + + case SET: + return const_ok_for_arm (i) || const_ok_for_arm (~i); + + default: + return 1; + } +} + /* Emit a sequence of insns to handle a large constant. 
CODE is the code of the operation required, it can be any of SET, PLUS, IOR, AND, XOR, MINUS; diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 164ac13..a169775 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -1164,10 +1164,16 @@ { if (TARGET_32BIT) { - arm_split_constant (MINUS, SImode, NULL_RTX, - INTVAL (operands[1]), operands[0], - operands[2], optimize && can_create_pseudo_p ()); - DONE; + if (!const_ok_for_split (INTVAL (operands[1]), MINUS)) + operands[1] = force_reg (SImode, operands[1]); + else + { + arm_split_constant (MINUS, SImode, NULL_RTX, + INTVAL (operands[1]), operands[0], + operands[2], + optimize && can_create_pseudo_p ()); + DONE; + } } else /* TARGET_THUMB1 */ operands[1] = force_reg (SImode, operands[1]); @@ -2078,14 +2084,19 @@ operands[1] = convert_to_mode (QImode, operands[1], 1); emit_insn (gen_thumb2_zero_extendqisi2_v6 (operands[0], operands[1])); + DONE; } + else if (!const_ok_for_split (INTVAL (operands[2]), AND)) + operands[2] = force_reg (SImode, operands[2]); else - arm_split_constant (AND, SImode, NULL_RTX, - INTVAL (operands[2]), operands[0], - operands[1], - optimize && can_create_pseudo_p ()); + { + arm_split_constant (AND, SImode, NULL_RTX, + INTVAL (operands[2]), operands[0], + operands[1], + optimize && can_create_pseudo_p ()); - DONE; + DONE; + } } } else /* TARGET_THUMB1 */ @@ -2884,10 +2895,16 @@ { if (TARGET_32BIT) { - arm_split_constant (IOR, SImode, NULL_RTX, - INTVAL (operands[2]), operands[0], operands[1], - optimize && can_create_pseudo_p ()); - DONE; + if (!const_ok_for_split (INTVAL (operands[2]), IOR)) + operands[2] = force_reg (SImode, operands[2]); + else + { + arm_split_constant (IOR, SImode, NULL_RTX, + INTVAL (operands[2]), operands[0], + operands[1], + optimize && can_create_pseudo_p ()); + DONE; + } } else /* TARGET_THUMB1 */ { @@ -3054,10 +3071,16 @@ { if (TARGET_32BIT) { - arm_split_constant (XOR, SImode, NULL_RTX, - INTVAL (operands[2]), operands[0], operands[1], - 
optimize && can_create_pseudo_p ()); - DONE; + if (!const_ok_for_split (INTVAL (operands[2]), XOR)) + operands[2] = force_reg (SImode, operands[2]); + else + { + arm_split_constant (XOR, SImode, NULL_RTX, + INTVAL (operands[2]), operands[0], + operands[1], + optimize && can_create_pseudo_p ()); + DONE; + } } else /* TARGET_THUMB1 */ { @@ -5554,10 +5577,18 @@ && !(const_ok_for_arm (INTVAL (operands[1])) || const_ok_for_arm (~INTVAL (operands[1])))) { - arm_split_constant (SET, SImode, NULL_RTX, - INTVAL (operands[1]), operands[0], NULL_RTX, - optimize && can_create_pseudo_p ()); - DONE; + if (!const_ok_for_split (INTVAL (operands[1]), SET)) + { + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); + DONE; + } + else + { + arm_split_constant (SET, SImode, NULL_RTX, + INTVAL (operands[1]), operands[0], NULL_RTX, + optimize && can_create_pseudo_p ()); + DONE; + } } } else /* TARGET_THUMB1... */ diff --git a/gcc/testsuite/gcc.target/arm/maskdata.c b/gcc/testsuite/gcc.target/arm/maskdata.c index e69de29..6d6bb39 100644 --- a/gcc/testsuite/gcc.target/arm/maskdata.c +++ b/gcc/testsuite/gcc.target/arm/maskdata.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options " -O2 -fno-gcse " } */ +/* { dg-require-effective-target arm_thumb2_ok } */ + +#define MASK 0xff00ff +void maskdata (int * data, int len) +{ + int i = len; + for (; i > 0; i -= 2) + { + data[i] &= MASK; + data[i + 1] &= MASK; + } +} +/* { dg-final { scan-assembler-not "65280" } } */