Hi, This patch defines LOGICAL_OP_NON_SHORT_CIRCUIT in the ARM back-end by calling a new hook function ("logical_op_non_short_circuit") in the tune_params structure. In most cases the value of the macro is the same as the default version in fold-const.c, but it is "FALSE", so that short-circuit evaluation is preferred, when optimizing for size on armv6-m processors. This brings us a ~0.2% code size improvement on the CSiBE benchmark for cortex-m0. Tuning for other ARM processors could follow.
No regressions were introduced. Is it OK? Thanks 2012-07-26 Bin Cheng <bin.ch...@arm.com> * config/arm/arm-cores.def (cortex-m1, cortex-m0, cortex-m0plus): Use v6m. * config/arm/arm-protos.h (tune_params): Add logical_op_non_short_circuit hook. * config/arm/arm.c (arm_default_logical_op_non_short_circuit) (arm_v6m_logical_op_non_short_circuit): New functions. (arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune, arm_xscale_tune) (arm_9e_tune, arm_v6t2_tune, arm_cortex_tune, arm_cortex_a15_tune) (arm_cortex_a5_tune, arm_cortex_a9_tune, arm_fa726te_tune): Set the field logical_op_non_short_circuit to arm_default_logical_op_non_short_circuit. (arm_v6m_tune): New tune_params struct. * config/arm/arm.h (LOGICAL_OP_NON_SHORT_CIRCUIT): Use the hook logical_op_non_short_circuit from the current_tune structure.
Index: gcc/config/arm/arm.c =================================================================== --- gcc/config/arm/arm.c (revision 189835) +++ gcc/config/arm/arm.c (working copy) @@ -265,6 +265,9 @@ static int arm_default_branch_cost (bool, bool); static int arm_cortex_a5_branch_cost (bool, bool); +static bool arm_default_logical_op_non_short_circuit (void); +static bool arm_v6m_logical_op_non_short_circuit (void); + static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode, const unsigned char *sel); @@ -876,7 +879,8 @@ ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ arm_default_branch_cost, - false /* Prefer LDRD/STRD. */ + false, /* Prefer LDRD/STRD. */ + arm_default_logical_op_non_short_circuit, }; const struct tune_params arm_fastmul_tune = @@ -888,7 +892,8 @@ ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ arm_default_branch_cost, - false /* Prefer LDRD/STRD. */ + false, /* Prefer LDRD/STRD. */ + arm_default_logical_op_non_short_circuit, }; /* StrongARM has early execution of branches, so a sequence that is worth @@ -903,7 +908,8 @@ ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ arm_default_branch_cost, - false /* Prefer LDRD/STRD. */ + false, /* Prefer LDRD/STRD. */ + arm_default_logical_op_non_short_circuit, }; const struct tune_params arm_xscale_tune = @@ -915,7 +921,8 @@ ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ arm_default_branch_cost, - false /* Prefer LDRD/STRD. */ + false, /* Prefer LDRD/STRD. */ + arm_default_logical_op_non_short_circuit, }; const struct tune_params arm_9e_tune = @@ -927,7 +934,8 @@ ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ arm_default_branch_cost, - false /* Prefer LDRD/STRD. */ + false, /* Prefer LDRD/STRD. */ + arm_default_logical_op_non_short_circuit, }; const struct tune_params arm_v6t2_tune = @@ -939,7 +947,8 @@ ARM_PREFETCH_NOT_BENEFICIAL, false, /* Prefer constant pool. */ arm_default_branch_cost, - false /* Prefer LDRD/STRD. 
*/ + false, /* Prefer LDRD/STRD. */ + arm_default_logical_op_non_short_circuit, }; /* Generic Cortex tuning. Use more specific tunings if appropriate. */ @@ -952,7 +961,8 @@ ARM_PREFETCH_NOT_BENEFICIAL, false, /* Prefer constant pool. */ arm_default_branch_cost, - false /* Prefer LDRD/STRD. */ + false, /* Prefer LDRD/STRD. */ + arm_default_logical_op_non_short_circuit, }; const struct tune_params arm_cortex_a15_tune = @@ -964,7 +974,8 @@ ARM_PREFETCH_NOT_BENEFICIAL, false, /* Prefer constant pool. */ arm_default_branch_cost, - true /* Prefer LDRD/STRD. */ + true, /* Prefer LDRD/STRD. */ + arm_default_logical_op_non_short_circuit, }; /* Branches can be dual-issued on Cortex-A5, so conditional execution is @@ -979,7 +990,8 @@ ARM_PREFETCH_NOT_BENEFICIAL, false, /* Prefer constant pool. */ arm_cortex_a5_branch_cost, - false /* Prefer LDRD/STRD. */ + false, /* Prefer LDRD/STRD. */ + arm_default_logical_op_non_short_circuit, }; const struct tune_params arm_cortex_a9_tune = @@ -991,9 +1003,25 @@ ARM_PREFETCH_BENEFICIAL(4,32,32), false, /* Prefer constant pool. */ arm_default_branch_cost, - false /* Prefer LDRD/STRD. */ + false, /* Prefer LDRD/STRD. */ + arm_default_logical_op_non_short_circuit, }; +/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than + arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */ +const struct tune_params arm_v6m_tune = +{ + arm_9e_rtx_costs, + NULL, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + arm_v6m_logical_op_non_short_circuit, +}; + const struct tune_params arm_fa726te_tune = { arm_9e_rtx_costs, @@ -1003,7 +1031,8 @@ ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ arm_default_branch_cost, - false /* Prefer LDRD/STRD. */ + false, /* Prefer LDRD/STRD. 
*/ + arm_default_logical_op_non_short_circuit, }; @@ -8637,7 +8666,24 @@ return cost; } + +static bool +arm_default_logical_op_non_short_circuit (void) +{ + return (BRANCH_COST (optimize_function_for_speed_p (cfun), + false) >= 2); +} +static bool +arm_v6m_logical_op_non_short_circuit (void) +{ + /* Prefer short circuit operation on armv6-m when optimizing for size. */ + if (optimize_size) + return false; + + return arm_default_logical_op_non_short_circuit (); +} + static int arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED) { Index: gcc/config/arm/arm.h =================================================================== --- gcc/config/arm/arm.h (revision 189835) +++ gcc/config/arm/arm.h (working copy) @@ -1994,10 +1994,14 @@ || (X) == arg_pointer_rtx) /* Try to generate sequences that don't involve branches, we can then use - conditional instructions */ + conditional instructions. */ #define BRANCH_COST(speed_p, predictable_p) \ (current_tune->branch_cost (speed_p, predictable_p)) +/* False if short circuit operation is preferred. */ +#define LOGICAL_OP_NON_SHORT_CIRCUIT \ + (current_tune->logical_op_non_short_circuit ()) + /* Position Independent Code. 
*/ /* We decide which register to use based on the compilation options and Index: gcc/config/arm/arm-cores.def =================================================================== --- gcc/config/arm/arm-cores.def (revision 189835) +++ gcc/config/arm/arm-cores.def (working copy) @@ -135,6 +135,6 @@ ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, cortex) ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex) -ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, cortex) -ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, cortex) -ARM_CORE("cortex-m0plus", cortexm0plus, 6M, FL_LDSCHED, cortex) +ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, v6m) +ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, v6m) +ARM_CORE("cortex-m0plus", cortexm0plus, 6M, FL_LDSCHED, v6m) Index: gcc/config/arm/arm-protos.h =================================================================== --- gcc/config/arm/arm-protos.h (revision 189835) +++ gcc/config/arm/arm-protos.h (working copy) @@ -240,6 +240,7 @@ int (*branch_cost) (bool, bool); /* Prefer STRD/LDRD instructions over PUSH/POP/LDM/STM. */ bool prefer_ldrd_strd; + bool (*logical_op_non_short_circuit) (void); }; extern const struct tune_params *current_tune;