This patch adds support for the TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS hook on AArch64. When the costs of GENERAL_REGS and FP_REGS are identical, the register allocator always chooses ALL_REGS, even when ALL_REGS has a much higher cost. The hook changes an ALL_REGS class to FP_REGS if the pseudo register has a floating-point or vector mode, and to GENERAL_REGS otherwise; any other class is left unchanged. This results in better register allocation overall, fewer spills and reduced code size - particularly in SPEC2006 gamess.
GCC regression passes with several minor fixes. OK for commit? ChangeLog: 2015-11-06 Wilco Dijkstra <wdijk...@arm.com> * gcc/config/aarch64/aarch64.c (TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS): New define. (aarch64_ira_change_pseudo_allocno_class): New function. * gcc/testsuite/gcc.target/aarch64/cvtf_1.c: Build with -O2. * gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c (test_corners_sisd_di): Improve force to SIMD register. (test_corners_sisd_si): Likewise. * gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c: Build with -O2. * gcc/testsuite/gcc.target/aarch64/vect-ld1r-compile-fp.c: Remove scan-assembler check for ldr. -- gcc/config/aarch64/aarch64.c | 22 ++++++++++++++++++++++ gcc/testsuite/gcc.target/aarch64/cvtf_1.c | 2 +- gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c | 4 ++-- gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c | 2 +- .../gcc.target/aarch64/vect-ld1r-compile-fp.c | 1 - 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 6da7245..9b60666 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -597,6 +597,24 @@ aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg) error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc, msg); } +/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS. + The register allocator chooses ALL_REGS if FP_REGS and GENERAL_REGS have + the same cost even if ALL_REGS has a much larger cost. This results in bad + allocations and spilling. To avoid this we force the class to GENERAL_REGS + if the mode is integer. */ + +static reg_class_t +aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class) +{ + enum machine_mode mode; + + if (allocno_class != ALL_REGS) + return allocno_class; + + mode = PSEUDO_REGNO_MODE (regno); + return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? 
FP_REGS : GENERAL_REGS; +} + static unsigned int aarch64_min_divisions_for_recip_mul (enum machine_mode mode) { @@ -13113,6 +13131,10 @@ aarch64_promoted_type (const_tree t) #undef TARGET_INIT_BUILTINS #define TARGET_INIT_BUILTINS aarch64_init_builtins +#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS +#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \ + aarch64_ira_change_pseudo_allocno_class + #undef TARGET_LEGITIMATE_ADDRESS_P #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p diff --git a/gcc/testsuite/gcc.target/aarch64/cvtf_1.c b/gcc/testsuite/gcc.target/aarch64/cvtf_1.c index 5f2ff81..96501db 100644 --- a/gcc/testsuite/gcc.target/aarch64/cvtf_1.c +++ b/gcc/testsuite/gcc.target/aarch64/cvtf_1.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-save-temps -fno-inline -O1" } */ +/* { dg-options "-save-temps -fno-inline -O2" } */ #define FCVTDEF(ftype,itype) \ void \ diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c b/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c index 363f554..8465c89 100644 --- a/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c +++ b/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c @@ -186,9 +186,9 @@ test_corners_sisd_di (Int64x1 b) { force_simd_di (b); b = b >> 63; + force_simd_di (b); b = b >> 0; b += b >> 65; /* { dg-warning "right shift count >= width of type" } */ - force_simd_di (b); return b; } @@ -199,9 +199,9 @@ test_corners_sisd_si (Int32x1 b) { force_simd_si (b); b = b >> 31; + force_simd_si (b); b = b >> 0; b += b >> 33; /* { dg-warning "right shift count >= width of type" } */ - force_simd_si (b); return b; } diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c b/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c index a49db3e..c5a9c52 100644 --- a/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c +++ b/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c @@ -1,6 +1,6 @@ /* Test vdup_lane intrinsics work correctly. 
*/ /* { dg-do run } */ -/* { dg-options "-O1 --save-temps" } */ +/* { dg-options "-O2 --save-temps" } */ #include <arm_neon.h> diff --git a/gcc/testsuite/gcc.target/aarch64/vect-ld1r-compile-fp.c b/gcc/testsuite/gcc.target/aarch64/vect-ld1r-compile-fp.c index 66e0168..4711c61 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-ld1r-compile-fp.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-ld1r-compile-fp.c @@ -8,6 +8,5 @@ DEF (float) DEF (double) /* { dg-final { scan-assembler "ld1r\\t\{v\[0-9\]+\.4s"} } */ -/* { dg-final { scan-assembler "ldr\\t\x\[0-9\]+"} } */ /* { dg-final { scan-assembler "ld1r\\t\{v\[0-9\]+\.2d"} } */ -- 1.8.3