In debugging the glibc function __ieee754_scalbl when compiling for a power7 target, we discovered that the compiler was using VSX load and store instructions to save long double types that are passed in floating point registes. However, since long double types are passed as two scalar floating point values, it wastes space, and takes extra setup to use the VSX instructions. This patch only uses scalar load/store instructions to save long double and __Decimal128, which has the same problem.
I have bootstraped the compiler and found no regressions in the code. Is this patch acceptable to check into the trunk, and gcc 4.8/4.7 branches? [gcc] 2013-05-03 Michael Meissner <meiss...@linux.vnet.ibm.com> PR target/57150 * config/rs6000/rs6000.h (HARD_REGNO_CALLER_SAVE_MODE): Use DFmode to save TFmode registers and DImode to save TImode registers for caller save operations. (HARD_REGNO_CALL_PART_CLOBBERED): TFmode and TDmode do not need to mark being partially clobbered since they only use the first double word. * config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): TFmode and TDmode only use the upper 64-bits of each VSX register. [gcc/testsuite] 2013-05-03 Michael Meissner <meiss...@linux.vnet.ibm.com> PR target/57150 * gcc.target/powerpc/pr57150.c: New file. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 198584) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -2335,8 +2335,16 @@ rs6000_init_hard_regno_mode_ok (bool glo reg_size = UNITS_PER_WORD; for (m = 0; m < NUM_MACHINE_MODES; ++m) - rs6000_class_max_nregs[m][c] - = (GET_MODE_SIZE (m) + reg_size - 1) / reg_size; + { + int reg_size2 = reg_size; + + /* TFmode/TDmode always takes 2 registers, even in VSX. */ + if (m == TDmode || m == TFmode) + reg_size2 = UNITS_PER_FP_WORD; + + rs6000_class_max_nregs[m][c] + = (GET_MODE_SIZE (m) + reg_size2 - 1) / reg_size2; + } } if (TARGET_E500_DOUBLE) Index: gcc/config/rs6000/rs6000.h =================================================================== --- gcc/config/rs6000/rs6000.h (revision 198584) +++ gcc/config/rs6000/rs6000.h (working copy) @@ -1071,12 +1071,17 @@ extern unsigned rs6000_pointer_size; #define HARD_REGNO_NREGS(REGNO, MODE) rs6000_hard_regno_nregs[(MODE)][(REGNO)] /* When setting up caller-save slots (MODE == VOIDmode) ensure we allocate - enough space to account for vectors in FP regs. */ + enough space to account for vectors in FP regs. However, TFmode/TDmode + should not use VSX instructions to do a caller save. */ #define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \ (TARGET_VSX \ && ((MODE) == VOIDmode || ALTIVEC_OR_VSX_VECTOR_MODE (MODE)) \ - && FP_REGNO_P (REGNO) \ - ? V2DFmode \ + && FP_REGNO_P (REGNO) \ + ? V2DFmode \ + : ((MODE) == TFmode && FP_REGNO_P (REGNO)) \ + ? DFmode \ + : ((MODE) == TDmode && FP_REGNO_P (REGNO)) \ + ? DImode \ : choose_hard_reg_mode ((REGNO), (NREGS), false)) #define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \ @@ -1084,7 +1089,8 @@ extern unsigned rs6000_pointer_size; && (GET_MODE_SIZE (MODE) > 4) \ && INT_REGNO_P (REGNO)) ? 1 : 0) \ || (TARGET_VSX && FP_REGNO_P (REGNO) \ - && GET_MODE_SIZE (MODE) > 8)) + && GET_MODE_SIZE (MODE) > 8 && ((MODE) != TDmode) \ + && ((MODE) != TFmode))) #define VSX_VECTOR_MODE(MODE) \ ((MODE) == V4SFmode \ Index: gcc/testsuite/gcc.target/powerpc/pr57150.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/pr57150.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/pr57150.c (revision 0) @@ -0,0 +1,23 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O3 -mcpu=power7 -fcaller-saves" } */ +/* { dg-final { scan-assembler-not "lxvd2x" } } */ +/* { dg-final { scan-assembler-not "lxvw4x" } } */ +/* { dg-final { scan-assembler-not "lvx" } } */ +/* { dg-final { scan-assembler-not "stxvd2x" } } */ +/* { dg-final { scan-assembler-not "stxvw4x" } } */ +/* { dg-final { scan-assembler-not "stvx" } } */ + +/* Insure caller save on long double does not use VSX instructions. */ + +extern long double modify (long double); + +void +sum (long double *ptr, long double value, unsigned long n) +{ + unsigned long i; + + for (i = 0; i < n; i++) + ptr[i] += modify (value); +}