While debugging the glibc function __ieee754_scalbl when compiling for a power7
target, we discovered that the compiler was using VSX load and store
instructions to save long double types that are passed in floating point
registers.  However, since long double types are passed as two scalar floating
point values, using the VSX instructions wastes space and takes extra setup.
This patch uses only scalar load/store instructions to save long double and
_Decimal128, which has the same problem.

I have bootstrapped the compiler and found no regressions in the code.  Is this
patch acceptable to check into the trunk and the gcc 4.8/4.7 branches?

[gcc]
2013-05-03  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        PR target/57150
        * config/rs6000/rs6000.h (HARD_REGNO_CALLER_SAVE_MODE): Use DFmode
        to save TFmode registers and DImode to save TDmode registers for
        caller save operations.
        (HARD_REGNO_CALL_PART_CLOBBERED): TFmode and TDmode do not need to
        mark being partially clobbered since they only use the first
        double word.

        * config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): TFmode
        and TDmode only use the upper 64-bits of each VSX register.

[gcc/testsuite]
2013-05-03  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        PR target/57150
        * gcc.target/powerpc/pr57150.c: New file.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c  (revision 198584)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -2335,8 +2335,16 @@ rs6000_init_hard_regno_mode_ok (bool glo
        reg_size = UNITS_PER_WORD;
 
       for (m = 0; m < NUM_MACHINE_MODES; ++m)
-       rs6000_class_max_nregs[m][c]
-         = (GET_MODE_SIZE (m) + reg_size - 1) / reg_size;
+       {
+         int reg_size2 = reg_size;
+
+         /* TFmode/TDmode always takes 2 registers, even in VSX.  */
+         if (m == TDmode || m == TFmode)
+           reg_size2 = UNITS_PER_FP_WORD;
+
+         rs6000_class_max_nregs[m][c]
+           = (GET_MODE_SIZE (m) + reg_size2 - 1) / reg_size2;
+       }
     }
 
   if (TARGET_E500_DOUBLE)
Index: gcc/config/rs6000/rs6000.h
===================================================================
--- gcc/config/rs6000/rs6000.h  (revision 198584)
+++ gcc/config/rs6000/rs6000.h  (working copy)
@@ -1071,12 +1071,17 @@ extern unsigned rs6000_pointer_size;
 #define HARD_REGNO_NREGS(REGNO, MODE) rs6000_hard_regno_nregs[(MODE)][(REGNO)]
 
 /* When setting up caller-save slots (MODE == VOIDmode) ensure we allocate
-   enough space to account for vectors in FP regs. */
+   enough space to account for vectors in FP regs.  However, TFmode/TDmode
+   should not use VSX instructions to do a caller save. */
 #define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE)                        
\
   (TARGET_VSX                                                          \
    && ((MODE) == VOIDmode || ALTIVEC_OR_VSX_VECTOR_MODE (MODE))                
\
-   && FP_REGNO_P (REGNO)                               \
-   ? V2DFmode                                          \
+   && FP_REGNO_P (REGNO)                                               \
+   ? V2DFmode                                                          \
+   : ((MODE) == TFmode && FP_REGNO_P (REGNO))                          \
+   ? DFmode                                                            \
+   : ((MODE) == TDmode && FP_REGNO_P (REGNO))                          \
+   ? DImode                                                            \
    : choose_hard_reg_mode ((REGNO), (NREGS), false))
 
 #define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE)                    \
@@ -1084,7 +1089,8 @@ extern unsigned rs6000_pointer_size;
      && (GET_MODE_SIZE (MODE) > 4)                                     \
      && INT_REGNO_P (REGNO)) ? 1 : 0)                                  \
    || (TARGET_VSX && FP_REGNO_P (REGNO)                                        
\
-       && GET_MODE_SIZE (MODE) > 8))
+       && GET_MODE_SIZE (MODE) > 8 && ((MODE) != TDmode)               \
+       && ((MODE) != TFmode)))
 
 #define VSX_VECTOR_MODE(MODE)          \
         ((MODE) == V4SFmode            \
Index: gcc/testsuite/gcc.target/powerpc/pr57150.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr57150.c  (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr57150.c  (revision 0)
@@ -0,0 +1,23 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O3 -mcpu=power7 -fcaller-saves" } */
+/* { dg-final { scan-assembler-not "lxvd2x" } } */
+/* { dg-final { scan-assembler-not "lxvw4x" } } */
+/* { dg-final { scan-assembler-not "lvx" } } */
+/* { dg-final { scan-assembler-not "stxvd2x" } } */
+/* { dg-final { scan-assembler-not "stxvw4x" } } */
+/* { dg-final { scan-assembler-not "stvx" } } */
+
+/* Insure caller save on long double does not use VSX instructions.  */
+
+extern long double modify (long double);
+
+void
+sum (long double *ptr, long double value, unsigned long n)
+{
+  unsigned long i;
+
+  for (i = 0; i < n; i++)
+    ptr[i] += modify (value);
+}

Reply via email to