[PATCH, rs6000] Update Power7 scheduling

2011-10-27 Thread Pat Haugen
The following patch fixes some issues with the Power7 scheduling description. 
The patch is neutral on cpu2006 (I was actually hoping to see some improvements), 
but it's still the right thing to do since it more accurately describes the 
hardware.


Bootstrap/regtest on powerpc64-linux with no new regressions. Ok for trunk?

-Pat


2011-10-27  Pat Haugen 

* config/rs6000/rs6000.md (define_attr "type"): Add vecdouble.
* config/rs6000/vsx.md (VStype_simple, VStype_mul): Use vecdouble
type for V2DF.
(VStype_div): Use vector types for V2DF/V4SF.
(VStype_sqrt): Use *sqrt types.
(VS_spdp_type): Change type to vecdouble.
(*vsx_fmav2df4, *vsx_nfmsv2df4, vsx_xvcvdpsxws, vsx_xvcvdpuxws,
vsx_xvcvuxdsp, vsx_xvcvsxwdp, vsx_xvcvuxwdp, vsx_xvcvspsxds,
vsx_xvcvspuxds): Likewise.
(*vsx_fms<mode>4): Set type via <VStype_mul>.
(*vsx_eq_<mode>_p, *vsx_gt_<mode>_p, *vsx_ge_<mode>_p): Set type via
<VStype_simple>.
* config/rs6000/power7.md (power7-vecstore): Correct VSU pipe.
(power7-fpcompare, power7-sdiv, power7-ddiv, power7-sqrt,
power7-dsqrt): Correct insn latency.
(power7-vecsimple): Add veccmp type and correct dispatch/VSU values.
(power7-veccmp): Delete.
(power7-vecfloat): Correct latency/dispatch/VSU values.
(define_bypass "power7-vecfloat"): Correct latency and types.
(power7-veccomplex, power7-vecperm): Correct dispatch/VSU values.
(power7-vecdouble, power7-vecfdiv, power7-vecdiv): New.

Index: gcc/config/rs6000/rs6000.md
===
--- gcc/config/rs6000/rs6000.md	(revision 180100)
+++ gcc/config/rs6000/rs6000.md	(working copy)
@@ -144,7 +144,7 @@ (define_c_enum "unspecv"
 
 ;; Define an insn type attribute.  This is used in function unit delay
 ;; computations.
-(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel"
+(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel"
   (const_string "integer"))
 
 ;; Define floating point instruction sub-types for use with Xfpu.md
Index: gcc/config/rs6000/vsx.md
===
--- gcc/config/rs6000/vsx.md	(revision 180100)
+++ gcc/config/rs6000/vsx.md	(working copy)
@@ -120,7 +120,7 @@ (define_mode_attr VSv	[(V16QI "v")
 			 (DF"s")])
 
 ;; Appropriate type for add ops (and other simple FP ops)
-(define_mode_attr VStype_simple	[(V2DF "vecfloat")
+(define_mode_attr VStype_simple	[(V2DF "vecdouble")
  (V4SF "vecfloat")
  (DF   "fp")])
 
@@ -129,7 +129,7 @@ (define_mode_attr VSfptype_simple [(V2DF
    (DF   "fp_addsub_d")])
 
 ;; Appropriate type for multiply ops
-(define_mode_attr VStype_mul	[(V2DF "vecfloat")
+(define_mode_attr VStype_mul	[(V2DF "vecdouble")
  (V4SF "vecfloat")
  (DF   "dmul")])
 
@@ -137,10 +137,9 @@ (define_mode_attr VSfptype_mul	[(V2DF "f
  (V4SF "fp_mul_s")
  (DF   "fp_mul_d")])
 
-;; Appropriate type for divide ops.  For now, just lump the vector divide with
-;; the scalar divides
-(define_mode_attr VStype_div	[(V2DF "ddiv")
- (V4SF "sdiv")
+;; Appropriate type for divide ops.
+(define_mode_attr VStype_div	[(V2DF "vecdiv")
+ (V4SF "vecfdiv")
  (DF   "ddiv")])
 
 (define_mode_attr VSfptype_div	[(V2DF "fp_div_d")
@@ -150,8 +149,8 @@ (define_mode_attr VSfptype_div	[(V2DF "f
 ;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
 ;; the scalar sqrt
 (define_mode_attr VStype_sqrt	[(V2DF "dsqrt")
- (V4SF "sdiv")
- (DF   "ddiv")])
+ (V4SF "ssqrt")
+	

Re: [v3] Library bits of c++/49813

2011-07-28 Thread Pat Haugen

On 07/28/2011 04:43 AM, Paolo Carlini wrote:

/usr/local/gcc/gcc-20110728/Build/ia64-suse-linux/libstdc++-v3/include/cmath:
In function 'constexpr float std::fma(float, float, float)':
/usr/local/gcc/gcc-20110728/Build/ia64-suse-linux/libstdc++-v3/include/cmath:1288:43:
sorry, unimplemented: unexpected ast of kind fma_expr
/usr/local/gcc/gcc-20110728/Build/ia64-suse-linux/libstdc++-v3/include/cmath:1288:43:
internal compiler error: in potential_constant_expression_1, at
cp/semantics.c:8094

in the past we encountered already a few small problems of this kind, with cases
missing from the potential_constant_expression_1 switch. I believe something
quite close to what I'm attaching below should be enough, can you give it a try?

In any case, we definitely want Jason to have a look as soon as possible. If you
want to restore the ia64 bootstrap in the meanwhile, feel free to comment out
any troublesome constexpr specifier in that file (or replacing it with inline).

Thanks!
Paolo.

//



p


Index: semantics.c
===
--- semantics.c (revision 176846)
+++ semantics.c (working copy)
@@ -8057,6 +8057,13 @@ potential_constant_expression_1 (tree t, bool want
  return false;
return true;

+case FMA_EXPR:
+  for (i = 0; i<  3; ++i)
+   if (!potential_constant_expression_1 (TREE_OPERAND (t, i),
+ true, flags))
+ return false;
+  return true;
+
  case COND_EXPR:
  case VEC_COND_EXPR:
/* If the condition is a known constant, we know which of the legs we


I am seeing the same error on PowerPC and the above patch fixes it.

-Pat


Re: [PATCH, rs6000] Fix REG_CLASS_CONTENTS

2011-06-03 Thread Pat Haugen

On 05/31/2011 02:18 PM, David Edelsohn wrote:

On Tue, May 31, 2011 at 12:08 PM, Pat Haugen
  wrote:

>  The following patch fixes an issue I noticed where vr0..vr2 were
>  inadvertently included in NON_FLOAT_REGS.
>
>  Bootstrap/regtest on powerpc64-linux with no new regressions. Ok for trunk?
>
>  -Pat
>
>
>  2011-05-31  Pat Haugen
>
>  * config/rs6000/rs6000.h (REG_CLASS_CONTENTS): Remove vr0..vr2 from
>  NON_FLOAT_REGS.

Okay.



I should have asked before, ok for 4.6 also after bootstrap/regtest?

-Pat



[PATCH, rs6000] Fix REG_CLASS_CONTENTS

2011-05-31 Thread Pat Haugen
The following patch fixes an issue I noticed where vr0..vr2 were inadvertently 
included in NON_FLOAT_REGS.


Bootstrap/regtest on powerpc64-linux with no new regressions. Ok for trunk?

-Pat


2011-05-31  Pat Haugen 

* config/rs6000/rs6000.h (REG_CLASS_CONTENTS): Remove vr0..vr2 from
NON_FLOAT_REGS.



Index: gcc/config/rs6000/rs6000.h
===
--- gcc/config/rs6000/rs6000.h  (revision 174304)
+++ gcc/config/rs6000/rs6000.h  (working copy)
@@ -1224,7 +1224,7 @@ enum reg_class
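   { 0xffffffff, 0x00000000, 0x0000000f, 0x00022000 }, /* SPEC_OR_GEN_REGS */ \
   { 0x00000000, 0x00000000, 0x00000010, 0x00000000 }, /* CR0_REGS */         \
   { 0x00000000, 0x00000000, 0x00000ff0, 0x00000000 }, /* CR_REGS */          \
-  { 0xffffffff, 0x00000000, 0x0000efff, 0x00020000 }, /* NON_FLOAT_REGS */   \
+  { 0xffffffff, 0x00000000, 0x00000fff, 0x00020000 }, /* NON_FLOAT_REGS */   \
   { 0x00000000, 0x00000000, 0x00001000, 0x00000000 }, /* CA_REGS */          \
   { 0xffffffff, 0xffffffff, 0xffffffff, 0x0003ffff }  /* ALL_REGS */         \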
 }



[PATCH, rs6000] Tidy up dumping of register/memory move cost

2011-05-25 Thread Pat Haugen
The following fixes a problem when dumping register costs, where the incorrect 
'from' value was being written out because the code modified the incoming 
parameter value. It also changes things so that register/memory costs are only 
dumped on the outermost call, eliminating intermediate output when a cost 
calculation requires going through memory or GPRs.


Bootstrap/regtest on powerpc64-linux with no new regressions. Ok for trunk?

-Pat


2011-05-25  Pat Haugen 

* config/rs6000/rs6000.c (rs6000_register_move_cost): Preserve from
parameter value for dump. Dump cost on outermost call only.
(rs6000_memory_move_cost): Dump cost on outermost call only.

Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c	(revision 174138)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -189,6 +189,8 @@ enum reg_class rs6000_regno_regclass[FIR
 /* Reload functions based on the type and the vector unit.  */
 static enum insn_code rs6000_vector_reload[NUM_MACHINE_MODES][2];
 
+static int dbg_cost_ctrl;
+
 /* Built in types.  */
 tree rs6000_builtin_types[RS6000_BTI_MAX];
 tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];
@@ -26428,26 +26430,31 @@ rs6000_register_move_cost (enum machine_
 {
   int ret;
 
+  if (TARGET_DEBUG_COST)
+dbg_cost_ctrl++;
+
   /*  Moves from/to GENERAL_REGS.  */
   if (reg_classes_intersect_p (to, GENERAL_REGS)
   || reg_classes_intersect_p (from, GENERAL_REGS))
 {
+  reg_class_t rclass = from;
+
   if (! reg_classes_intersect_p (to, GENERAL_REGS))
-	from = to;
+	rclass = to;
 
-  if (from == FLOAT_REGS || from == ALTIVEC_REGS || from == VSX_REGS)
-	ret = (rs6000_memory_move_cost (mode, from, false)
+  if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
+	ret = (rs6000_memory_move_cost (mode, rclass, false)
 	   + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
 
   /* It's more expensive to move CR_REGS than CR0_REGS because of the
 	 shift.  */
-  else if (from == CR_REGS)
+  else if (rclass == CR_REGS)
 	ret = 4;
 
   /* Power6 has slower LR/CTR moves so make them more expensive than
 	 memory in order to bias spills to memory .*/
   else if (rs6000_cpu == PROCESSOR_POWER6
-	   && reg_classes_intersect_p (from, LINK_OR_CTR_REGS))
+	   && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
 ret = 6 * hard_regno_nregs[0][mode];
 
   else
@@ -26471,10 +26478,14 @@ rs6000_register_move_cost (enum machine_
 	   + rs6000_register_move_cost (mode, from, GENERAL_REGS));
 
   if (TARGET_DEBUG_COST)
-fprintf (stderr,
-	 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
-	 ret, GET_MODE_NAME (mode), reg_class_names[from],
-	 reg_class_names[to]);
+{
+  if (dbg_cost_ctrl == 1)
+	fprintf (stderr,
+		 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
+		 ret, GET_MODE_NAME (mode), reg_class_names[from],
+		 reg_class_names[to]);
+  dbg_cost_ctrl--;
+}
 
   return ret;
 }
@@ -26488,6 +26499,9 @@ rs6000_memory_move_cost (enum machine_mo
 {
   int ret;
 
+  if (TARGET_DEBUG_COST)
+dbg_cost_ctrl++;
+
   if (reg_classes_intersect_p (rclass, GENERAL_REGS))
 ret = 4 * hard_regno_nregs[0][mode];
   else if (reg_classes_intersect_p (rclass, FLOAT_REGS))
@@ -26498,9 +26512,13 @@ rs6000_memory_move_cost (enum machine_mo
 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
 
   if (TARGET_DEBUG_COST)
-fprintf (stderr,
-	 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
-	 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
+{
+  if (dbg_cost_ctrl == 1)
+	fprintf (stderr,
+		 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
+		 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
+  dbg_cost_ctrl--;
+}
 
   return ret;
 }


Re: [PATCH, rs6000 committed] Fix PowerPC bootstrap

2011-04-15 Thread Pat Haugen

On 04/12/2011 08:22 PM, Alan Modra wrote:

On Tue, Apr 12, 2011 at 04:00:45PM -0500, Pat Haugen wrote:

>  --- gcc/config/rs6000/rs6000.c  (revision 172327)
>  +++ gcc/config/rs6000/rs6000.c  (working copy)
>  @@ -7976,7 +7976,7 @@ call_ABI_of_interest (tree fndecl)
>   return true;
>
>  /* Interesting functions that we are emitting in this object file.  
*/
>  -  c_node = cgraph_node (fndecl);
>  +  c_node = cgraph_get_create_node (fndecl);
>  return !cgraph_only_called_directly_p (c_node);
>}
>  return false;

I think we should use cgraph_get_node here.


OK, fixed with following.


2011-04-15  Pat Haugen 

* config/rs6000/rs6000.c (call_ABI_of_interest): Call
cgraph_get_node instead of cgraph_get_create_node.


Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  (revision 172498)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -7976,7 +7976,7 @@ call_ABI_of_interest (tree fndecl)
return true;

   /* Interesting functions that we are emitting in this object file.  */
-  c_node = cgraph_get_create_node (fndecl);
+  c_node = cgraph_get_node (fndecl);
   return !cgraph_only_called_directly_p (c_node);
 }
   return false;


[PATCH, rs6000 committed] Fix PowerPC bootstrap

2011-04-12 Thread Pat Haugen
Discussed the following with Martin on irc to bring rs6000 target up to date 
with his changes to the cgraph code.  Bootstrap/regtest on powerpc64-linux. 
Committed as obvious.


-Pat


2011-04-12  Pat Haugen 

* config/rs6000/rs6000.c (call_ABI_of_interest): Call
cgraph_get_create_node instead of cgraph_node.


Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  (revision 172327)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -7976,7 +7976,7 @@ call_ABI_of_interest (tree fndecl)
return true;

   /* Interesting functions that we are emitting in this object file.  */
-  c_node = cgraph_node (fndecl);
+  c_node = cgraph_get_create_node (fndecl);
   return !cgraph_only_called_directly_p (c_node);
 }
   return false;


[PATCH, rs6000] Make LR/CTR moves expensive for Power7 also

2011-04-11 Thread Pat Haugen

The following was overlooked in the initial Power7 support.

Bootstrap/regtest on powerpc64-linux with no new regressions. Ok for trunk and 
4.6?

-Pat


2011-04-11  Pat Haugen 

* config/rs6000/rs6000.c (rs6000_register_move_cost): Make LR/CTR
moves expensive on Power7 also.



Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  (revision 172255)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -26701,9 +26701,10 @@ rs6000_register_move_cost (enum machine_
   else if (from == CR_REGS)
ret = 4;

-  /* Power6 has slower LR/CTR moves so make them more expensive than
-memory in order to bias spills to memory .*/
-  else if (rs6000_cpu == PROCESSOR_POWER6
+  /* For those processors that have slow LR/CTR moves, make them more
+expensive than memory in order to bias spills to memory .*/
+  else if ((rs6000_cpu == PROCESSOR_POWER6
+   || rs6000_cpu == PROCESSOR_POWER7)
   && reg_classes_intersect_p (from, LINK_OR_CTR_REGS))
 ret = 6 * hard_regno_nregs[0][mode];


[PATCH][4.5] Fix backport of PR 47862 to 4.5 branch

2011-03-15 Thread Pat Haugen
The following changes are already present on trunk as part of the fix for 
PR44364. Not having them on 4.5 caused problems with the subject fix when trying 
to spill FP regs with a stack > 32K.


Bootstrap/regtest 4.5 branch on powerpc with no new failures, ok for 4.5?

-Pat


2011-03-15  Pat Haugen 

PR target/47862
* caller-save.c (insert_restore, insert_save): Use non-validate
form of adjust_address.


Index: gcc/caller-save.c
===
--- gcc/caller-save.c   (revision 170999)
+++ gcc/caller-save.c   (working copy)
@@ -1215,7 +1215,7 @@ insert_restore (struct insn_chain *chain
   /* Check that insn to restore REGNO in save_mode[regno] is
 correct.  */
   && reg_save_code (regno, save_mode[regno]) >= 0)
-mem = adjust_address (mem, save_mode[regno], 0);
+mem = adjust_address_nv (mem, save_mode[regno], 0);
   else
 mem = copy_rtx (mem);

@@ -1296,7 +1296,7 @@ insert_save (struct insn_chain *chain, i
   /* Check that insn to save REGNO in save_mode[regno] is
 correct.  */
   && reg_save_code (regno, save_mode[regno]) >= 0)
-mem = adjust_address (mem, save_mode[regno], 0);
+mem = adjust_address_nv (mem, save_mode[regno], 0);
   else
 mem = copy_rtx (mem);



[PATCH] PR 47862: Fix caller-save vector spill on PowerPC

2011-03-04 Thread Pat Haugen
Fix subject PR by defining HARD_REGNO_CALLER_SAVE_MODE to return V2DFmode for 
vectors in FP regs.


Bootstrap/regtest on powerpc with no new failures. Ok for trunk and 4.5 (after 
successful 4.5 bootstrap/regtest)?


-Pat


2011-03-04  Pat Haugen 

* config/rs6000/rs6000.h (HARD_REGNO_CALLER_SAVE_MODE): Define.
* config/rs6000/e500.h (HARD_REGNO_CALLER_SAVE_MODE): Undefine
 before definition.

testsuite/ChangeLog
* testsuite/gcc.target/powerpc/pr47862.c: New.

Index: gcc/config/rs6000/rs6000.h
===
--- gcc/config/rs6000/rs6000.h	(revision 170651)
+++ gcc/config/rs6000/rs6000.h	(working copy)
@@ -1005,6 +1005,16 @@
 
 #define HARD_REGNO_NREGS(REGNO, MODE) rs6000_hard_regno_nregs[(MODE)][(REGNO)]
 
+/* When setting up caller-save slots (MODE == VOIDmode) ensure we allocate
+   enough space to account for vectors in FP regs. */
+#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE)	\
+  (TARGET_VSX		\
+   && ((MODE) == VOIDmode || VSX_VECTOR_MODE (MODE)	\
+   || ALTIVEC_VECTOR_MODE (MODE))			\
+   && FP_REGNO_P (REGNO)\
+   ? V2DFmode		\
+   : choose_hard_reg_mode ((REGNO), (NREGS), false))
+
 #define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE)			\
   (((TARGET_32BIT && TARGET_POWERPC64	\
  && (GET_MODE_SIZE (MODE) > 4)	\
Index: gcc/config/rs6000/e500.h
===
--- gcc/config/rs6000/e500.h	(revision 170651)
+++ gcc/config/rs6000/e500.h	(working copy)
@@ -47,6 +47,8 @@
   }	\
   } while (0)
 
+/* Override rs6000.h definition.  */
+#undef HARD_REGNO_CALLER_SAVE_MODE
 /* When setting up caller-save slots (MODE == VOIDmode) ensure we
allocate space for DFmode.  Save gprs in the correct mode too.  */
 #define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \
Index: gcc/testsuite/gcc.target/powerpc/pr47862.c
===
--- gcc/testsuite/gcc.target/powerpc/pr47862.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr47862.c	(revision 0)
@@ -0,0 +1,19 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mcpu=power7" } */
+/* { dg-final { scan-assembler-not "stfd" } } */
+
+/* PR 47862: Verify caller-save spill of vectors in FP regs do not use
+   legacy FP insns, which spill only half the vector.  */
+extern vector double dd[15];
+
+vector double foo() {
+  vector double a,b,c,d,e,f,g,h,i,j,k,l,m,n;
+
+  a=dd[1]; b=dd[2]; c=dd[3]; d=dd[4]; e=dd[5]; f=dd[6]; g=dd[7]; h=dd[8]; i=dd[9];
+  j=dd[10]; k=dd[11]; l=dd[12]; m=dd[13]; n=dd[14];
+  bar();
+  return (a+b+c+d+e+f+g+h+i+j+k+l+m+n);
+}
+


<    1   2   3