[PATCH, rs6000] Update Power7 scheduling
The following patch fixes some issues with the Power7 scheduling description. The patch is neutral on cpu2006 (I was actually hoping to see some improvements, but it's still the right thing to do since it more accurately describes the hardware). Bootstrap/regtest on powerpc64-linux with no new regressions. Ok for trunk? -Pat 2011-10-27 Pat Haugen * config/rs6000/rs6000.md (define_attr "type"): Add vecdouble. * config/rs6000/vsx.md (VStype_simple, VStype_mul): Use vecdouble type for V2DF. (VStype_div): Use vector types for V2DF/V4SF. (VStype_sqrt): Use *sqrt types. (VS_spdp_type): Change type to vecdouble. (*vsx_fmav2df4, *vsx_nfmsv2df4, vsx_xvcvdpsxws, vsx_xvcvdpuxws, vsx_xvcvuxdsp, vsx_xvcvsxwdp, vsx_xvcvuxwdp, vsx_xvcvspsxds, vsx_xvcvspuxds): Likewise. (*vsx_fms<mode>4): Set type via <VStype_mul>. (*vsx_eq_<mode>_p, *vsx_gt_<mode>_p, *vsx_ge_<mode>_p): Set type via <VStype_simple>. * config/rs6000/power7.md (power7-vecstore): Correct VSU pipe. (power7-fpcompare, power7-sdiv, power7-ddiv, power7-sqrt, power7-dsqrt): Correct insn latency. (power7-vecsimple): Add veccmp type and correct dispatch/VSU values. (power7-veccmp): Delete. (power7-vecfloat): Correct latency/dispatch/VSU values. (define_bypass "power7-vecfloat"): Correct latency and types. (power7-veccomplex, power7-vecperm): Correct dispatch/VSU values. (power7-vecdouble, power7-vecfdiv, power7-vecdiv): New. Index: gcc/config/rs6000/rs6000.md === --- gcc/config/rs6000/rs6000.md (revision 180100) +++ gcc/config/rs6000/rs6000.md (working copy) @@ -144,7 +144,7 @@ (define_c_enum "unspecv" ;; Define an insn type attribute. This is used in function unit delay ;; computations. 
-(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel" +(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel" (const_string "integer")) ;; Define floating point instruction sub-types for use with Xfpu.md Index: gcc/config/rs6000/vsx.md === --- gcc/config/rs6000/vsx.md (revision 180100) +++ gcc/config/rs6000/vsx.md (working copy) @@ -120,7 +120,7 @@ (define_mode_attr VSv [(V16QI "v") (DF"s")]) ;; Appropriate type for add ops (and other simple FP ops) -(define_mode_attr VStype_simple [(V2DF "vecfloat") +(define_mode_attr VStype_simple [(V2DF "vecdouble") (V4SF "vecfloat") (DF "fp")]) @@ -129,7 +129,7 @@ (define_mode_attr VSfptype_simple [(V2DF (DF "fp_addsub_d")]) ;; Appropriate type for multiply ops -(define_mode_attr VStype_mul [(V2DF "vecfloat") +(define_mode_attr VStype_mul [(V2DF "vecdouble") (V4SF "vecfloat") (DF "dmul")]) @@ -137,10 +137,9 @@ (define_mode_attr 
VSfptype_mul [(V2DF "f (V4SF "fp_mul_s") (DF "fp_mul_d")]) -;; Appropriate type for divide ops. For now, just lump the vector divide with -;; the scalar divides -(define_mode_attr VStype_div [(V2DF "ddiv") - (V4SF "sdiv") +;; Appropriate type for divide ops. +(define_mode_attr VStype_div [(V2DF "vecdiv") + (V4SF "vecfdiv") (DF "ddiv")]) (define_mode_attr VSfptype_div [(V2DF "fp_div_d") @@ -150,8 +149,8 @@ (define_mode_attr VSfptype_div [(V2DF "f ;; Appropriate type for sqrt ops. For now, just lump the vector sqrt with ;; the scalar sqrt (define_mode_attr VStype_sqrt [(V2DF "dsqrt") - (V4SF "sdiv") - (DF "ddiv")]) + (V4SF "ssqrt") +
Re: [v3] Library bits of c++/49813
On 07/28/2011 04:43 AM, Paolo Carlini wrote: /usr/local/gcc/gcc-20110728/Build/ia64-suse-linux/libstdc++-v3/include/cmath: In function 'constexpr float std::fma(float, float, float)': /usr/local/gcc/gcc-20110728/Build/ia64-suse-linux/libstdc++-v3/include/cmath:1288:43: sorry, unimplemented: unexpected ast of kind fma_expr /usr/local/gcc/gcc-20110728/Build/ia64-suse-linux/libstdc++-v3/include/cmath:1288:43: internal compiler error: in potential_constant_expression_1, at cp/semantics.c:8094 in the past we encountered already a few small problems of this kind, with cases missing from the potential_constant_expression_1 switch. I believe something quite close to what I'm attaching below should be enough, can you give it a try? In any case, we definitely want Jason to have a look as soon as possible. If you want to restore the ia64 bootstrap in the meanwhile, feel free to comment out any troublesome constexpr specifier in that file (or replacing it with inline). Thanks! Paolo. // p Index: semantics.c === --- semantics.c (revision 176846) +++ semantics.c (working copy) @@ -8057,6 +8057,13 @@ potential_constant_expression_1 (tree t, bool want return false; return true; +case FMA_EXPR: + for (i = 0; i< 3; ++i) + if (!potential_constant_expression_1 (TREE_OPERAND (t, i), + true, flags)) + return false; + return true; + case COND_EXPR: case VEC_COND_EXPR: /* If the condition is a known constant, we know which of the legs we I am seeing the same error on PowerPC and the above patch fixes it. -Pat
Re: [PATCH, rs6000] Fix REG_CLASS_CONTENTS
On 05/31/2011 02:18 PM, David Edelsohn wrote: > On Tue, May 31, 2011 at 12:08 PM, Pat Haugen wrote: >> The following patch fixes an issue I noticed where vr0..vr2 were >> inadvertently included in NON_FLOAT_REGS. >> >> Bootstrap/regtest on powerpc64-linux with no new regressions. Ok for trunk? >> >> -Pat >> >> >> 2011-05-31 Pat Haugen >> >> * config/rs6000/rs6000.h (REG_CLASS_CONTENTS): Remove vr0..vr2 from >> NON_FLOAT_REGS. > > Okay. I should have asked before: is this ok for 4.6 also, after bootstrap/regtest? -Pat
[PATCH, rs6000] Fix REG_CLASS_CONTENTS
The following patch fixes an issue I noticed where vr0..vr2 were inadvertently included in NON_FLOAT_REGS. Bootstrap/regtest on powerpc64-linux with no new regressions. Ok for trunk? -Pat 2011-05-31 Pat Haugen * config/rs6000/rs6000.h (REG_CLASS_CONTENTS): Remove vr0..vr2 from NON_FLOAT_REGS. Index: gcc/config/rs6000/rs6000.h === --- gcc/config/rs6000/rs6000.h (revision 174304) +++ gcc/config/rs6000/rs6000.h (working copy) @@ -1224,7 +1224,7 @@ enum reg_class { 0xffffffff, 0x00000000, 0x0000000f, 0x00022000 }, /* SPEC_OR_GEN_REGS */ \ { 0x00000000, 0x00000000, 0x00000010, 0x00000000 }, /* CR0_REGS */\ { 0x00000000, 0x00000000, 0x00000ff0, 0x00000000 }, /* CR_REGS */ \ - { 0xffffffff, 0x00000000, 0x0000efff, 0x00020000 }, /* NON_FLOAT_REGS */ \ + { 0xffffffff, 0x00000000, 0x00000fff, 0x00020000 }, /* NON_FLOAT_REGS */ \ { 0x00000000, 0x00000000, 0x00001000, 0x00000000 }, /* CA_REGS */ \ { 0xffffffff, 0xffffffff, 0xffffffff, 0x0003ffff } /* ALL_REGS */\ }
[PATCH, rs6000] Tidy up dumping of register/memory move cost
The following fixes a problem when dumping register costs, where the incorrect 'from' value was being written out because the code modified the incoming parameter value. It also changes things so that register/memory costs are only dumped on the outermost call, eliminating intermediate output when a cost calculation requires going through memory or GPRs. Bootstrap/regtest on powerpc64-linux with no new regressions. Ok for trunk? -Pat 2011-05-25 Pat Haugen * config/rs6000/rs6000.c (rs6000_register_move_cost): Preserve from parameter value for dump. Dump cost on outermost call only. (rs6000_memory_move_cost): Dump cost on outermost call only. Index: gcc/config/rs6000/rs6000.c === --- gcc/config/rs6000/rs6000.c (revision 174138) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -189,6 +189,8 @@ enum reg_class rs6000_regno_regclass[FIR /* Reload functions based on the type and the vector unit. */ static enum insn_code rs6000_vector_reload[NUM_MACHINE_MODES][2]; +static int dbg_cost_ctrl; + /* Built in types. */ tree rs6000_builtin_types[RS6000_BTI_MAX]; tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT]; @@ -26428,26 +26430,31 @@ rs6000_register_move_cost (enum machine_ { int ret; + if (TARGET_DEBUG_COST) +dbg_cost_ctrl++; + /* Moves from/to GENERAL_REGS. */ if (reg_classes_intersect_p (to, GENERAL_REGS) || reg_classes_intersect_p (from, GENERAL_REGS)) { + reg_class_t rclass = from; + if (! reg_classes_intersect_p (to, GENERAL_REGS)) - from = to; + rclass = to; - if (from == FLOAT_REGS || from == ALTIVEC_REGS || from == VSX_REGS) - ret = (rs6000_memory_move_cost (mode, from, false) + if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS) + ret = (rs6000_memory_move_cost (mode, rclass, false) + rs6000_memory_move_cost (mode, GENERAL_REGS, false)); /* It's more expensive to move CR_REGS than CR0_REGS because of the shift. 
*/ - else if (from == CR_REGS) + else if (rclass == CR_REGS) ret = 4; /* Power6 has slower LR/CTR moves so make them more expensive than memory in order to bias spills to memory .*/ else if (rs6000_cpu == PROCESSOR_POWER6 - && reg_classes_intersect_p (from, LINK_OR_CTR_REGS)) + && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS)) ret = 6 * hard_regno_nregs[0][mode]; else @@ -26471,10 +26478,14 @@ rs6000_register_move_cost (enum machine_ + rs6000_register_move_cost (mode, from, GENERAL_REGS)); if (TARGET_DEBUG_COST) -fprintf (stderr, - "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n", - ret, GET_MODE_NAME (mode), reg_class_names[from], - reg_class_names[to]); +{ + if (dbg_cost_ctrl == 1) + fprintf (stderr, + "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n", + ret, GET_MODE_NAME (mode), reg_class_names[from], + reg_class_names[to]); + dbg_cost_ctrl--; +} return ret; } @@ -26488,6 +26499,9 @@ rs6000_memory_move_cost (enum machine_mo { int ret; + if (TARGET_DEBUG_COST) +dbg_cost_ctrl++; + if (reg_classes_intersect_p (rclass, GENERAL_REGS)) ret = 4 * hard_regno_nregs[0][mode]; else if (reg_classes_intersect_p (rclass, FLOAT_REGS)) @@ -26498,9 +26512,13 @@ rs6000_memory_move_cost (enum machine_mo ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS); if (TARGET_DEBUG_COST) -fprintf (stderr, - "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n", - ret, GET_MODE_NAME (mode), reg_class_names[rclass], in); +{ + if (dbg_cost_ctrl == 1) + fprintf (stderr, + "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n", + ret, GET_MODE_NAME (mode), reg_class_names[rclass], in); + dbg_cost_ctrl--; +} return ret; }
Re: [PATCH, rs6000 committed] Fix PowerPC bootstrap
On 04/12/2011 08:22 PM, Alan Modra wrote: > On Tue, Apr 12, 2011 at 04:00:45PM -0500, Pat Haugen wrote: >> --- gcc/config/rs6000/rs6000.c (revision 172327) >> +++ gcc/config/rs6000/rs6000.c (working copy) >> @@ -7976,7 +7976,7 @@ call_ABI_of_interest (tree fndecl) >> return true; >> >> /* Interesting functions that we are emitting in this object file. */ >> - c_node = cgraph_node (fndecl); >> + c_node = cgraph_get_create_node (fndecl); >> return !cgraph_only_called_directly_p (c_node); >> } >> return false; > > I think we should use cgraph_get_node here. OK, fixed with the following. 2011-04-15 Pat Haugen * config/rs6000/rs6000.c (call_ABI_of_interest): Call cgraph_get_node instead of cgraph_get_create_node. Index: gcc/config/rs6000/rs6000.c === --- gcc/config/rs6000/rs6000.c (revision 172498) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -7976,7 +7976,7 @@ call_ABI_of_interest (tree fndecl) return true; /* Interesting functions that we are emitting in this object file. */ - c_node = cgraph_get_create_node (fndecl); + c_node = cgraph_get_node (fndecl); return !cgraph_only_called_directly_p (c_node); } return false;
[PATCH, rs6000 committed] Fix PowerPC bootstrap
Discussed the following with Martin on irc to bring rs6000 target up to date with his changes to the cgraph code. Bootstrap/regtest on powerpc64-linux. Committed as obvious. -Pat 2011-04-12 Pat Haugen * config/rs6000/rs6000.c (call_ABI_of_interest): Call cgraph_get_create_node instead of cgraph_node. Index: gcc/config/rs6000/rs6000.c === --- gcc/config/rs6000/rs6000.c (revision 172327) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -7976,7 +7976,7 @@ call_ABI_of_interest (tree fndecl) return true; /* Interesting functions that we are emitting in this object file. */ - c_node = cgraph_node (fndecl); + c_node = cgraph_get_create_node (fndecl); return !cgraph_only_called_directly_p (c_node); } return false;
[PATCH, rs6000] Make LR/CTR moves expensive for Power7 also
The following was overlooked on initial Power7 support. Bootstrap/regtest on powerpc64-linux with no new regressions. Ok for trunk and 4.6? -Pat 2011-04-11 Pat Haugen * config/rs6000/rs6000.c (rs6000_register_move_cost): Make LR/CTR moves expensive on Power7 also. Index: gcc/config/rs6000/rs6000.c === --- gcc/config/rs6000/rs6000.c (revision 172255) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -26701,9 +26701,10 @@ rs6000_register_move_cost (enum machine_ else if (from == CR_REGS) ret = 4; - /* Power6 has slower LR/CTR moves so make them more expensive than -memory in order to bias spills to memory .*/ - else if (rs6000_cpu == PROCESSOR_POWER6 + /* For those processors that have slow LR/CTR moves, make them more +expensive than memory in order to bias spills to memory .*/ + else if ((rs6000_cpu == PROCESSOR_POWER6 + || rs6000_cpu == PROCESSOR_POWER7) && reg_classes_intersect_p (from, LINK_OR_CTR_REGS)) ret = 6 * hard_regno_nregs[0][mode];
[PATCH][4.5] Fix backport of PR 47862 to 4.5 branch
The following changes are already present on trunk as part of the fix for PR44364. Not having them on 4.5 caused problems with the subject fix when trying to spill FP regs with a stack > 32K. Bootstrap/regtest 4.5 branch on powerpc with no new failures, ok for 4.5? -Pat 2011-03-15 Pat Haugen PR target/47862 * caller-save.c (insert_restore, insert_save): Use non-validate form of adjust_address. Index: gcc/caller-save.c === --- gcc/caller-save.c (revision 170999) +++ gcc/caller-save.c (working copy) @@ -1215,7 +1215,7 @@ insert_restore (struct insn_chain *chain /* Check that insn to restore REGNO in save_mode[regno] is correct. */ && reg_save_code (regno, save_mode[regno]) >= 0) -mem = adjust_address (mem, save_mode[regno], 0); +mem = adjust_address_nv (mem, save_mode[regno], 0); else mem = copy_rtx (mem); @@ -1296,7 +1296,7 @@ insert_save (struct insn_chain *chain, i /* Check that insn to save REGNO in save_mode[regno] is correct. */ && reg_save_code (regno, save_mode[regno]) >= 0) -mem = adjust_address (mem, save_mode[regno], 0); +mem = adjust_address_nv (mem, save_mode[regno], 0); else mem = copy_rtx (mem);
[PATCH] PR 47862: Fix caller-save vector spill on PowerPC
Fix subject PR by defining HARD_REGNO_CALLER_SAVE_MODE to return V2DFmode for vectors in FP regs. Bootstrap/regtest on powerpc with no new failures. Ok for trunk and 4.5 (after successful 4.5 bootstrap/regtest)? -Pat 2011-03-04 Pat Haugen * config/rs6000/rs6000.h (HARD_REGNO_CALLER_SAVE_MODE): Define. * config/rs6000/e500.h (HARD_REGNO_CALLER_SAVE_MODE): Undefine before definition. testsuite/ChangeLog * testsuite/gcc.target/powerpc/pr47862.c: New. Index: gcc/config/rs6000/rs6000.h === --- gcc/config/rs6000/rs6000.h (revision 170651) +++ gcc/config/rs6000/rs6000.h (working copy) @@ -1005,6 +1005,16 @@ #define HARD_REGNO_NREGS(REGNO, MODE) rs6000_hard_regno_nregs[(MODE)][(REGNO)] +/* When setting up caller-save slots (MODE == VOIDmode) ensure we allocate + enough space to account for vectors in FP regs. */ +#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \ + (TARGET_VSX \ + && ((MODE) == VOIDmode || VSX_VECTOR_MODE (MODE) \ + || ALTIVEC_VECTOR_MODE (MODE)) \ + && FP_REGNO_P (REGNO)\ + ? V2DFmode \ + : choose_hard_reg_mode ((REGNO), (NREGS), false)) + #define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \ (((TARGET_32BIT && TARGET_POWERPC64 \ && (GET_MODE_SIZE (MODE) > 4) \ Index: gcc/config/rs6000/e500.h === --- gcc/config/rs6000/e500.h (revision 170651) +++ gcc/config/rs6000/e500.h (working copy) @@ -47,6 +47,8 @@ } \ } while (0) +/* Override rs6000.h definition. */ +#undef HARD_REGNO_CALLER_SAVE_MODE /* When setting up caller-save slots (MODE == VOIDmode) ensure we allocate space for DFmode. Save gprs in the correct mode too. 
*/ #define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \ Index: gcc/testsuite/gcc.target/powerpc/pr47862.c === --- gcc/testsuite/gcc.target/powerpc/pr47862.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/pr47862.c (revision 0) @@ -0,0 +1,19 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mcpu=power7" } */ +/* { dg-final { scan-assembler-not "stfd" } } */ + +/* PR 47862: Verify caller-save spill of vectors in FP regs do not use + legacy FP insns, which spill only half the vector. */ +extern vector double dd[15]; + +vector double foo() { + vector double a,b,c,d,e,f,g,h,i,j,k,l,m,n; + + a=dd[1]; b=dd[2]; c=dd[3]; d=dd[4]; e=dd[5]; f=dd[6]; g=dd[7]; h=dd[8]; i=dd[9]; + j=dd[10]; k=dd[11]; l=dd[12]; m=dd[13]; n=dd[14]; + bar(); + return (a+b+c+d+e+f+g+h+i+j+k+l+m+n); +} +