Re: [PATCH,i386] Add -mstack-protector-guard= for i386
Sure. Put back Init(SSP_TLS) and rebase to r197955. If no further feedback, please merge. 2013-04-15 Andrew Hsieh andrewhsieh.google.com * config/i386/i386.opt: New option mstack-protector-guard=. * config/i386/i386-opts.h: Add enum stack_protector_guard. * config/i386/i386.h: Introduce TARGET_SSP_GLOBAL_GUARD and TARGET_SSP_TLS_GUARD. * config/i386/i386.c (ix86_option_override_internal): Default to SSP_TLS unless it's bionic * config/i386/i386.md: define_expand/insn stack_protect_set/test... if TARGET_SSP_TLS_GUARD * doc/invoke.texi (i386 Option): Document. Index: gcc/config/i386/i386.opt === --- gcc/config/i386/i386.opt (revision 197955) +++ gcc/config/i386/i386.opt (working copy) @@ -626,3 +626,17 @@ mrtm Target Report Mask(ISA_RTM) Var(ix86_isa_flags) Save Support RTM built-in functions and code generation + +mstack-protector-guard= +Target RejectNegative Joined Enum(stack_protector_guard) Var(ix86_stack_protector_guard) Init(SSP_TLS) +Use given stack-protector guard + +Enum +Name(stack_protector_guard) Type(enum stack_protector_guard) +Known stack protector guard (for use with the -mstack-protector-guard= option): + +EnumValue +Enum(stack_protector_guard) String(tls) Value(SSP_TLS) + +EnumValue +Enum(stack_protector_guard) String(global) Value(SSP_GLOBAL) Index: gcc/config/i386/i386.md === --- gcc/config/i386/i386.md (revision 197955) +++ gcc/config/i386/i386.md (working copy) @@ -17058,7 +17058,7 @@ (define_expand stack_protect_set [(match_operand 0 memory_operand) (match_operand 1 memory_operand)] - !TARGET_HAS_BIONIC + TARGET_SSP_TLS_GUARD { rtx (*insn)(rtx, rtx); @@ -17083,7 +17083,7 @@ UNSPEC_SP_SET)) (set (match_scratch:PTR 2 =r) (const_int 0)) (clobber (reg:CC FLAGS_REG))] - !TARGET_HAS_BIONIC + TARGET_SSP_TLS_GUARD mov{imodesuffix}\t{%1, %2|%2, %1}\;mov{imodesuffix}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2 [(set_attr type multi)]) @@ -17101,7 +17101,7 @@ [(match_operand 0 memory_operand) (match_operand 1 memory_operand) (match_operand 2)] - 
!TARGET_HAS_BIONIC + TARGET_SSP_TLS_GUARD { rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG); @@ -17131,7 +17131,7 @@ (match_operand:PTR 2 memory_operand m)] UNSPEC_SP_TEST)) (clobber (match_scratch:PTR 3 =r))] - !TARGET_HAS_BIONIC + TARGET_SSP_TLS_GUARD mov{imodesuffix}\t{%1, %3|%3, %1}\;xor{imodesuffix}\t{%2, %3|%3, %2} [(set_attr type multi)]) Index: gcc/config/i386/i386-opts.h === --- gcc/config/i386/i386-opts.h (revision 197955) +++ gcc/config/i386/i386-opts.h (working copy) @@ -85,4 +85,9 @@ ix86_veclibabi_type_acml }; +enum stack_protector_guard { + SSP_TLS, /* per-thread canary in TLS block */ + SSP_GLOBAL/* global canary */ +}; + #endif Index: gcc/config/i386/i386.c === --- gcc/config/i386/i386.c (revision 197955) +++ gcc/config/i386/i386.c (working copy) @@ -3922,6 +3922,10 @@ if (main_args_p) target_option_default_node = target_option_current_node = build_target_option_node (); + + /* Handle stack protector */ + if (!global_options_set.x_ix86_stack_protector_guard) +ix86_stack_protector_guard = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS; } /* Implement the TARGET_OPTION_OVERRIDE hook. */ Index: gcc/config/i386/i386.h === --- gcc/config/i386/i386.h (revision 197955) +++ gcc/config/i386/i386.h (working copy) @@ -486,6 +486,9 @@ #define TARGET_TLS_DIRECT_SEG_REFS_DEFAULT 0 #endif +#define TARGET_SSP_GLOBAL_GUARD (ix86_stack_protector_guard == SSP_GLOBAL) +#define TARGET_SSP_TLS_GUARD(ix86_stack_protector_guard == SSP_TLS) + /* Fence to use after loop using storent. 
*/ extern tree x86_mfence; Index: gcc/doc/invoke.texi === --- gcc/doc/invoke.texi (revision 197955) +++ gcc/doc/invoke.texi (working copy) @@ -657,7 +657,8 @@ -mcmodel=@var{code-model} -mabi=@var{name} -maddress-mode=@var{mode} @gol -m32 -m64 -mx32 -mlarge-data-threshold=@var{num} @gol -msse2avx -mfentry -m8bit-idiv @gol --mavx256-split-unaligned-load -mavx256-split-unaligned-store} +-mavx256-split-unaligned-load -mavx256-split-unaligned-store @gol +-mstack-protector-guard=@var{guard}} @emph{i386 and x86-64 Windows Options} @gccoptlist{-mconsole -mcygwin -mno-cygwin -mdll @gol @@ -14592,6 +14593,13 @@ @opindex avx256-split-unaligned-store Split 32-byte AVX unaligned load and store. +@item -mstack-protector-guard=@var{guard} +@opindex mstack-protector-guard=@var{guard} +Generate stack protection code using canary at @var{guard}. Supported +locations are @samp{global} or @samp{tls} per thread at %gs:20
Re: [PATCH] Enable java for aarch64
Andrew Haley a...@redhat.com writes: Looks basically OK. What were the failures, though? FAIL: TestClosureGC run FAIL: Array_3 execution - source compiled test FAIL: Array_3 -findirect-dispatch execution - source compiled test FAIL: Array_3 -O3 execution - source compiled test FAIL: Array_3 -O3 -findirect-dispatch execution - source compiled test FAIL: Invoke_1 execution - source compiled test FAIL: Invoke_1 -findirect-dispatch execution - source compiled test FAIL: Invoke_1 -O3 execution - source compiled test FAIL: Invoke_1 -O3 -findirect-dispatch execution - source compiled test FAIL: PR218 execution - source compiled test FAIL: PR218 -O3 execution - source compiled test FAIL: StackTrace2 execution - source compiled test FAIL: StackTrace2 -findirect-dispatch execution - source compiled test FAIL: StackTrace2 -O3 execution - source compiled test FAIL: StackTrace2 -O3 -findirect-dispatch execution - source compiled test FAIL: Throw_2 execution - source compiled test FAIL: Throw_2 -findirect-dispatch execution - source compiled test FAIL: Throw_2 -O3 execution - source compiled test FAIL: Throw_2 -O3 -findirect-dispatch execution - source compiled test FAIL: Throw_3 execution - source compiled test FAIL: Throw_3 -findirect-dispatch execution - source compiled test FAIL: Throw_3 -O3 execution - source compiled test FAIL: Throw_3 -O3 -findirect-dispatch execution - source compiled test FAIL: pr83 -findirect-dispatch execution - source compiled test FAIL: pr83 -O3 -findirect-dispatch execution - source compiled test FAIL: sourcelocation output - source compiled test FAIL: sourcelocation -findirect-dispatch output - source compiled test FAIL: sourcelocation -O3 output - source compiled test FAIL: sourcelocation -O3 -findirect-dispatch output - source compiled test Andreas. -- Andreas Schwab, SUSE Labs, sch...@suse.de GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7 And now for something completely different.
Re: [PATCH] Enable java for aarch64
Yvan's patch was checked in a few days ago... /Marcus r197770 | clyon | 2013-04-11 13:06:04 +0100 (Thu, 11 Apr 2013) | 12 lines 2013-03-16 Yvan Roux yvan.r...@linaro.org * include/private/gcconfig.h (AARCH64): New macro (defined only if __aarch64__). * include/private/gcconfig.h (mach_type_known): Update comment adding ARM AArch64 target. * include/private/gcconfig.h (NOSYS, mach_type_known,CPP_WORDSZ, MACH_TYPE, ALIGNMENT, HBLKSIZE, OS_TYPE, LINUX_STACKBOTTOM, USE_GENERIC_PUSH_REGS, DYNAMIC_LOADING, DATASTART, DATAEND, STACKBOTTOM): Define for AArch64. On 14 April 2013 12:17, Matthias Klose d...@ubuntu.com wrote: Am 13.04.2013 20:21, schrieb Andreas Schwab: This enables building java for aarch64. Afaics, the aarch64 changes for boehm-gc are not yet checked in. Aren't these needed as a prerequisite?
RE: [PATCH][ARM][thumb1] Reduce lr save for leaf function with non-far jump
-Original Message- From: Ramana Radhakrishnan Sent: Thursday, April 11, 2013 4:40 PM To: Joey Ye Cc: gcc-patches@gcc.gnu.org Subject: Re: [PATCH][ARM][thumb1] Reduce lr save for leaf function with non-far jump On 12/20/12 09:53, Joey Ye wrote: Current GCC thumb1 has an annoying problem that always assuming far branch. So it forces to save lr, even when unnecessarily. The most extreme case complained by partner is: // compiled with -mthumb -mcpu=cortex-m0 -Os. void foo() { for (;;); } = foo: push{lr} // Crazy!!! .L2: b .L2 The reason is that thumb1 far jump is only resolved in the very late pass shorten_branch. Prologue/epilogue pass doesn't actually know a branch is far or not from its attribute. It has to conservatively save/restore lr whenever there is a branch. This patch tries to fix it with a simple heuristic, i.e., using function size to decide if a far jump will likely be used. Function size information is meaningful in prologue/epilogue pass. The heuristic uses following check to decide if lr should be saved for far jump: function_size * 3 = 2048 // yes: save lr for possible far jump. No: don't save lr for far jump The scheme has an issue: if some corner case does break above condition, there is no chance to fix-up but to ICE. But the heuristic condition is very conservative. It is base on the worse normal condition that each instruction is associated with a 4 byte literal ( (2+4)/2=3, blooming size by 3 times ). I can't think of a real case to trigger the ICE. So I think it should work. Other approaches than the heuristic scheme are too expensive to implement for this small size/performance issue. I did explored some but none of them persuaded myself. Tests passed: * build libgcc, libstdc++, newlib, libm * make check-gcc with cpu=cortex-m0 * Small and extreme test cases ChangeLog: 2012-12-20 Joey Ye joey...@arm.com * config/arm/arm.c(thumb1_final_prescan_insn): Assert lr save for real far jump. 
(thumb_far_jump_used_p): Count instruction size and set far_jump_used. diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 327ef22..ad79451 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -21790,6 +21857,11 @@ thumb1_final_prescan_insn (rtx insn) else if (conds != CONDS_NOCOND) cfun-machine-thumb1_cc_insn = NULL_RTX; } + +/* Check if unexpected far jump is used. */ +if (cfun-machine-lr_save_eliminated + get_attr_far_jump (insn) == FAR_JUMP_YES) + internal_error(Unexpected thumb1 far jump); } int @@ -21815,6 +21887,8 @@ static int thumb_far_jump_used_p (void) { rtx insn; + bool far_jump = false; + unsigned int func_size = 0; /* This test is only important for leaf functions. */ /* assert (!leaf_function_p ()); */ @@ -21870,6 +21944,26 @@ thumb_far_jump_used_p (void) get_attr_far_jump (insn) == FAR_JUMP_YES ) { + far_jump = true; + } + func_size += get_attr_length (insn); +} + + /* Attribute far_jump will always be true for thumb1 before shorten_branch + pass. So checking far_jump attribute before shorten_branch isn't much + useful. + + Following heuristic tries to estimate more accurately if a far + jump may + finally be used. The heuristic is very conservative as there is + no chance + to roll-back the decision of not to use far jump. + + Thumb1 long branch offset is -2048 to 2046. The worst case is + each 2-byte + insn is associated with a 4 byte constant pool. Using function size + 2048/3 as the threshold is conservative enough. */ if + (far_jump) +{ + if ((func_size * 3) = 2048) +{ /* Record the fact that we have decided that the function does use far jumps. */ cfun-machine-far_jump_used = 1; Check for 80 character line length in the comments above - I can never tell if it is my mail client or yours. Further shorten the lines. Otherwise ok if no regressions.. Make check targeting Cortex-M0/M3 with qemu. No regression. Committed as 197956 - Joey
Re: [Patch, fortran] PR 56919 SYSTEM_CLOCK on Windows
Janne Blomqvist wrote: Attached is an updated patch which uses GetTickCount for system_clock_4; this should be fine as system_clock_4 wraps around in ~25 days anyways. For system_clock_8 it uses QueryPerformance{Counter,Frequency}. The patch also adds an additional check for _POSIX_MONOTONIC_CLOCK. Ok for trunk? Regarding the documentation, I wonder whether one should do the following additional changes: - Explicitly suggest to use a kind=8 argument for system_clock (for higher resolution and to avoid overflows). - To change the system_clock example to use an integer(8) argument. Possibly, via iso_fortran_env's int64 or via selected_int_kind(18) + #if defined(CLOCK_MONOTONIC) && defined(_POSIX_MONOTONIC_CLOCK) I'd add _POSIX_MONOTONIC_CLOCK >= 0 as POSIX states: If a symbolic constant is defined with the value -1, the option is not supported. + uint32_t cnt = GetTickCount (); I wonder whether it is worth adding a comment stating that GetTickCount is used instead of QueryPerformanceCounter because the extra precision and the 49.7-day overflow do not matter with the 32-bit system_clock - and because QueryPerformanceCounter has issues on some (very few) systems. Otherwise, it looks fine to me. Tobias
Re: [PATCH] color diagnostics markers
On Sun, Apr 14, 2013 at 4:53 PM, Marc Glisse marc.gli...@inria.fr wrote: On Wed, 10 Apr 2013, Gabriel Dos Reis wrote: On Wed, Apr 10, 2013 at 1:42 PM, Manuel López-Ibáñez lopeziba...@gmail.com wrote: On 9 April 2013 15:21, Jakub Jelinek ja...@redhat.com wrote: white). The default is still -fdiagnostics-color=never, can be changed later on. Apart from my comments elsewhere (http://gcc.gnu.org/ml/gcc-patches/2013-04/msg00614.html), the patch looks fine to me. But perhaps we should change the default to auto, at least during Stage 1, to find out whether some bug was introduced. If agreed, I could do this in a follow-up patch that also disables colors for the testsuite. Cheers, Manuel. I am still of the opinion that the default should be discussed differently, and I strongly suggest that it defaults to never. I do not believe we need to do otherwise now. As I stated before, our pursuit of enabling every new thing by default may have made C++ diagnostics more terrifying. Hello, I would like to suggest that the default be auto when the environment variable GCC_COLORS is defined. It can stay never otherwise (I would prefer auto as well, colors don't make the diagnostics any longer, only more readable, and an empty GCC_COLORS is an easy way to disable them, but I see you have a strong opinion on this so I won't insist). Everybody has got strong opinions, especially those who are eager to label others as having one. Defining a variable in my environment counts as a clear intention. If you invoke GCC on the command line with an explicit option requesting colors, I don't think there is any doubt about that. However, I dispute the intent to be so universally clear for most GCC users who happen to have GCC_COLORS set -- most users inherit whatever their sysadmin or distros set for them. That said, I am fine with the idea to GCC_COLORS = detect. -- Gaby
Re: [PATCH, testsuite]: Avoid error: inlining failed in call to always_inline with -fpic
On Wed, Apr 10, 2013 at 7:13 PM, Uros Bizjak ubiz...@gmail.com wrote: Attached testsuite patch fixes: pr33992.c: In function ‘do_test’: pr33992.c:11:1: error: inlining failed in call to always_inline ‘foo’: function body can be overwritten at link time pr33992.c:28:9: error: called from here foo (r); ^ errors through gcc and g++ testsuite when tested with -fpic. 2013-04-10 Uros Bizjak ubiz...@gmail.com * g++.dg/ipa/devirt-c-7.C: Require nonpic effective target. * gcc.c-torture/execute/pr33992.c (foo): Declare as static void. * gcc.dg/uninit-pred-5_a.c (foo): Ditto. * gcc.dg/uninit-pred-5_b.c (foo): Ditto. OK for mainline and release branches? I would like to proceed with this fairly obvious issue, so if there are no objections, I plan to commit the patch tomorrow. Thanks, Uros.
Unreviewed build, driver patch
The following patch has remained unreviewed for a week: [build] Use -z ignore instead of --as-needed on Solaris http://gcc.gnu.org/ml/gcc-patches/2013-04/msg00425.html It needs build and driver maintainers to review. Thanks. Rainer -- - Rainer Orth, Center for Biotechnology, Bielefeld University
Re: [patch] PR middle-end/43631
This didn't really help make things look prettier. The notes are enumerated in many other places, for various purposes. I didn't like special-casing this one INSN_NOTE oddity when there are so many others already. So I created a note_outside_basic_block_p instead, I hope you agree with this approach. Sure, thanks for investigating the proposal in any case. Updated patch attached, similarly tested. OK for trunk? Yes, modulo a few nits: +/* Like add_insn_after, but try to set BLOCK_FOR_INSN. + If BB is NULL, an attempt is made to infer the bb from before. + + This and the next function should be the only functions called + to insert an insn once delay slots have been filled since only + they know how to update a SEQUENCE. */ + +void +add_insn_after (rtx insn, rtx after, basic_block bb) Like add_insn_after_nobb, ... -/* Add INSN into the doubly-linked list before insn BEFORE. This and - the previous should be the only functions called to insert an insn - once delay slots have been filled since only they know how to - update a SEQUENCE. If BB is NULL, an attempt is made to infer the - bb from before. */ +/* Like add_insn_before_nobb, but try to set BLOCK_FOR_INSN. + If BB is NULL, an attempt is made to infer the bb from before. */ void add_insn_before (rtx insn, rtx before, basic_block bb) Keep the blurb about SEQUENCEs: This and the previous function should... +/* Return true iff a note of kind SUBTYPE should be emitted with via + routines that never set BLOCK_FOR_INSN on NOTE. BB_BOUNDARY is true + if the caller is asked to emit a note before BB_HEAD, or after BB_END. */ Superfluous with or via. +static bool +note_outside_basic_block_p (enum insn_note subtype, bool on_bb_boundary_p) +{ This should be implemented with a switch statement. -- Eric Botcazou
[Patch, Fortran, committed] Fix small issues
Found with the Coverity scanner.* Committed as Rev. 197961 after build+regtesting on x86-64-gnu-linux. Tobias * There are more issues, some real, some false positive; thus, if someone wants to reduce the number of true issues ... 2013-04-15 Tobias Burnus bur...@net-b.de * class.c (gfc_find_intrinsic_vtab): Removed unused var. * dependency.c (check_data_pointer_types): Fix check. * frontend-passes.c (check_data_pointer_types): Remove superfluous statement. * parse.c (decode_omp_directive): Add missing break. * resolve.c (resolve_typebound_subroutine: Free variable. * trans-decl.c (create_function_arglist): Correct condition. diff --git a/gcc/fortran/class.c b/gcc/fortran/class.c index f3fe178..349f494 100644 --- a/gcc/fortran/class.c +++ b/gcc/fortran/class.c @@ -2480,7 +2480,7 @@ gfc_symbol * gfc_find_intrinsic_vtab (gfc_typespec *ts) { gfc_namespace *ns; - gfc_symbol *vtab = NULL, *vtype = NULL, *found_sym = NULL, *def_init = NULL; + gfc_symbol *vtab = NULL, *vtype = NULL, *found_sym = NULL; gfc_symbol *copy = NULL, *src = NULL, *dst = NULL; int charlen = 0; @@ -2689,8 +2689,6 @@ cleanup: gfc_commit_symbol (vtab); if (vtype) gfc_commit_symbol (vtype); - if (def_init) - gfc_commit_symbol (def_init); if (copy) gfc_commit_symbol (copy); if (src) diff --git a/gcc/fortran/dependency.c b/gcc/fortran/dependency.c index 6f8e6df..38921b1 100644 --- a/gcc/fortran/dependency.c +++ b/gcc/fortran/dependency.c @@ -1200,7 +1200,7 @@ check_data_pointer_types (gfc_expr *expr1, gfc_expr *expr2) bool seen_component_ref; if (expr1-expr_type != EXPR_VARIABLE - || expr1-expr_type != EXPR_VARIABLE) + || expr2-expr_type != EXPR_VARIABLE) return false; sym1 = expr1-symtree-n.sym; diff --git a/gcc/fortran/frontend-passes.c b/gcc/fortran/frontend-passes.c index 9749314..3946c0c 100644 --- a/gcc/fortran/frontend-passes.c +++ b/gcc/fortran/frontend-passes.c @@ -1045,8 +1045,6 @@ combine_array_constructor (gfc_expr *e) newbase = NULL; e-expr_type = EXPR_ARRAY; - c = gfc_constructor_first 
(oldbase); - for (c = gfc_constructor_first (oldbase); c; c = gfc_constructor_next (c)) { diff --git a/gcc/fortran/parse.c b/gcc/fortran/parse.c index 74a5b4b..8301113 100644 --- a/gcc/fortran/parse.c +++ b/gcc/fortran/parse.c @@ -621,6 +621,7 @@ decode_omp_directive (void) match (taskyield, gfc_match_omp_taskyield, ST_OMP_TASKYIELD); match (threadprivate, gfc_match_omp_threadprivate, ST_OMP_THREADPRIVATE); + break; case 'w': match (workshare, gfc_match_omp_workshare, ST_OMP_WORKSHARE); break; diff --git a/gcc/fortran/resolve.c b/gcc/fortran/resolve.c index 30cfcd0..418a2d2 100644 --- a/gcc/fortran/resolve.c +++ b/gcc/fortran/resolve.c @@ -5820,6 +5843,8 @@ resolve_typebound_subroutine (gfc_code *code) correct typespec. */ code-expr1-ts = ts; } + else if (new_ref) +gfc_free_ref_list (new_ref); return true; } diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c index f2cf2de..b94ffb3 100644 --- a/gcc/fortran/trans-decl.c +++ b/gcc/fortran/trans-decl.c @@ -2146,7 +2146,7 @@ create_function_arglist (gfc_symbol * sym) hence, the optional status cannot be transfered via a NULL pointer. Thus, we will use a hidden argument in that case. */ else if (f-sym-attr.optional f-sym-attr.value - !f-sym-attr.dimension !f-sym-ts.type != BT_CLASS + !f-sym-attr.dimension f-sym-ts.type != BT_CLASS f-sym-ts.type != BT_DERIVED) { tree tmp;
Re: [patch] Fix ICE during RTL expansion at -O1
On Sun, Apr 14, 2013 at 9:46 AM, Eric Botcazou ebotca...@adacore.com wrote: This is a quadratic algorithm and as such not ok. We already have aliasing_component_refs_p in tree-ssa-alias.c which is supposed to be the non-quadratic replacement. It's not used via decl_refs_may_alias_p, so that may be the thing to fix. aliasing_component_refs_p isn't powerful enough, it eliminates the quadratic aspect by assuming that all offsets are constants, so it misses cases like (*p)[i].f1 vs a[j].f2. Moreover it assumes TBAA and we don't need it here. Note that looking at the access path _is_ assuming TBAA constraints as soon as the base objects are not the same (in the above case '*p' and 'a' are not the same and p could alias a in a way that all f1 and f2 overlap). I can rewrite nonoverlapping_component_refs_of_decl_p to make it non-quadratic and catch the same cases I think, patch attached (without the vect testsuite adjustments, but they are still needed). nonoverlapping_component_refs_of_decl_p on RTL should go - in fact we do call the tree oracle from all its callers so we only ever do redundant work (after your proposed patch even more so). Not clear if the tree oracle can catch the above case with *p and a, but, yes, nonoverlapping_component_refs_p should go in the long term. * alias.c (nonoverlapping_component_refs_p): Protect again LTO quirk. * tree-ssa-alias.c (nonoverlapping_component_refs_of_decl_p): New. (decl_refs_may_alias_p): Add REF1 and REF2 parameters. Use nonoverlapping_component_refs_of_decl_p to disambiguate component references. (refs_may_alias_p_1): Adjust call to decl_refs_may_alias_p. * tree-streamer.c (record_common_node): Adjust reference in comment. Index: alias.c === --- alias.c (revision 197926) +++ alias.c (working copy) @@ -2232,8 +2232,11 @@ nonoverlapping_component_refs_p (const_r found: /* If we're left with accessing different fields of a structure, then no -possible overlap, unless they are both bitfields. 
*/ - if (TREE_CODE (typex) == RECORD_TYPE fieldx != fieldy) +possible overlap, unless they are both bitfields. +??? Pointer inequality is too fragile in the LTO compiler. */ + if (TREE_CODE (typex) == RECORD_TYPE + fieldx != fieldy + DECL_NAME (fieldx) != DECL_NAME (fieldy)) this, if at all, should go in with a separate patch and a testcase. And I think it should _not_ go in. Instead, as the case passes if (typex == typey) goto found; earlier you should assert that DECL_CONTEXT (fieldx) == DECL_CONTEXT (fieldy) == typex == typey here. Note that fails of this test are expected even in the non-LTO case because I cannot find any IL verification that would verify that for a COMPONENT_REF TREE_TYPE (TREE_OPERAND (cr, 0)) == DECL_CONTEXT (TREE_OPERAND (cr, 1)) (due to sharing of the FIELD_DECL chain between different type variants the check will fail for all non-main-variants I think, so refining it to look at the main variant is probably advised). Otoh... + /* ??? We cannot simply use the type of operand #0 of the refs here +as the Fortran compiler smuggles type punning into COMPONENT_REFs +for common blocks instead of using unions like everyone else. */ + tree type1 = TYPE_MAIN_VARIANT (DECL_CONTEXT (field1)); + tree type2 = TYPE_MAIN_VARIANT (DECL_CONTEXT (field2)); + + if (type1 != type2 || TREE_CODE (type1) != RECORD_TYPE) +goto may_overlap; + + /* ??? Pointer inequality is too fragile in the LTO compiler. */ + if (field1 != field2 DECL_NAME (field1) != DECL_NAME (field2)) this suggests you are seeing multiple FIELD_DECLs for the same field in the _same_ FIELD_DECL chain ...?! Are you sure this happens with GCC 4.8? There were some fixes in that area in the LTO type merging code. Index: tree-streamer.c === --- tree-streamer.c (revision 197926) +++ tree-streamer.c (working copy) @@ -267,10 +267,9 @@ record_common_node (struct streamer_tree /* The FIELD_DECLs of structures should be shared, so that every COMPONENT_REF uses the same tree node when referencing a field. 
Pointer equality between FIELD_DECLs is used by the alias -machinery to compute overlapping memory references (See -nonoverlapping_component_refs_p). */ - tree f; - for (f = TYPE_FIELDS (node); f; f = TREE_CHAIN (f)) +machinery to compute overlapping component references (see +nonoverlapping_component_refs_of_decl_p). */ + for (tree f = TYPE_FIELDS (node); f; f = TREE_CHAIN (f)) record_common_node (cache, f); } } without actually removing nonoverlapping_component_refs_p it still applies to both... Can you port the non-quadratic algorithm
Re: [patch] Fix PR middle-end/56474
On Sun, Apr 14, 2013 at 10:05 AM, Eric Botcazou ebotca...@adacore.com wrote: Hi, this is a regression present on the mainline and 4.8 branch and introduced by the latest series of sizetype changes. Associated adjustments were made in the various front-ends for it, most notably Ada which was the most affected, but this issue slipped through the cracks in the form of a bogus overflow detection for 0-based arrays with variable upper bound included in a record with discriminant. The proposed fix is to disable overflow detection in sizetype for one special case (0 - 1) in size_binop_loc. An equivalent kludge was added to layout_type to disable overflow detection for the size expression of [0, -1] arrays. Tested on x86_64-suse-linux, OK for the mainline and 4.8 branch? I think I already rejected this and asked you to fix the users (like layout_type is a user). Clearly 0 - 1 in unsigned arithmetic overflows. Not indicating this may cause bugs elsewhere as easily as it fixes code not dealing with this fact. Richard. 2013-04-14 Eric Botcazou ebotca...@adacore.com PR middle-end/56474 * fold-const.c (size_binop_loc): Disable overflow detection for 0 - 1. 2013-04-14 Eric Botcazou ebotca...@adacore.com * gnat.dg/specs/array3.ads: New test. -- Eric Botcazou
Re: [build] Use -z ignore instead of --as-needed on Solaris
Il 08/04/2013 14:20, Rainer Orth ha scritto: While the Solaris linker doesn't support the --as-needed/--no-as-needed options (yet), it long has provided the equivalent -z ignore/-z record options. This patch makes use of them, avoiding unnecessary dependencies on libgcc_s.so.1. Bootstrapped without regressions on i386-pc-solaris2.11 (and checking that many dependencies on libgcc_s.so.1 in runtime libraries are gone that were flagged as unused by ldd -u) and x86_64-unknown-linux-gnu (gcc/specs unchanged, make check still running). Ok for mainline if it passes? Ok, the gcc.c parts are trivial enough. Paolo Thanks. Rainer 2013-04-05 Rainer Orth r...@cebitec.uni-bielefeld.de * configure.ac (gcc_cv_ld_as_needed): Set gcc_cv_ld_as_needed_option, gcc_cv_no_as_needed_option. Use -z ignore, -z record on *-*-solaris2*. (HAVE_LD_AS_NEEDED): Update comment. (LD_AS_NEEDED_OPTION, LD_NO_AS_NEEDED_OPTION): Define. * configure: Regenerate. * config.in: Regenerate. * gcc.c (init_gcc_specs) [USE_LD_AS_NEEDED]: Use LD_AS_NEEDED_OPTION, LD_NO_AS_NEEDED_OPTION. * config/sol2.h [HAVE_LD_AS_NEEDED] (USE_LD_AS_NEEDED): Define. * doc/tm.texi.in (USE_LD_AS_NEEDED): Allow for --as-needed equivalents. Fix markup. * doc/tm.texi: Regenerate.
[c++-concepts] Merge from trunk
Trunk has been merged into the c++-concepts branch. -- Gaby
Re: [patch] Fix PR middle-end/56474
I think I already rejected this and asked you to fix the users (like layout_type is a user). Yes, but that would be a pain; there are too many users in the Ada front-end. Clearly 0 - 1 in unsigned arithmetic overflows. Not indicating this may cause bugs elsewhere as easily as it fixes code not dealing with this fact. !?? There is no overflow in unsigned arithmetic. Instead size_binop forces overflows artificially and I don't see the problem in deciding that 0 - 1 is a special case, like [0, -1] is a special case for layout_type. And note that this was the historical behavior, before the latest sizetype changes. -- Eric Botcazou
Re: [PATCH,i386] Add -mstack-protector-guard= for i386
On Mon, Apr 15, 2013 at 8:30 AM, Andrew Hsieh andrewhs...@google.com wrote: Sure. Put back Init(SSP_TLS) and rebase to r197955. If no further feedback, please merge. Committed with slightly changed ChangeLog entry: 2013-04-15 Andrew Hsieh andrewhsieh.google.com * config/i386/i386.opt: New option mstack-protector-guard=. * config/i386/i386-opts.h: Add enum stack_protector_guard. * config/i386/i386.h: Define TARGET_SSP_GLOBAL_GUARD and TARGET_SSP_TLS_GUARD. * config/i386/i386.c (ix86_option_override_internal): Set ix86_stack_protector_guard. * config/i386/i386.md (stack_protect_set): Enable for TARGET_SSP_TLS_GUARD only. (stack_protect_set_mode): Ditto. (stack_protect_test): Ditto. (stack_protect_test_mode): Ditto. * doc/invoke.texi (i386 Option): Document. Thanks, Uros.
Re: [build] Use -z ignore instead of --as-needed on Solaris
Rainer Orth wrote: While the Solaris linker doesn't support the --as-needed/--no-as-needed options (yet), it long has provided the equivalent -z ignore/-z record options. This patch makes use of them, avoiding unnecessary dependencies on libgcc_s.so.1. Could you also do a similar update in libgfortran/acinclude.m4's libgfor_cv_have_as_needed check? Thanks, Tobias
RE: [PATCH][ARM] Improve code generation for anddi3
Ping? Thanks, Kyrill -Original Message- From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches- ow...@gcc.gnu.org] On Behalf Of Kyrylo Tkachov Sent: 08 April 2013 13:47 To: gcc-patches@gcc.gnu.org Cc: Ramana Radhakrishnan; Richard Earnshaw Subject: [PATCH][ARM] Improve code generation for anddi3 Hi all, When compiling: unsigned long long muld (unsigned long long X, unsigned long long Y) { unsigned long long mask = 0xull; return (X mask) * (Y mask); } we get a suboptimal sequence: stmfd sp!, {r4, r5} mvn r4, #0 mov r5, #0 and r0, r0, r4 and r3, r3, r5 and r1, r1, r5 and r2, r2, r4 mul r3, r0, r3 mla r3, r2, r1, r3 umull r0, r1, r0, r2 ldmfd sp!, {r4, r5} add r1, r3, r1 bx lr This patch improves that situation by changing the anddi3 insn into an insn_and_split and simplifying the SImode ands. Also, the NEON version is merged with the non-NEON one. This allows us to generate just: umull r0, r1, r2, r0 bx lr for the above code. Regtested arm-none-eabi on qemu. Ok for trunk? Thanks, Kyrill gcc/ChangeLog 2013-04-08 Kyrylo Tkachov kyrylo.tkac...@arm.com * config/arm/arm.c (const_ok_for_dimode_op): Handle AND case. * config/arm/arm.md (*anddi3_insn): Change to insn_and_split. * config/arm/constraints.md (De): New constraint. * config/arm/neon.md (anddi3_neon): Delete. (neon_vandmode): Expand to standard anddi3 pattern. * config/arm/predicates.md (imm_for_neon_inv_logic_operand): Move earlier in the file. (neon_inv_logic_op2): Likewise. (arm_anddi_operand_neon): New predicate. gcc/testsuite/ChangeLog 2013-04-08 Kyrylo Tkachov kyrylo.tkac...@arm.com * gcc.target/arm/anddi3-opt.c: New test. * gcc.target/arm/anddi3-opt2.c: Likewise.
Re: [PATCH][ARM] Improve code generation for anddi3
On 08/04/13 13:47, Kyrylo Tkachov wrote: Hi all, When compiling: unsigned long long muld (unsigned long long X, unsigned long long Y) { unsigned long long mask = 0xull; return (X mask) * (Y mask); } we get a suboptimal sequence: stmfd sp!, {r4, r5} mvn r4, #0 mov r5, #0 and r0, r0, r4 and r3, r3, r5 and r1, r1, r5 and r2, r2, r4 mul r3, r0, r3 mla r3, r2, r1, r3 umull r0, r1, r0, r2 ldmfd sp!, {r4, r5} add r1, r3, r1 bx lr This patch improves that situation by changing the anddi3 insn into an insn_and_split and simplifying the SImode ands. Also, the NEON version is merged with the non-NEON one. This allows us to generate just: umull r0, r1, r2, r0 bx lr for the above code. Regtested arm-none-eabi on qemu. Ok for trunk? Thanks, Kyrill gcc/ChangeLog 2013-04-08 Kyrylo Tkachov kyrylo.tkac...@arm.com * config/arm/arm.c (const_ok_for_dimode_op): Handle AND case. * config/arm/arm.md (*anddi3_insn): Change to insn_and_split. * config/arm/constraints.md (De): New constraint. * config/arm/neon.md (anddi3_neon): Delete. (neon_vandmode): Expand to standard anddi3 pattern. * config/arm/predicates.md (imm_for_neon_inv_logic_operand): Move earlier in the file. (neon_inv_logic_op2): Likewise. (arm_anddi_operand_neon): New predicate. gcc/testsuite/ChangeLog 2013-04-08 Kyrylo Tkachov kyrylo.tkac...@arm.com * gcc.target/arm/anddi3-opt.c: New test. * gcc.target/arm/anddi3-opt2.c: Likewise. OK. R.
Re: [build] Use -z ignore instead of --as-needed on Solaris
Tobias Burnus bur...@net-b.de writes: Rainer Orth wrote: While the Solaris linker doesn't support the --as-needed/--no-as-needed options (yet), it long has provided the equivalent -z ignore/-z record options. This patch makes use of them, avoiding unnecessary dependencies on libgcc_s.so.1. Could you also do a similar update in libgfortran/acinclude.m4's libgfor_cv_have_as_needed check? sure, thanks for the hint. That's the only other in-tree use. Rainer -- - Rainer Orth, Center for Biotechnology, Bielefeld University
Re: [wwwdocs] Buildstat update for 4.8
On Tue, 2 Apr 2013, Tom G. Christensen wrote: First round of results for gcc 4.8.x. There's something in that patch that you probably did not mean to include: Missing: 4.8.0#x86_64-winnix-linux-gnu (arch/tune amdfam10/amdfam10)#http://gcc.gnu.org/ml/gcc-testresults/2013-03/msg03331.html Missing: 4.8.0#x86_64-winnix-linux-gnu (arch/tune amdfam10/bdver2)#http://gcc.gnu.org/ml/gcc-testresults/2013-03/msg03334.html Missing: 4.8.0#x86_64-winnix-linux-gnu (arch/tune bdver2/bdver2)#http://gcc.gnu.org/ml/gcc-testresults/2013-03/msg03332.html Missing: 4.8.0#x86_64-winnix-linux-gnu (arch/tune none/none)#http://gcc.gnu.org/ml/gcc-testresults/2013-03/msg0.html Testresults for 4.8.0 The rest of the patch I have committed, thanks! Are you planning to look into those results above? Gerald
Re: [PATCH] Redesign pthread in LIB_SPEC for systems without libpthread
On Tue, Apr 2, 2013 at 1:59 PM, Pavel Chupin pavel.v.chu...@gmail.com wrote: On Mon, Apr 1, 2013 at 7:07 PM, Pavel Chupin pavel.v.chu...@gmail.com wrote: On Android pthread is integrated into libc. Attached patch fixes configures for this case by trying to build test without -pthread -lpthread. 2013-04-01 Pavel Chupin pavel.v.chu...@intel.com Fix libatomic and libgomp configure for systems without libpthread * libatomic/configure.ac: Add test without -pthread -lpthread. * libgomp/configure.ac: Ditto. * libatomic/configure: Regenerate. * libgomp/configure: Regenerate. OK for trunk? I think I made a better fix: 2013-04-02 Pavel Chupin pavel.v.chu...@intel.com Redesign pthread in LIB_SPEC for systems without libpthread * gcc/config/gnu-user.h: Remove pthread from GNU_USER_TARGET_LIB_SPEC but keep in default LIB_SPEC * gcc/config/linux-android.h: Add pthread to ANDROID_LIB_SPEC Is it OK for trunk? Ping -- Pavel Chupin Intel Corporation 0001-Redesign-pthread-in-LIB_SPEC-for-systems-without-lib.patch Description: Binary data
Re: [patch] Fix PR middle-end/56474
On Mon, Apr 15, 2013 at 12:04 PM, Eric Botcazou ebotca...@adacore.com wrote: I think I already rejected this and asked you to fix the users (like layout_type is a user). Yes, but that would be a pain, there are too many users in the Ada front-end. Users that care about the special casing of 0 - 1 and overflow detection? For the C family I found exactly one - the layout_type case, and fixed it in the FEs by making empty arrays use [1, 0] domains or signed domains (I don't remember exactly). I believe the layout_type change was to make Ada happy. Clearly 0 - 1 in unsigned arithmetic overflows. Not indicating this may cause bugs elsewhere as easily as it fixes code not dealing with this fact. !?? There is no overflow in unsigned arithmetics. Instead size_binop forces overflows artificially and I don't see the problem in deciding that 0 - 1 is a special case, like [0, -1] is a special case for layout_type. And note that this was the historical behavior, before the latest sizetype changes. Historically sizetype didn't overflow because it was supposed to never overflow. Well, unless we check for overflow. It may be that enabling overflow detection for even unsigned sizetype was because of Ada as well. After all only Ada changed its sizetype sign recently. I don't like special casing 0 - 1 in a general compute function. Maybe you want to use size_diffop for the computation? That would result in a signed result and thus no overflow for 0 - 1. The other option is to for example disable overflow handling for _all_ constants and MINUS_EXPR (and then please PLUS_EXPR as well) in size_binop. Maybe it's only the MULT_EXPR overflow we want to know (byte-to-bit conversion / element size scaling IIRC). Richard. -- Eric Botcazou
Re: [patch] Fix ICE during RTL expansion at -O1
On Mon, Apr 15, 2013 at 11:47 AM, Richard Biener richard.guent...@gmail.com wrote: On Sun, Apr 14, 2013 at 9:46 AM, Eric Botcazou ebotca...@adacore.com wrote: This is a quadratic algorithm and as such not ok. We already have aliasing_component_refs_p in tree-ssa-alias.c which is supposed to be the non-quadratic replacement. It's not used via decl_refs_may_alias_p, so that may be the thing to fix. aliasing_component_refs_p isn't powerful enough, it eliminates the quadratic aspect by assuming that all offsets are constants, so it misses cases like (*p)[i].f1 vs a[j].f2. Moreover it assumes TBAA and we don't need it here. Note that looking at the access path _is_ assuming TBAA constraints as soon as the base objects are not the same (in the above case '*p' and 'a' are not the same and p could alias a in a way that all f1 and f2 overlap). I can rewrite nonoverlapping_component_refs_of_decl_p to make it non-quadratic and catch the same cases I think, patch attached (without the vect testsuite adjustments, but they are still needed). nonoverlapping_component_refs_of_decl_p on RTL should go - in fact we do call the tree oracle from all its callers so we only ever do redundant work (after your proposed patch even more so). Not clear if the tree oracle can catch the above case with *p and a, but, yes, nonoverlapping_component_refs_p should go in the long term. * alias.c (nonoverlapping_component_refs_p): Protect again LTO quirk. * tree-ssa-alias.c (nonoverlapping_component_refs_of_decl_p): New. (decl_refs_may_alias_p): Add REF1 and REF2 parameters. Use nonoverlapping_component_refs_of_decl_p to disambiguate component references. (refs_may_alias_p_1): Adjust call to decl_refs_may_alias_p. * tree-streamer.c (record_common_node): Adjust reference in comment. 
Index: alias.c === --- alias.c (revision 197926) +++ alias.c (working copy) @@ -2232,8 +2232,11 @@ nonoverlapping_component_refs_p (const_r found: /* If we're left with accessing different fields of a structure, then no -possible overlap, unless they are both bitfields. */ - if (TREE_CODE (typex) == RECORD_TYPE fieldx != fieldy) +possible overlap, unless they are both bitfields. +??? Pointer inequality is too fragile in the LTO compiler. */ + if (TREE_CODE (typex) == RECORD_TYPE + fieldx != fieldy + DECL_NAME (fieldx) != DECL_NAME (fieldy)) this, if at all, should go in with a separate patch and a testcase. And I think it should _not_ go in. Instead, as the case passes if (typex == typey) goto found; earlier you should assert that DECL_CONTEXT (fieldx) == DECL_CONTEXT (fieldy) == typex == typey here. Note that fails of this test are expected even in the non-LTO case because I cannot find any IL verification that would verify that for a COMPONENT_REF TREE_TYPE (TREE_OPERAND (cr, 0)) == DECL_CONTEXT (TREE_OPERAND (cr, 1)) (due to sharing of the FIELD_DECL chain between different type variants the check will fail for all non-main-variants I think, so refining it to look at the main variant is probably advised). Otoh... + /* ??? We cannot simply use the type of operand #0 of the refs here +as the Fortran compiler smuggles type punning into COMPONENT_REFs +for common blocks instead of using unions like everyone else. */ + tree type1 = TYPE_MAIN_VARIANT (DECL_CONTEXT (field1)); + tree type2 = TYPE_MAIN_VARIANT (DECL_CONTEXT (field2)); + + if (type1 != type2 || TREE_CODE (type1) != RECORD_TYPE) +goto may_overlap; + + /* ??? Pointer inequality is too fragile in the LTO compiler. */ + if (field1 != field2 DECL_NAME (field1) != DECL_NAME (field2)) this suggests you are seeing multiple FIELD_DECLs for the same field in the _same_ FIELD_DECL chain ...?! Are you sure this happens with GCC 4.8? There were some fixes in that area in the LTO type merging code. 
Index: tree-streamer.c === --- tree-streamer.c (revision 197926) +++ tree-streamer.c (working copy) @@ -267,10 +267,9 @@ record_common_node (struct streamer_tree /* The FIELD_DECLs of structures should be shared, so that every COMPONENT_REF uses the same tree node when referencing a field. Pointer equality between FIELD_DECLs is used by the alias -machinery to compute overlapping memory references (See -nonoverlapping_component_refs_p). */ - tree f; - for (f = TYPE_FIELDS (node); f; f = TREE_CHAIN (f)) +machinery to compute overlapping component references (see +nonoverlapping_component_refs_of_decl_p). */ + for (tree f = TYPE_FIELDS (node); f; f = TREE_CHAIN (f)) record_common_node
Re: [Patch, fortran] PR 56919 SYSTEM_CLOCK on Windows
On Mon, Apr 15, 2013 at 11:37 AM, Tobias Burnus bur...@net-b.de wrote: Janne Blomqvist wrote: Attached is an updated patch which uses GetTickCount for system_clock_4; this should be fine as system_clock_4 wraps around in ~25 days anyways. For system_clock_8 it uses QueryPerformance{Counter,Frequency}. The patch also adds an additional check for _POSIX_MONOTONIC_CLOCK. Ok for trunk? Regarding the documentation, I wonder whether one should do the following additional changes: - Explicitly suggest to use a kind=8 argument for system_clock (for higher resolution and to avoid overflows). - To change the system_clock example to use an integer(8) argument. Possibly, via iso_fortran_env's int64 or via selected_int_kind(18) + #if defined(CLOCK_MONOTONIC) && defined(_POSIX_MONOTONIC_CLOCK) I'd add && _POSIX_MONOTONIC_CLOCK >= 0 as POSIX states: If a symbolic constant is defined with the value -1, the option is not supported. + uint32_t cnt = GetTickCount (); I wonder whether a comment stating that GetTickCount instead of QueryPerformanceCounter is used as the extra precision and 49.7-days overflow do not matter with the 32bit system_clock - and as QueryPerformanceCounter has issues on some (very few) systems. Otherwise, it looks fine to me. I committed the attached patch with most of your suggestions as r197968. Thanks for the review. -- Janne Blomqvist sysclockwin.3.diff Description: Binary data
[Patch, Fortran, committed] Init a variable
Found with the Coverity scanner. It also complains about the fall-throughs. Either the comment will silence the warning* or at least it helps human reviewers. Committed as Rev. 197969 after build+regtesting on x86-64-gnu-linux. Tobias (* Well, one can also silence the Coverity diagnostic for that spot.) 2013-04-15 Tobias Burnus bur...@net-b.de * list_read.c (finish_separator): Initialize variable. diff --git a/libgfortran/io/list_read.c b/libgfortran/io/list_read.c index b29fdcd..c8a1bdfc 100644 --- a/libgfortran/io/list_read.c +++ b/libgfortran/io/list_read.c @@ -393,7 +393,7 @@ static int finish_separator (st_parameter_dt *dtp) { int c; - int err; + int err = LIBERROR_OK; restart: eat_spaces (dtp); @@ -433,7 +433,7 @@ finish_separator (st_parameter_dt *dtp) return err; goto restart; } - + /* Fall through. */ default: unget_char (dtp, c); break; @@ -2788,6 +2788,7 @@ nml_get_obj_data (st_parameter_dt *dtp, namelist_info **pprev_nl, namelist not terminated with / or end); goto nml_err_ret; } + /* Fall through. */ case '/': dtp-u.p.input_complete = 1; return true;
Re: [Patch, fortran] PR 56919 SYSTEM_CLOCK on Windows
Janne Blomqvist wrote: I committed the attached patch with most of your suggestions as r197968. Thanks! Although, I assume you meant kind=8 in the last sentence: +the underlying platform clock. @var{COUNT_MAX} usually equals +@code{HUGE(COUNT_MAX)}. Note that the millisecond resolution of the +@var{kind=4} version implies that the @var{COUNT} will wrap around in +roughly 25 days. In order to avoid issues with the wrap around and for +more precise timing, please use the @var{kind=4} version. Hence, I committed the attached patch as obvious. Tobias Index: gcc/fortran/ChangeLog === --- gcc/fortran/ChangeLog (Revision 197969) +++ gcc/fortran/ChangeLog (Arbeitskopie) @@ -1,3 +1,7 @@ +2013-04-15 Tobias Burnus bur...@net-b.de + + * intrinsic.texi (SYSTEM_CLOCK): Recommend kind=8. + 2013-04-15 Janne Blomqvist j...@gcc.gnu.org PR fortran/56919 Index: gcc/fortran/intrinsic.texi === --- gcc/fortran/intrinsic.texi (Revision 197969) +++ gcc/fortran/intrinsic.texi (Arbeitskopie) @@ -12052,7 +12052,7 @@ @code{HUGE(COUNT_MAX)}. Note that the millisecond resolution of the @var{kind=4} version implies that the @var{COUNT} will wrap around in roughly 25 days. In order to avoid issues with the wrap around and for -more precise timing, please use the @var{kind=4} version. +more precise timing, please use the @var{kind=8} version. If there is no clock, or querying the clock fails, @var{COUNT} is set to @code{-HUGE(COUNT)}, and @var{COUNT_RATE} and @var{COUNT_MAX} are
Re: [Patch, fortran] PR 56919 SYSTEM_CLOCK on Windows
On Mon, Apr 15, 2013 at 3:51 PM, Tobias Burnus bur...@net-b.de wrote: Janne Blomqvist wrote: I committed the attached patch with most of your suggestions as r197968. Thanks! Although, I assume you meant kind=8 in the last sentence: +the underlying platform clock. @var{COUNT_MAX} usually equals +@code{HUGE(COUNT_MAX)}. Note that the millisecond resolution of the +@var{kind=4} version implies that the @var{COUNT} will wrap around in +roughly 25 days. In order to avoid issues with the wrap around and for +more precise timing, please use the @var{kind=4} version. Hence, I committed the attached patch as obvious. Indeed, thanks for spotting it, and fixing it quickly! -- Janne Blomqvist
Re: [PATCH] PR55033: Fix
Hello, would one of the global reviewers mind having a look at this? The comment #2 of PR55033 clearly shows that this is a compiler bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55033#c2 Alan Modra proposed a patch to fix this problem only one day after the bug report. This was in October 2012. Now this issue is still open due to a lack of global reviewer approval. We also have no disapproval. We have silence. I executed the test suite again with and without the patch on the latest GCC 4.8 branch on x86_64-unknown-linux-gnu: Without patch: http://gcc.gnu.org/ml/gcc-testresults/2013-04/msg01608.html With patch: http://gcc.gnu.org/ml/gcc-testresults/2013-04/msg01610.html = There are no new failures. It would be really nice if PR55033 can be fixed for GCC 4.8 and later. On 04/02/2013 05:35 AM, Alan Modra wrote: On Sat, Mar 30, 2013 at 06:29:36PM -0400, David Edelsohn wrote: How can we make progress to get this patch committed on trunk, 4.8 and 4.7? I have OKs for the config/i386/winnt.c and config/rs6000/rs6000.c parts. I just need someone who is authorized to review patches to varasm.c, and is willing to risk their reputation to at least comment on the patch. Even a loud NO would be better than silence. As it stands, I'm sorry I offered the patch, even though I still believe the patch is correct from a design viewpoint, better than Sebastian's patch that just tackled the specific case of .sdata2. His patch is obviously easier to review, we'd be OK on powerpc.. Let's try again with all the information in one place, and perhaps a better explanation. http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55033 is about a problem on powerpc eabi, where gcc generates wrong section flags (SECTION_WRITE) for an array initializer that should live in .sdata2, a read-only section, and then hits an internal consistency check because gcc gets the correct flags for .sdata2 in other cases. See attached C testcase. 
Sebastian offered a patch to default_section_type_flags that would correct the flags, but his approach is going back to the mess we had before Richard Henderson gave us categorize_decl_for_section. I made the comment that we ought to use categorize_decl_for_section for selecting the section flags, if we've used categorize_decl_for_section to select the section name. Specifically, default_elf_select_section should allow this to happen. rth agreed with this design approach in http://gcc.gnu.org/ml/gcc-patches/2004-11/msg02487.html, but that particular patch of mine was flawed, and I fixed a followup PR in a non-ideal way. So http://gcc.gnu.org/ml/gcc-patches/2012-10/msg02172.html is actually a fix for a very old patch of mine. I can well understand a reviewer looking at the patch and scratching their heads a little. Not so much due to the latest patch, but because existing code in this area is suspicious. For instance, you might wonder why it is correct to have if (decl !DECL_P (decl)) decl = NULL_TREE; before calling get_section(). The answer is that get_section() is not prepared to handle !DECL_P trees when reporting errors. Arguably it should be modified to do that. -- Sebastian Huber, embedded brains GmbH Address : Dornierstr. 4, D-82178 Puchheim, Germany Phone : +49 89 189 47 41-16 Fax : +49 89 189 47 41-09 E-Mail : sebastian.hu...@embedded-brains.de PGP : Public key available on request. Diese Nachricht ist keine geschäftliche Mitteilung im Sinne des EHUG.
Re: Fill more delay slots in conditional returns
On 04/14/2013 03:43 AM, Eric Botcazou wrote: I don't recall ever working on this aspect of reorg. The obvious worry is that with reorg moving stuff around those notes may not be valid anymore in the general case. Yes, in the general case I agree that's too dangerous. In this particular case, i.e. backward scan only, this might be plausible, although one has probably to worry about what happens if the insn is removed from the delay slot and put back into the RTL stream. And if I remember correctly, relax_delay_slots can do this sort of thing. * reorg.c (fill_simple_delay_slots): Reindent block of code. * resource.c (mark_target_live_regs): Reformat conditional block. Thanks for taking care of these. Jeff
[PATCH] Fix PR56933
This fixes the wrong-code caused by the use-before-compute of GROUP_READ_WRITE_DEPENDENCE after I moved data dependence checking after group analysis in the vectorizer. The fix is to move the dependence checking completely to the dependence checking - now possible as we have computed groups already. Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2013-04-15 Richard Biener rguent...@suse.de PR tree-optimization/56933 * tree-vectorizer.h (struct _stmt_vec_info): Remove read_write_dep member. (GROUP_READ_WRITE_DEPENDENCE): Remove. (STMT_VINFO_GROUP_READ_WRITE_DEPENDENCE): Likewise. * tree-vect-data-refs.c (vect_analyze_group_access): Move dependence check ... vect_analyze_data_ref_dependence (vect_analyze_data_ref_dependence): ... here. * tree-vect-stmts.c (new_stmt_vec_info): Do not initialize GROUP_READ_WRITE_DEPENDENCE. * gcc.dg/vect/pr56933.c: New testcase. Index: gcc/tree-vect-data-refs.c === *** gcc/tree-vect-data-refs.c (revision 197957) --- gcc/tree-vect-data-refs.c (working copy) *** vect_analyze_data_ref_dependence (struct *** 341,354 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb)); } ! /* For interleaving, mark that there is a read-write dependency if ! necessary. We check before that one of the data-refs is store. */ ! if (DR_IS_READ (dra)) ! GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true; ! else ! { ! if (DR_IS_READ (drb)) ! GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true; } continue; --- 341,374 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb)); } ! /* When we perform grouped accesses and perform implicit CSE !by detecting equal accesses and doing disambiguation with !runtime alias tests like for ! .. = a[i]; ! .. = a[i+1]; ! a[i] = ..; ! a[i+1] = ..; ! *p = ..; ! .. = a[i]; ! .. = a[i+1]; !where we will end up loading { a[i], a[i+1] } once, make !sure that inserting group loads before the first load and !stores after the last store will do the right thing. */ ! if ((STMT_VINFO_GROUPED_ACCESS (stmtinfo_a) ! 
GROUP_SAME_DR_STMT (stmtinfo_a)) ! || (STMT_VINFO_GROUPED_ACCESS (stmtinfo_b) ! GROUP_SAME_DR_STMT (stmtinfo_b))) ! { ! gimple earlier_stmt; ! earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb)); ! if (DR_IS_WRITE ! (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt ! { ! if (dump_enabled_p ()) ! dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, !READ_WRITE dependence in interleaving.); ! return true; ! } } continue; *** vect_analyze_group_access (struct data_r *** 2097,2113 return false; } - /* Check that there is no load-store dependencies for this loads - to prevent a case of load-store-load to the same location. */ - if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next)) - || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - READ_WRITE dependence in interleaving.); - return false; - } - /* For load use the same data-ref load. */ GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev; --- 2117,2122 Index: gcc/tree-vect-stmts.c === *** gcc/tree-vect-stmts.c (revision 197957) --- gcc/tree-vect-stmts.c (working copy) *** new_stmt_vec_info (gimple stmt, loop_vec *** 5962,5968 GROUP_STORE_COUNT (res) = 0; GROUP_GAP (res) = 0; GROUP_SAME_DR_STMT (res) = NULL; - GROUP_READ_WRITE_DEPENDENCE (res) = false; return res; } --- 5962,5967 Index: gcc/tree-vectorizer.h === *** gcc/tree-vectorizer.h (revision 197957) --- gcc/tree-vectorizer.h (working copy) *** typedef struct _stmt_vec_info { *** 460,469 /* Stmt is part of some pattern (computation idiom) */ bool in_pattern_p; - /* For loads only, if there is a store with the same location, this field is - TRUE. */ -
Re: [PATCH, tree-ssa] Avoid -Wuninitialized warning in try_unroll_loop_completely()
On 04/13/2013 07:17 PM, Chung-Ju Wu wrote: Hi, I noticed there is an uninitialized variable warning when compiling tree-ssa-loop-ivcanon.c file. Attached patch is a slight modification to avoid the warning and a plaintext ChangeLog is as below. Is it OK for trunk? 2013-04-14 Chung-Ju Wu jasonw...@gmail.com * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Avoid -Wuninitialized warning. If this is a false positive (and I think it is from a very quick scan of the code), can you mark the initialization as such? /* Avoid false positive -Wuninitialized warning. */ Ideally this will become standard practice. jeff
[PATCH] Fix SLSR wrong-code (PR tree-optimization/56962)
Hi! record_increment failed to verify that initializer is usable, which it is only if cand_stmt is an addition (CAND_ADD can be e.g. even on a cast of addition to some type of the same precision etc.) and one of the operands is c->base_expr (because then the other operand necessarily has to be the rest, but the code was only checking one of the operands, but cand_stmt e.g. can be a sum of two SSA_NAMEs where each of those adds some multiple of one of base_expr operands and some multiple of the c->stride. If we set initializer to randomly chosen operand of such stmt, while we'll have the right multiple of c->stride, the base_expr might be wrong. Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/4.8? 2013-04-15 Jakub Jelinek ja...@redhat.com PR tree-optimization/56962 * gimple-ssa-strength-reduction.c (record_increment): Only set initializer if gimple_assign_rhs_code is {,POINTER_}PLUS_EXPR and either rhs1 or rhs2 is equal to c->base_expr. * gcc.c-torture/execute/pr56962.c: New test. 
--- gcc/gimple-ssa-strength-reduction.c.jj 2013-01-11 09:02:50.0 +0100 +++ gcc/gimple-ssa-strength-reduction.c 2013-04-15 11:59:46.668463873 +0200 @@ -1829,16 +1829,20 @@ record_increment (slsr_cand_t c, double_ if (c-kind == CAND_ADD c-index == increment (increment.sgt (double_int_one) - || increment.slt (double_int_minus_one))) + || increment.slt (double_int_minus_one)) + (gimple_assign_rhs_code (c-cand_stmt) == PLUS_EXPR + || gimple_assign_rhs_code (c-cand_stmt) == POINTER_PLUS_EXPR)) { - tree t0; + tree t0 = NULL_TREE; tree rhs1 = gimple_assign_rhs1 (c-cand_stmt); tree rhs2 = gimple_assign_rhs2 (c-cand_stmt); if (operand_equal_p (rhs1, c-base_expr, 0)) t0 = rhs2; - else + else if (operand_equal_p (rhs2, c-base_expr, 0)) t0 = rhs1; - if (SSA_NAME_DEF_STMT (t0) gimple_bb (SSA_NAME_DEF_STMT (t0))) + if (t0 + SSA_NAME_DEF_STMT (t0) + gimple_bb (SSA_NAME_DEF_STMT (t0))) { incr_vec[incr_vec_len].initializer = t0; incr_vec[incr_vec_len++].init_bb --- gcc/testsuite/gcc.c-torture/execute/pr56962.c.jj2013-04-15 12:09:24.781355085 +0200 +++ gcc/testsuite/gcc.c-torture/execute/pr56962.c 2013-04-15 12:09:19.985381802 +0200 @@ -0,0 +1,30 @@ +/* PR tree-optimization/56962 */ + +extern void abort (void); +long long v[144]; + +__attribute__((noinline, noclone)) void +bar (long long *x) +{ + if (x != v[29]) +abort (); +} + +__attribute__((noinline, noclone)) void +foo (long long *x, long y, long z) +{ + long long a, b, c; + a = x[z * 4 + y * 3]; + b = x[z * 5 + y * 3]; + c = x[z * 5 + y * 4]; + x[y * 4] = a; + bar (x[z * 5 + y]); + x[z * 5 + y * 5] = b + c; +} + +int +main () +{ + foo (v, 24, 1); + return 0; +} Jakub
Re: [PATCH] color diagnostics markers
On Mon, Apr 15, 2013 at 04:14:58AM -0500, Gabriel Dos Reis wrote: Defining a variable in my environment counts as a clear intention. If you invoke GCC on command line with explicit option requesting colors, I don't think there is any doubt that. However, I dispute the intent to be so universally clear for most GCC users who happen to have GCC_COLORS set -- most users inherit whatever their sysadmin or distros set for them. That said, I am fine with the idea to GCC_COLORS = detect. Here is a patch that implements it. If GCC_COLORS isn't in the environment, the default is still -fdiagnostics-color=never, otherwise it is -fdiagnostics-color=auto (but, as before, if GCC_COLORS is in the environment, but empty, colors aren't shown at all). Bootstrapped/regtested on x86_64-linux and i686-linux, additionally tested on a few testcases with make check when GCC_COLORS was in the environment, and additionally tested with RUNTESTFLAGS='--target_board=unix/-fdiagnostics-color=always dg.exp=pr56*.c' where only the last one showed some (expected) regressions, because that forces over colors even for the tests. Ok for trunk? 2013-04-15 Jakub Jelinek ja...@redhat.com * Makefile.in (toplev.o): Depend on diagnostic-color.h. * diagnostic-color.c (should_colorize): Remove _WIN32 version. (colorize_init): Add argument to _WIN32 version. * toplev.c: Include diagnostic-color.h. (process_options): Default to -fdiagnostics-color=auto if GCC_COLORS env var is in the environment. * common.opt (fdiagnostics-color=): Add Var and Init. * doc/invoke.texi (-fdiagnostics-color=): Document that if GCC_COLORS env var is in the environment, the default is auto rather than never. * lib/prune.exp: Add -fdiagnostics-color=never to TEST_ALWAYS_FLAGS. * lib/c-compat.exp (compat-use-alt-compiler, compat_setup_dfp): Handle -fdiagnostics-color=never option similarly to -fno-diagnostics-show-caret option. 
--- gcc/Makefile.in.jj 2013-04-12 08:13:22.0 +0200 +++ gcc/Makefile.in 2013-04-12 08:13:22.0 +0200 @@ -2710,7 +2710,7 @@ toplev.o : toplev.c $(CONFIG_H) $(SYSTEM $(OPTS_H) params.def tree-mudflap.h $(TREE_PASS_H) $(GIMPLE_H) \ tree-ssa-alias.h $(PLUGIN_H) realmpfr.h tree-diagnostic.h \ $(TREE_PRETTY_PRINT_H) opts-diagnostic.h $(COMMON_TARGET_H) \ - tsan.h + tsan.h diagnostic-color.h hwint.o : hwint.c $(CONFIG_H) $(SYSTEM_H) $(DIAGNOSTIC_CORE_H) --- gcc/diagnostic-color.c.jj 2013-04-12 08:13:22.0 +0200 +++ gcc/diagnostic-color.c 2013-04-15 13:00:41.978114658 +0200 @@ -264,14 +264,8 @@ parse_gcc_colors (void) } #if defined(_WIN32) -static bool -should_colorize (void) -{ - return false; -} - bool -colorize_init (void) +colorize_init (diagnostic_color_rule_t) { return false; } --- gcc/doc/invoke.texi.jj 2013-04-12 08:13:22.0 +0200 +++ gcc/doc/invoke.texi 2013-04-15 13:41:42.869556167 +0200 @@ -2963,9 +2963,10 @@ a message which is too long to fit on a @cindex highlight, color, colour @vindex GCC_COLORS @r{environment variable} Use color in diagnostics. @var{WHEN} is @samp{never}, @samp{always}, -or @samp{auto}. The default is @samp{never}, @samp{auto} means to use color -only when the standard error is a terminal. The forms -@option{-fdiagnostics-color} and @option{-fno-diagnostics-color} are +or @samp{auto}. The default is @samp{never} if @env{GCC_COLORS} environment +variable isn't present in the environment, and @samp{auto} otherwise. +@samp{auto} means to use color only when the standard error is a terminal. +The forms @option{-fdiagnostics-color} and @option{-fno-diagnostics-color} are aliases for @option{-fdiagnostics-color=always} and @option{-fdiagnostics-color=never}, respectively. 
--- gcc/testsuite/lib/prune.exp.jj 2013-01-11 09:02:39.0 +0100 +++ gcc/testsuite/lib/prune.exp 2013-04-15 13:36:49.275191326 +0200 @@ -19,7 +19,7 @@ if ![info exists TEST_ALWAYS_FLAGS] { set TEST_ALWAYS_FLAGS } -set TEST_ALWAYS_FLAGS -fno-diagnostics-show-caret $TEST_ALWAYS_FLAGS +set TEST_ALWAYS_FLAGS -fno-diagnostics-show-caret -fdiagnostics-color=never $TEST_ALWAYS_FLAGS proc prune_gcc_output { text } { #send_user Before:$text\n --- gcc/testsuite/lib/c-compat.exp.jj 2013-01-11 09:02:39.0 +0100 +++ gcc/testsuite/lib/c-compat.exp 2013-04-15 13:38:36.361596471 +0200 @@ -34,7 +34,7 @@ load_lib target-supports.exp # proc compat-use-alt-compiler { } { global GCC_UNDER_TEST ALT_CC_UNDER_TEST -global compat_same_alt compat_alt_caret +global compat_same_alt compat_alt_caret compat_alt_color global TEST_ALWAYS_FLAGS # We don't need to do this if the alternate compiler is actually @@ -44,6 +44,9 @@ proc compat-use-alt-compiler { } { if { $compat_alt_caret == 0 } then { regsub -- -fno-diagnostics-show-caret $TEST_ALWAYS_FLAGS TEST_ALWAYS_FLAGS } +
Re: [PATCH] Fix SLSR wrong-code (PR tree-optimization/56962)
FWIW, I agree with Jakub's fix. Thanks! Bill On Mon, 2013-04-15 at 16:28 +0200, Jakub Jelinek wrote: Hi! record_increment failed to verify that initializer is usable, which it is only if cand_stmt is an addition (CAND_ADD can be e.g. even on a cast of addition to some type of the same precision etc.) and one of the operands is c->base_expr (because then the other operand necessarily has to be the rest, but the code was only checking one of the operands, but cand_stmt e.g. can be a sum of two SSA_NAMEs where each of those adds some multiple of one of base_expr operands and some multiple of the c->stride. If we set initializer to randomly chosen operand of such stmt, while we'll have the right multiple of c->stride, the base_expr might be wrong. Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/4.8? 2013-04-15 Jakub Jelinek ja...@redhat.com PR tree-optimization/56962 * gimple-ssa-strength-reduction.c (record_increment): Only set initializer if gimple_assign_rhs_code is {,POINTER_}PLUS_EXPR and either rhs1 or rhs2 is equal to c->base_expr. * gcc.c-torture/execute/pr56962.c: New test. 
--- gcc/gimple-ssa-strength-reduction.c.jj2013-01-11 09:02:50.0 +0100 +++ gcc/gimple-ssa-strength-reduction.c 2013-04-15 11:59:46.668463873 +0200 @@ -1829,16 +1829,20 @@ record_increment (slsr_cand_t c, double_ if (c-kind == CAND_ADD c-index == increment (increment.sgt (double_int_one) - || increment.slt (double_int_minus_one))) + || increment.slt (double_int_minus_one)) +(gimple_assign_rhs_code (c-cand_stmt) == PLUS_EXPR + || gimple_assign_rhs_code (c-cand_stmt) == POINTER_PLUS_EXPR)) { - tree t0; + tree t0 = NULL_TREE; tree rhs1 = gimple_assign_rhs1 (c-cand_stmt); tree rhs2 = gimple_assign_rhs2 (c-cand_stmt); if (operand_equal_p (rhs1, c-base_expr, 0)) t0 = rhs2; - else + else if (operand_equal_p (rhs2, c-base_expr, 0)) t0 = rhs1; - if (SSA_NAME_DEF_STMT (t0) gimple_bb (SSA_NAME_DEF_STMT (t0))) + if (t0 +SSA_NAME_DEF_STMT (t0) +gimple_bb (SSA_NAME_DEF_STMT (t0))) { incr_vec[incr_vec_len].initializer = t0; incr_vec[incr_vec_len++].init_bb --- gcc/testsuite/gcc.c-torture/execute/pr56962.c.jj 2013-04-15 12:09:24.781355085 +0200 +++ gcc/testsuite/gcc.c-torture/execute/pr56962.c 2013-04-15 12:09:19.985381802 +0200 @@ -0,0 +1,30 @@ +/* PR tree-optimization/56962 */ + +extern void abort (void); +long long v[144]; + +__attribute__((noinline, noclone)) void +bar (long long *x) +{ + if (x != v[29]) +abort (); +} + +__attribute__((noinline, noclone)) void +foo (long long *x, long y, long z) +{ + long long a, b, c; + a = x[z * 4 + y * 3]; + b = x[z * 5 + y * 3]; + c = x[z * 5 + y * 4]; + x[y * 4] = a; + bar (x[z * 5 + y]); + x[z * 5 + y * 5] = b + c; +} + +int +main () +{ + foo (v, 24, 1); + return 0; +} Jakub
Re: [PATCH] Fix SLSR wrong-code (PR tree-optimization/56962)
On Mon, 15 Apr 2013, Jakub Jelinek wrote: Hi! record_increment failed to verify that initializer is usable, which it is only if cand_stmt is an addition (CAND_ADD can be e.g. even on a cast of addition to some type of the same precision etc.) and one of the operands is c-base_expr (because then the other operand necessarily has to be the rest, but the code was only checking one of the operands, but cand_stmt e.g. can be a sum of two SSA_NAMEs where each of those adds some multiply of one of base_expr operands and some multiply of the c-stride. If we set initializer to randomly chosen operand of such stmt, while we'll have the right multiply of c-stride, the base_expr might be wrong. Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/4.8? Ok. Thanks, Richard. 2013-04-15 Jakub Jelinek ja...@redhat.com PR tree-optimization/56962 * gimple-ssa-strength-reduction.c (record_increment): Only set initializer if gimple_assign_rhs_code is {,POINTER_}PLUS_EXPR and either rhs1 or rhs2 is equal to c-base_expr. * gcc.c-torture/execute/pr56962.c: New test. 
--- gcc/gimple-ssa-strength-reduction.c.jj2013-01-11 09:02:50.0 +0100 +++ gcc/gimple-ssa-strength-reduction.c 2013-04-15 11:59:46.668463873 +0200 @@ -1829,16 +1829,20 @@ record_increment (slsr_cand_t c, double_ if (c-kind == CAND_ADD c-index == increment (increment.sgt (double_int_one) - || increment.slt (double_int_minus_one))) + || increment.slt (double_int_minus_one)) +(gimple_assign_rhs_code (c-cand_stmt) == PLUS_EXPR + || gimple_assign_rhs_code (c-cand_stmt) == POINTER_PLUS_EXPR)) { - tree t0; + tree t0 = NULL_TREE; tree rhs1 = gimple_assign_rhs1 (c-cand_stmt); tree rhs2 = gimple_assign_rhs2 (c-cand_stmt); if (operand_equal_p (rhs1, c-base_expr, 0)) t0 = rhs2; - else + else if (operand_equal_p (rhs2, c-base_expr, 0)) t0 = rhs1; - if (SSA_NAME_DEF_STMT (t0) gimple_bb (SSA_NAME_DEF_STMT (t0))) + if (t0 +SSA_NAME_DEF_STMT (t0) +gimple_bb (SSA_NAME_DEF_STMT (t0))) { incr_vec[incr_vec_len].initializer = t0; incr_vec[incr_vec_len++].init_bb --- gcc/testsuite/gcc.c-torture/execute/pr56962.c.jj 2013-04-15 12:09:24.781355085 +0200 +++ gcc/testsuite/gcc.c-torture/execute/pr56962.c 2013-04-15 12:09:19.985381802 +0200 @@ -0,0 +1,30 @@ +/* PR tree-optimization/56962 */ + +extern void abort (void); +long long v[144]; + +__attribute__((noinline, noclone)) void +bar (long long *x) +{ + if (x != v[29]) +abort (); +} + +__attribute__((noinline, noclone)) void +foo (long long *x, long y, long z) +{ + long long a, b, c; + a = x[z * 4 + y * 3]; + b = x[z * 5 + y * 3]; + c = x[z * 5 + y * 4]; + x[y * 4] = a; + bar (x[z * 5 + y]); + x[z * 5 + y * 5] = b + c; +} + +int +main () +{ + foo (v, 24, 1); + return 0; +} Jakub -- Richard Biener rguent...@suse.de SUSE / SUSE Labs SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 GF: Jeff Hawn, Jennifer Guild, Felix Imend
Re: [wwwdocs] Buildstat update for 4.8
On 04/15/2013 01:36 PM, Gerald Pfeifer wrote: On Tue, 2 Apr 2013, Tom G. Christensen wrote: First round of results for gcc 4.8.x. There's something in that patch that you probably did not mean to include: Missing: 4.8.0#x86_64-winnix-linux-gnu (arch/tune amdfam10/amdfam10)#http://gcc.gnu.org/ml/gcc-testresults/2013-03/msg03331.html Missing: 4.8.0#x86_64-winnix-linux-gnu (arch/tune amdfam10/bdver2)#http://gcc.gnu.org/ml/gcc-testresults/2013-03/msg03334.html Missing: 4.8.0#x86_64-winnix-linux-gnu (arch/tune bdver2/bdver2)#http://gcc.gnu.org/ml/gcc-testresults/2013-03/msg03332.html Missing: 4.8.0#x86_64-winnix-linux-gnu (arch/tune none/none)#http://gcc.gnu.org/ml/gcc-testresults/2013-03/msg0.html Indeed. It seems I accidentally took the output from my check script instead of only cvs diff. Are you planning to look into those results above? I already did when I prepared the patch. I chose to ignore them hence why they are marked as missing. Those results were posted as data points for this mail: http://gcc.gnu.org/ml/gcc/2013-04/msg3.html The non-standard host triplet used served no other purpose than easy identification for the readers of that mail. I suppose I could add them but should I then add them to the canonical x86_64-unknown-linux-gnu entry? -tgc
[4.7, v3] Compile 30_threads/condition_variable/members/53841.cc with -std=gnu++0x on Tru64 UNIX and IRIX
On the 4.7 branch, 30_threads/condition_variable/members/53841.cc fails to compile on IRIX and Tru64 UNIX since the required -std=gnu++0x wasn't passed. Fixed like this, tested with the appropriate runtest invocation, installed on the 4.7 branch. Rainer 2013-04-15 Rainer Orth r...@cebitec.uni-bielefeld.de * testsuite/30_threads/condition_variable/members/53841.cc: Add -std=gnu++0x -pthread on alpha*-*-osf*, mips-sgi-irix6*. # HG changeset patch # Parent 72c6c407d339f321a393e37c78e6dcadc67546d1 Compile 30_threads/condition_variable/members/53841.cc with -std=gnu++0x on Tru64 UNIX diff --git a/libstdc++-v3/testsuite/30_threads/condition_variable/members/53841.cc b/libstdc++-v3/testsuite/30_threads/condition_variable/members/53841.cc --- a/libstdc++-v3/testsuite/30_threads/condition_variable/members/53841.cc +++ b/libstdc++-v3/testsuite/30_threads/condition_variable/members/53841.cc @@ -1,5 +1,5 @@ // { dg-do compile } -// { dg-options -std=gnu++0x -pthread { target *-*-freebsd* *-*-netbsd* *-*-linux* powerpc-ibm-aix* hppa*-hp-hpux11* } } +// { dg-options -std=gnu++0x -pthread { target *-*-freebsd* *-*-netbsd* *-*-linux* alpha*-*-osf* mips-sgi-irix6* powerpc-ibm-aix* hppa*-hp-hpux11* } } // { dg-options -std=gnu++0x -pthreads { target *-*-solaris* } } // { dg-options -std=gnu++0x { target *-*-cygwin *-*-darwin* } } // { dg-require-cstdint } -- - Rainer Orth, Center for Biotechnology, Bielefeld University
[wwwdoc] Mark ARM/hard_vfp_4_4_branch as discontinued
In reality this was discontinued some time ago. But I was just doing some local spring-cleaning and realized that wwwdocs had never been updated. Committed as obvious. R. Index: svn.html === RCS file: /cvs/gcc/wwwdocs/htdocs/svn.html,v retrieving revision 1.183 retrieving revision 1.184 diff -u -r1.183 -r1.184 --- svn.html7 Apr 2013 23:47:29 - 1.183 +++ svn.html15 Apr 2013 14:55:17 - 1.184 @@ -395,11 +395,6 @@ are Dwarakanath Rajagopal lt;a href=mailto:dwarak.rajago...@amd.com;dwarak.rajago...@amd.com/agt; and H.J. Lu lt;a href=mailto:hjl.to...@gmail.com;hjl.to...@gmail.com/agt;./dd - dtARM/hard_vfp_4_4_branch/dt - ddThis branch contains support for the hard-VFP variant of the AAPCS calling - standard and tracks the gcc-4.4 development. This branch is maintained by - Richard Earnshaw./dd - dtARM/aarch64-branch/dt ddThis branch adds support for the AArch64 architecture and will track trunk until such time as the port is merged into trunk. Patches to this @@ -1123,6 +1118,11 @@ by a href=mailto:laurynas.bivei...@gmail.com;Laurynas Biveinis/a./dd + dtARM/hard_vfp_4_4_branch/dt + ddThis branch contains support for the hard-VFP variant of the AAPCS calling + standard and tracked gcc-4.4 development. This branch was maintained by + Richard Earnshaw./dd + dtix86/avx/dt ddThe goal of this branch is to implement Intel AVX (Intel Advanced Vector Extensions). The branch is maintained by
Re: [patch][mips] split mips_reorg in pre- and post-dbr_schedule parts
On 04/14/2013 08:20 AM, Steven Bosscher wrote: Hello, This patch splits mips_reorg.c in a pre-dbr_schedule part and a new, machine specific post-dbr_schedule pass. With this patch, cleanup_barriers and dbr_schedule can be static functions again. Cross-built and tested mips-sim. OK for trunk? Ciao! Steven mips_post_dbr_reorg_as_machine_pass.diff.txt * config/mips/mips.c: Include tree-pass.h. (mips_reorg): Split in pre- and post-dbr_schedule parts. (mips_machine_reorg2): Move mips_reorg post-dbr_schedule parts here. (pass_mips_machine_reorg2): New machine specific pass. (insert_pass_mips_machine_reorg2): New pass plugin definition. (mips_option_override): Register the new pass. * rtl.h (cleanup_barriers): Remove prototype. (dbr_schedule): Likewise. * jump.c (cleanup_barriers): Make static. * reorg.c (dbr_schedule): Likewise. The rtl, jump and reorg bits are fine with me. I don't know enough about the MIPS specific bits to comment on them in any meaningful way. jeff
C++ PATCH for c++/56388 (lambdas and EH)
My code for inserting capture proxy DECL_EXPRs was assuming that there is one stmt_list_stack entry per cp_binding_level entry, but that isn't always the case. Conveniently, the stmt_list_stack entry we want will always be at index 1, since each function has its own stack. Tested x86_64-pc-linux-gnu, applying to trunk and 4.8. commit 25d22d3ebea706ae32be404241cfe34d6b2ffefb Author: Jason Merrill ja...@redhat.com Date: Thu Apr 11 18:12:52 2013 -0400 PR c++/56388 * semantics.c (insert_capture_proxy): Just use index 1 in the stmt_list_stack. diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index a09a7f4..1ac38a3 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -9282,7 +9282,7 @@ insert_capture_proxy (tree var) /* And put a DECL_EXPR in the STATEMENT_LIST for the same block. */ var = build_stmt (DECL_SOURCE_LOCATION (var), DECL_EXPR, var); - stmt_list = (*stmt_list_stack)[stmt_list_stack->length () - 1 - skip]; + stmt_list = (*stmt_list_stack)[1]; gcc_assert (stmt_list); append_to_statement_list_force (var, stmt_list); } diff --git a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-eh3.C b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-eh3.C new file mode 100644 index 000..10dc6e3 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-eh3.C @@ -0,0 +1,14 @@ +// PR c++/56388 +// { dg-require-effective-target c++11 } + +int main() +{ +bool /*const*/ condition = false; + +[&]{ +try{} +catch(...){ +if(condition){} +} +}(); +}
Re: Another C++ PATCH for c++/52748 (N3276 and operator overloading)
On 04/11/2013 08:50 PM, Jason Merrill wrote: My earlier N3276 work only affected the function call syntax, but it needs to affect implicit function calls from overloaded operators as well. ...and in templates. Tested x86_64-pc-linux-gnu, applying to trunk and 4.8. commit 9448b41e5f5a64a2a92519049898fba4c9d43633 Author: Jason Merrill ja...@redhat.com Date: Sat Apr 13 21:12:45 2013 +0200 PR c++/52748 * pt.c (tsubst) [DECLTYPE_TYPE]: If ~id is an expression rather than a destructor name, it isn't an unqualified-name. (tsubst_copy_and_build): Pass down decltype_flag to operator handling code, too. diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 0c7b2ed..5c960e0 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -11800,8 +11800,17 @@ tsubst (tree t, tree args, tsubst_flags_t complain, tree in_decl) else if (DECLTYPE_FOR_LAMBDA_PROXY (t)) type = lambda_proxy_type (type); else - type = finish_decltype_type - (type, DECLTYPE_TYPE_ID_EXPR_OR_MEMBER_ACCESS_P (t), complain); + { + bool id = DECLTYPE_TYPE_ID_EXPR_OR_MEMBER_ACCESS_P (t); + if (id TREE_CODE (DECLTYPE_TYPE_EXPR (t)) == BIT_NOT_EXPR + EXPR_P (type)) + /* In a template ~id could be either a complement expression + or an unqualified-id naming a destructor; if instantiating + it produces an expression, it's not an id-expression or + member access. */ + id = false; + type = finish_decltype_type (type, id, complain); + } return cp_build_qualified_type_real (type, cp_type_quals (t) | cp_type_quals (type), @@ -13427,9 +13436,8 @@ tsubst_copy_and_build (tree t, /* N3276 decltype magic only applies to calls at the top level or on the right side of a comma. 
*/ - if (TREE_CODE (t) != CALL_EXPR - TREE_CODE (t) != COMPOUND_EXPR) -complain = ~tf_decltype; + tsubst_flags_t decltype_flag = (complain tf_decltype); + complain = ~tf_decltype; switch (TREE_CODE (t)) { @@ -13517,7 +13525,8 @@ tsubst_copy_and_build (tree t, r = convert_from_reference (r); } else - r = build_x_indirect_ref (input_location, r, RO_UNARY_STAR, complain); + r = build_x_indirect_ref (input_location, r, RO_UNARY_STAR, +complain|decltype_flag); RETURN (r); } @@ -13594,7 +13603,8 @@ tsubst_copy_and_build (tree t, case POSTINCREMENT_EXPR: op1 = tsubst_non_call_postfix_expression (TREE_OPERAND (t, 0), args, complain, in_decl); - RETURN (build_x_unary_op (input_location, TREE_CODE (t), op1, complain)); + RETURN (build_x_unary_op (input_location, TREE_CODE (t), op1, +complain|decltype_flag)); case PREDECREMENT_EXPR: case PREINCREMENT_EXPR: @@ -13606,7 +13616,8 @@ tsubst_copy_and_build (tree t, case REALPART_EXPR: case IMAGPART_EXPR: RETURN (build_x_unary_op (input_location, TREE_CODE (t), - RECUR (TREE_OPERAND (t, 0)), complain)); + RECUR (TREE_OPERAND (t, 0)), +complain|decltype_flag)); case FIX_TRUNC_EXPR: RETURN (cp_build_unary_op (FIX_TRUNC_EXPR, RECUR (TREE_OPERAND (t, 0)), @@ -13623,7 +13634,8 @@ tsubst_copy_and_build (tree t, else op1 = tsubst_non_call_postfix_expression (op1, args, complain, in_decl); - RETURN (build_x_unary_op (input_location, ADDR_EXPR, op1, complain)); + RETURN (build_x_unary_op (input_location, ADDR_EXPR, op1, +complain|decltype_flag)); case PLUS_EXPR: case MINUS_EXPR: @@ -13672,7 +13684,7 @@ tsubst_copy_and_build (tree t, ? 
ERROR_MARK : TREE_CODE (TREE_OPERAND (t, 1))), /*overload=*/NULL, - complain); + complain|decltype_flag); if (EXPR_P (r) TREE_NO_WARNING (t)) TREE_NO_WARNING (r) = TREE_NO_WARNING (t); @@ -13688,7 +13700,8 @@ tsubst_copy_and_build (tree t, op1 = tsubst_non_call_postfix_expression (TREE_OPERAND (t, 0), args, complain, in_decl); RETURN (build_x_array_ref (EXPR_LOCATION (t), op1, -RECUR (TREE_OPERAND (t, 1)), complain)); + RECUR (TREE_OPERAND (t, 1)), + complain|decltype_flag)); case SIZEOF_EXPR: if (PACK_EXPANSION_P (TREE_OPERAND (t, 0))) @@ -13781,7 +13794,7 @@ tsubst_copy_and_build (tree t, RECUR (TREE_OPERAND (t, 0)), TREE_CODE (TREE_OPERAND (t, 1)), RECUR (TREE_OPERAND (t, 2)), - complain); + complain|decltype_flag); /* TREE_NO_WARNING must be set if either the expression was parenthesized or it uses an operator such as = rather than plain assignment. In the former case, it was already @@ -13870,7 +13883,7 @@ tsubst_copy_and_build (tree t, RETURN (build_x_compound_expr (EXPR_LOCATION (t), op0, RECUR (TREE_OPERAND (t, 1)), - complain)); + complain|decltype_flag)); } case CALL_EXPR: @@ -13882,10 +13895,6 @@ tsubst_copy_and_build (tree t, bool koenig_p; tree ret; - /* Don't pass tf_decltype down to subexpressions. */ - tsubst_flags_t decltype_flag = (complain
[4.7, testsuite] Skip gcc.dg/torture/pr53922.c on Tru64 UNIX
I noticed that gcc.dg/torture/pr53922.c FAILs on Tru64 UNIX on the 4.7 branch for the same reason as on HP-UX: no undefined weak symbols. The following patch accounts for that, together with a small cleanup. Tested with the appropriate runtest invocation on alpha-dec-osf5.1b, installed on 4.7 branch. Rainer 2013-04-15 Rainer Orth r...@cebitec.uni-bielefeld.de * gcc.dg/torture/pr53922.c: Skip on alpha*-*-osf*. Remove dg-skip-if default args. # HG changeset patch # Parent e26c8a7daecd890ceeb6cadd50911c3d95031a85 Skip gcc.dg/torture/pr53922.c on Tru64 UNIX diff --git a/gcc/testsuite/gcc.dg/torture/pr53922.c b/gcc/testsuite/gcc.dg/torture/pr53922.c --- a/gcc/testsuite/gcc.dg/torture/pr53922.c +++ b/gcc/testsuite/gcc.dg/torture/pr53922.c @@ -1,6 +1,7 @@ /* { dg-do run } */ /* { dg-require-weak } */ -/* { dg-skip-if No undefined weak { hppa*-*-hpux* { ! lp64 } } { * } { } } */ +/* { dg-skip-if No undefined weak { alpha*-*-osf* } } */ +/* { dg-skip-if No undefined weak { hppa*-*-hpux* { ! lp64 } } } */ int x(int a) { -- - Rainer Orth, Center for Biotechnology, Bielefeld University
Minor C++ PATCH to tsubst of DECLTYPE_TYPE
While working on the latest 52748 patch, it occurred to me that using tsubst_expr for DECLTYPE_TYPE is wrong, since that function deals with statements; since we only have to deal with expressions there, tsubst_copy_and_build is a better choice. Tested x86_64-pc-linux-gnu, applying to trunk. commit b48d4d61ceffc2989f0ea1e32ca6e9cc41923b9c Author: Jason Merrill ja...@redhat.com Date: Sun Apr 14 16:41:27 2013 +0200 * pt.c (tsubst) [DECLTYPE_TYPE]: Use tsubst_copy_and_build. diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index ba386a2..411ca55 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -11770,9 +11770,10 @@ tsubst (tree t, tree args, tsubst_flags_t complain, tree in_decl) ++cp_unevaluated_operand; ++c_inhibit_evaluation_warnings; - type = tsubst_expr (DECLTYPE_TYPE_EXPR (t), args, - complain|tf_decltype, in_decl, - /*integral_constant_expression_p=*/false); + type = tsubst_copy_and_build (DECLTYPE_TYPE_EXPR (t), args, + complain|tf_decltype, in_decl, + /*function_p*/false, + /*integral_constant_expression*/false); --cp_unevaluated_operand; --c_inhibit_evaluation_warnings;
Re: [C++11][4.9] Add missing REDUC_PLUS_EXPR case to potential_constant_expression_1.
Richard Biener wrote: On Wed, Apr 10, 2013 at 12:50 PM, James Greenhalgh james.greenha...@arm.com wrote: -Original Message- From: dosr...@gmail.com [mailto:dosr...@gmail.com] On Behalf Of Gabriel Dos Reis Sent: 20 March 2013 19:09 To: James Greenhalgh Cc: Jakub Jelinek; Richard Biener; gcc-patches@gcc.gnu.org; Jason Merrill; m...@codesourcery.com Subject: Re: [C++11][4.9] Add missing REDUC_PLUS_EXPR case to potential_constant_expression_1. On Wed, Mar 20, 2013 at 1:03 PM, James Greenhalgh james.greenha...@arm.com wrote: Is that be sensible? It certainly seems like someone intended to explicitly enumerate all the possible cases and ensure that they were correctly handled. That someone would be me. We need to catch loudly any front-end tree code, e.g. ASTs, object we may have missed, as opposed to silently ignoring them with possible miscompilation and pray that someone might be sufficiently pissed off and report it as a bug. What is wrong isn't that the front-end inserts internal coverage check; rather it is the fact that we don't have enough separation between front-end asts and middle-end stuff. The convenience of adding a middle-end optimization (which this essentially is) should not trump correctness of the implementation of standard semantics. So, as far as I can see no decision came out of this thread as to what should be done. In that time I had to add another few tree cases as I added more things to TARGET_FOLD_BUILTIN. I'd like to start pushing some of these TARGET_FOLD_BUILTIN patches upstream, but they currently all hinge on resolving this discussion. I still think getting rid of TARGET_FOLD_BUILTIN and replacing it with TARGET_FOLD_STMT that only operates on GIMPLE is the way to go. Correct me if I'm wrong - as I currently understand the mid-end, TARGET_FOLD_BUILTIN could be used to fold builtins into tree expressions pre-vectorization so that the vectorizer could pick it up for optimization opportunities. 
Will folding to GIMPLE not defeat the purpose of folding builtins early on in GENERIC/Trees to expose them to early optimizations? The main purpose of using TARGET_FOLD_BUILTIN was to fold NEON builtins to trees so that they could go through the vectorizer for optimizations. If we go directly to GIMPLE, won't we lose this advantage? Thanks, Tejas Belagod ARM. One of the issues we hit is that it's not well-defined what tree codes are supposed to be part of GENERIC and which only part of GIMPLE (in case we want to support GENERIC tree codes not being a superset of GIMPLE tree codes at all). If they are part of GENERIC then the C++ frontend needs to handle them as folding can introduce all GENERIC tree codes. Richard. Would it be OK for this patch to go in, I know the thread started well for me with: -Original Message- From: Jason Merrill [mailto:ja...@redhat.com] Sent: 14 March 2013 18:52 To: James Greenhalgh; gcc-patches@gcc.gnu.org Cc: m...@codesourcery.com Subject: Re: [C++11][4.9] Add missing REDUC_PLUS_EXPR case to potential_constant_expression_1. On 03/14/2013 09:48 AM, James Greenhalgh wrote: Is this OK to commit to 4.9 when stage 1 opens up? Yes, but please add the other new tree codes as well. Jason But quickly moved on to discussion, so I didn't commit the patch. Thanks, James Greenhalgh Graduate Engineer ARM --- gcc/ 2013-04-09 James Greenhalgh james.greenha...@arm.com * cp/semantics.c (potential_constant_expression_1): Add cases for REDUC_PLUS_EXPR, REDUC_MIN_EXPR, REDUC_MAX_EXPR.
Re: [SPARC] Fix PR target/56890
No objections. Thanks. We can actually support this by adding patterns for the partial store instructions, which can store 8-bit and 16-bit quantities from FP registers. Ah, indeed, with -mvis. Not clear whether that would really be worthwhile. -- Eric Botcazou
Re: [SPARC] Fix PR target/56890
From: Eric Botcazou ebotca...@adacore.com Date: Mon, 15 Apr 2013 18:07:05 +0200 We can actually support this by adding patterns for the partial store instructions, which can store 8-bit and 16-bit quantities from FP registers. Ah, indeed, with -mvis. Not clear whether that would really be worthwhile. Well, 99% of sparc cpus used with gcc these days are -mvis capable, and popping the value to the integer register file via the stack in these situations is needless overhead.
[PATCH, ARM] Prologue/epilogue using STRD/LDRD in ARM mode
Generate prologue/epilogue using STRD/LDRD in ARM mode, when tuning prefer_ldrd_strd flag is set, such as in Cortex-A15. The previous version of this patch was posted for review here: http://gcc.gnu.org/ml/gcc-patches/2012-10/msg00995.html The new version includes the following improvements: (1) For prologue, it generates STRD whenever possible, otherwise it generates single-word stores, instead of STM. This allows us to use offset addressing with STRD, instead of writeback on every store used in the previous version of this patch. Similarly, for epilogue. To allow epilogue returns by loading directly into PC, a separate stack update instruction is emitted before the final load into PC. (2) The previous version of this patch caused an ICE in arm_emit_strd_push, when gcc is called with -fno-omit-frame-pointer -mapcs-frame command-line options. It is fixed in the attached patch, where arm_emit_strd_push is not called when TARGET_APCS_FRAME holds (epilogue already has a similar condition). (3) The previous version of the patch generated incorrect return sequences for interrupt functions. This version fixes it by using the original LDM epilogues for interrupt functions. No need to change the tests gcc.target/arm/interrupt-*.c. (4) Takes assert statements out of the loop, addressing a comment made about a related patch, also relevant here. (5) Improves dwarf info generation. No regression on qemu for arm-none-eabi cortex-a15. Bootstrap successful on A15 TC2. Spec2k overall slight performance improvement (less than 1%) on Cortex-A15 TC2. Out of 26 benchmarks, 4 show regression of 2.5% or less (benchmarks 186,254,255,178). Other benchmarks show improvements or no change. Size increase overall by 1.4%. No clear correlation between performance and size increase. Ok for trunk? Thanks, Greta ChangeLog gcc/ 2013-04-15 Greta Yorsh Greta.Yorsh at arm.com * config/arm/arm.c (emit_multi_reg_push): New declaration for an existing function. (arm_emit_strd_push): New function. 
(arm_expand_prologue): Used here. (arm_emit_ldrd_pop): New function. (arm_expand_epilogue): Used here. (arm_get_frame_offsets): Update condition. (arm_emit_multi_reg_pop): Add a special case for load of a single register with writeback.diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 982487e..833d092 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -173,6 +173,7 @@ static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int); static tree arm_builtin_decl (unsigned, bool); static void emit_constant_insn (rtx cond, rtx pattern); static rtx emit_set_insn (rtx, rtx); +static rtx emit_multi_reg_push (unsigned long); static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode, tree, bool); static rtx arm_function_arg (cumulative_args_t, enum machine_mode, @@ -16690,6 +16691,148 @@ thumb2_emit_strd_push (unsigned long saved_regs_mask) return; } +/* STRD in ARM mode requires consecutive registers. This function emits STRD + whenever possible, otherwise it emits single-word stores. The first store + also allocates stack space for all saved registers, using writeback with + post-addressing mode. All other stores use offset addressing. If no STRD + can be emitted, this function emits a sequence of single-word stores, + and not an STM as before, because single-word stores provide more freedom + scheduling and can be turned into an STM by peephole optimizations. */ +static void +arm_emit_strd_push (unsigned long saved_regs_mask) +{ + int num_regs = 0; + int i, j, dwarf_index = 0; + int offset = 0; + rtx dwarf = NULL_RTX; + rtx insn = NULL_RTX; + rtx tmp, mem; + + /* TODO: A more efficient code can be emitted by changing the + layout, e.g., first push all pairs that can use STRD to keep the + stack aligned, and then push all other registers. 
*/ + for (i = 0; i = LAST_ARM_REGNUM; i++) +if (saved_regs_mask (1 i)) + num_regs++; + + gcc_assert (!(saved_regs_mask (1 SP_REGNUM))); + gcc_assert (!(saved_regs_mask (1 PC_REGNUM))); + gcc_assert (num_regs 0); + + /* Create sequence for DWARF info. */ + dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1)); + + /* For dwarf info, we generate explicit stack update. */ + tmp = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_index++) = tmp; + + /* Save registers. */ + offset = - 4 * num_regs; + j = 0; + while (j = LAST_ARM_REGNUM) +if (saved_regs_mask (1 j)) + { +if ((j % 2 == 0) + (saved_regs_mask (1 (j + 1 + { +/* Current register and previous register form register pair for + which STRD can be generated. */ +if (offset 0) +
Re: [Patch, Fortran, OOP] PR 56266: ICE on invalid in gfc_match_varspec
Le 13/04/2013 17:44, Mikael Morin a écrit : Indeed, that's invalid: And then, the call to gfc_match_varspec shouldn't be there in the first place. I'll test the following later. Index: primary.c === --- primary.c (révision 197949) +++ primary.c (copie de travail) @@ -1954,11 +1954,10 @@ gfc_match_varspec (gfc_expr *primary, int equiv_fl gcc_assert (!tail || !tail-next); - if (!(primary-expr_type == EXPR_VARIABLE - || (primary-expr_type == EXPR_STRUCTURE -primary-symtree primary-symtree-n.sym -primary-symtree-n.sym-attr.flavor))) - return MATCH_ERROR; + gcc_assert (primary-expr_type == EXPR_VARIABLE + || (primary-expr_type == EXPR_STRUCTURE + primary-symtree primary-symtree-n.sym + primary-symtree-n.sym-attr.flavor)); if (tbp-n.tb-is_generic) tbp_sym = NULL; @@ -3102,18 +3101,8 @@ gfc_match_rvalue (gfc_expr **result) gfc_error (Missing argument list in function '%s' at %C, sym-name); if (m != MATCH_YES) - { - m = MATCH_ERROR; - break; - } + m = MATCH_ERROR; - /* If our new function returns a character, array or structure -type, it might have subsequent references. */ - - m = gfc_match_varspec (e, 0, false, true); - if (m == MATCH_NO) - m = MATCH_YES; - break; generic_function:
Re: [PATCH] PR55033: Fix
Sebastian, Alan's patch is related to some original work by Richard Henderson and Richard has told both Alan and me that he eventually will review the patch. If someone else wants to review the patch before he gets to it, that would be great as well. Thanks, David would one of the global reviewers mind having a look at this? Comment #2 of PR55033 clearly shows that this is a compiler bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55033#c2 Alan Modra proposed a patch to fix this problem only one day after the bug report. This was in October 2012. Now this issue is still open due to a lack of global reviewer approval. We also have no disapproval. We have silence. It would be really nice if PR55033 can be fixed for GCC 4.8 and later.
Re: [Patch, Fortran, OOP] PR 56266: ICE on invalid in gfc_match_varspec
Indeed, that's invalid: And then, the call to gfc_match_varspec shouldn't be there in the first place. I'll test the following later. It seems like the parts you're removing have essentially been there since day zero. Would be interesting to know if (and where) your patch fails. Cheers, Janus
Re: [patch, fortran] Really fix PR 56782
Le 14/04/2013 16:21, Thomas Koenig a écrit : Hi Mikael, - (void) gfc_expr_walker (fcn, callback_reduction, NULL); why remove this? Because it is not needed, as the test case _46 shows. No need to run this twice, it doesn't get better :-) Indeed, that's right. gfc_internal_error (Illegal id in insert_iterator_function); This duplicated code could probably be merged with copy_walk_reduction_arg. I thought about it. The reason why I didn't do it was because the expr to be wrapped inside the call is different. Hum, how different? I think callback_reduction's iterator handling should happen there as well. Like I said, it is done automatically by the expression walker. I don't really understand. Attached is what I had in mind. And a testcase (the '|| expr-expr_type == EXPR_FUNCTION' in copy_walk_reduction_arg appeared wrong to me, and it was seemingly). Mikael diff --git a/frontend-passes.c b/frontend-passes.c index 9749314..cf63318 100644 --- a/frontend-passes.c +++ b/frontend-passes.c @@ -192,37 +192,49 @@ optimize_expr (gfc_expr **e, int *walk_subtrees ATTRIBUTE_UNUSED, old one can be freed. 
*/ static gfc_expr * -copy_walk_reduction_arg (gfc_expr *e, gfc_expr *fn) +copy_walk_reduction_arg (gfc_constructor *c, gfc_expr *fn) { - gfc_expr *fcn; - gfc_isym_id id; + gfc_expr *fcn, *e = c-expr; - if (e-rank == 0 || e-expr_type == EXPR_FUNCTION) -fcn = gfc_copy_expr (e); - else + fcn = gfc_copy_expr (e); + if (c-iterator) +{ + gfc_constructor_base newbase; + gfc_expr *new_expr; + gfc_constructor *new_c; + + newbase = NULL; + new_expr = gfc_get_expr (); + new_expr-expr_type = EXPR_ARRAY; + new_expr-ts = e-ts; + new_expr-where = e-where; + new_expr-rank = 1; + new_c = gfc_constructor_append_expr (newbase, fcn, (e-where)); + new_c-iterator = c-iterator; + new_expr-value.constructor = newbase; + c-iterator = NULL; + + fcn = new_expr; +} + + if (fcn-rank != 0) { - id = fn-value.function.isym-id; + gfc_isym_id id = fn-value.function.isym-id; if (id == GFC_ISYM_SUM || id == GFC_ISYM_PRODUCT) - fcn = gfc_build_intrinsic_call (current_ns, - fn-value.function.isym-id, + fcn = gfc_build_intrinsic_call (current_ns, id, fn-value.function.isym-name, - fn-where, 3, gfc_copy_expr (e), - NULL, NULL); + fn-where, 3, fcn, NULL, NULL); else if (id == GFC_ISYM_ANY || id == GFC_ISYM_ALL) - fcn = gfc_build_intrinsic_call (current_ns, - fn-value.function.isym-id, + fcn = gfc_build_intrinsic_call (current_ns, id, fn-value.function.isym-name, - fn-where, 2, gfc_copy_expr (e), - NULL); + fn-where, 2, fcn, NULL); else gfc_internal_error (Illegal id in copy_walk_reduction_arg); fcn-symtree-n.sym-attr.access = ACCESS_PRIVATE; } - (void) gfc_expr_walker (fcn, callback_reduction, NULL); - return fcn; } @@ -305,10 +317,10 @@ callback_reduction (gfc_expr **e, int *walk_subtrees ATTRIBUTE_UNUSED, - only have a single element in the array which contains an iterator. 
*/ - if (c == NULL || (c-iterator != NULL gfc_constructor_next (c) == NULL)) + if (c == NULL) return 0; - res = copy_walk_reduction_arg (c-expr, fn); + res = copy_walk_reduction_arg (c, fn); c = gfc_constructor_next (c); while (c) @@ -320,7 +332,7 @@ callback_reduction (gfc_expr **e, int *walk_subtrees ATTRIBUTE_UNUSED, new_expr-where = fn-where; new_expr-value.op.op = op; new_expr-value.op.op1 = res; - new_expr-value.op.op2 = copy_walk_reduction_arg (c-expr, fn); + new_expr-value.op.op2 = copy_walk_reduction_arg (c, fn); res = new_expr; c = gfc_constructor_next (c); } ! { dg-do run } ! { dg-options -ffrontend-optimize -fdump-tree-original } ! Test that nested array constructors are optimized. program main implicit none integer, parameter :: dp=selected_real_kind(15) real(kind=dp), dimension(2,2) :: a real(kind=dp) thirteen data a /2._dp,3._dp,5._dp,7._dp/ thirteen = 13._dp if (abs (product([[sum([eleven_ones()]), thirteen], a]) - 30030._dp) 1e-8) call abort contains function eleven_ones() real(kind=dp) :: eleven_ones(11) integer :: i eleven_ones = [ (1._dp, i=1,11) ] end function eleven_ones end program main ! { dg-final { scan-tree-dump-times while 4 original } } ! { dg-final { cleanup-tree-dump original } }
[patch] RFC: ix86 / x86_64 register pressure aware scheduling
Hello, The attached patch enables register pressure aware scheduling for the ix86 and x86_64 targets. It uses the optimistic algorithm to avoid being overly conservative. This is the same as what other CISCy targets, like s390, also do. The motivation for this patch is the excessive spilling I've observed in a few test cases with relatively large basic blocks, e.g. encryption algorithms and codecs. The patch passes bootstrap+testing on x86_64-unknown-linux-gnu and i686-unknown-linux-gnu, with a few new failures due to PR56950. Off-list, Uros, Honza and others have already looked at the patch and benchmarked it. For x86_64 there is an overall improvement for SPEC2k except that lucas regresses, but such a preliminary result is IMHO very promising. Comments/suggestions welcome :-) Ciao! Steven * common/config/i386/i386-common.c (ix86_option_optimization_table): Do not disable insns scheduling. Enable register pressure aware scheduling. * config/i386/i386.c (ix86_option_override): Use the alternative, optimistic scheduling-pressure algorithm by default. Index: common/config/i386/i386-common.c === --- common/config/i386/i386-common.c(revision 197941) +++ common/config/i386/i386-common.c(working copy) @@ -707,9 +707,15 @@ static const struct default_options ix86 { /* Enable redundant extension instructions removal at -O2 and higher. */ { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 }, -/* Turn off -fschedule-insns by default. It tends to make the - problem with not enough registers even worse. */ -{ OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 }, +/* Enable -fsched-pressure by default for all optimization levels. + Before SCHED_PRESSURE_MODEL register-pressure aware schedule was + available, -fschedule-insns was turned off completely by default for + this port, because scheduling before register allocation tends to + make the problem with not enough registers even worse. 
However, + for very long basic blocks the scheduler can help bring register + pressure down significantly, and SCHED_PRESSURE_MODEL is still + conservative enough to avoid creating excessive register pressure. */ +{ OPT_LEVELS_ALL, OPT_fsched_pressure, NULL, 1 }, #ifdef SUBTARGET_OPTIMIZATION_OPTIONS SUBTARGET_OPTIMIZATION_OPTIONS, Index: config/i386/i386.c === --- config/i386/i386.c (revision 197941) +++ config/i386/i386.c (working copy) @@ -3936,6 +3936,10 @@ ix86_option_override (void) ix86_option_override_internal (true); + /* Use the alternative scheduling-pressure algorithm by default. */ + maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2, +global_options.x_param_values, +global_options_set.x_param_values); /* This needs to be done at start up. It's convenient to do it here. */ register_pass (insert_vzeroupper_info);
*ping* - Re: [Patch, Fortran] PR56907 - do not 'pack' arrays passed to C_LOC
*ping* Also pending is the NO_ARG_CHECK patch at: http://gcc.gnu.org/ml/fortran/2013-04/msg00120.html On April 10, Tobias Burnus wrote: Fortran 2008 supports C_LOC(array); if the argument is not simply contiguous, the current code adds a call to __gfortran_intrinsic_pack. The pack call shouldn't be there. Fortran 2008 demands that the actual argument is contiguous, and intrinsic_pack creates a copy only if the run-time check shows that the argument is not contiguous. Thus, it is not a wrong-code issue. However, for performance reasons, it makes sense to avoid the call to __gfortran_intrinsic_pack. Built and regtested on x86-64-gnu-linux. OK for the trunk? Tobias
Re: [Patch, Fortran] PR39505 - add support for !GCC$ attributes NO_ARG_CHECK
Hi Tobias, Minor patch update due to Janus' gfc_explicit_interface_required patch. Build and regtested on x86-64-gnu-linux. OK for the trunk? I see that it can be useful, but I really dislike disabling the TKR checks. Can you put this behind an option so the user has to specify that he really means it? OK with this change; also OK if other people think that requiring such an option is a Bad Idea. Thomas
[PATCH] Add a new option -fstack-protector-strong
Hi, I'm to bring up this patch about '-fstack-protector-strong' for trunk. Background - some times stack-protector is too-simple while stack-protector-all over-kills, for example, to build one of our core systems, we forcibly add -fstack-protector-all to all compile commands, which brings big performance penalty (due to extra stack guard/check insns on function prologue and epilogue) on both atom and arm. To use -fstack-protector is just regarded as not secure enough (only protects 2% functions) by the system secure team. So I'd like to add the option -fstack-protector-strong, that hits the balance between -fstack-protector and -fstack-protector-all. Design - see end of email. Benefit - gain big performance while sacrificing little security (for scenarios comparing -fstack-protector-all vs. -fstack-protector-strong) Status - it has been in google/main for more than 1 year, building the chrome browser and chromeos with no security degradation over this period. Test - dejagnu c/c++ test on 64-bit ubuntu, bootstrap, build/run chrome browser and chromiumos. Testifies - LLVM developers are refering my design docs to implement this stack-protector-strong schema - http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-October/053931.html Thanks, Patch - diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c index 3e210d9..0059626 100644 --- a/gcc/c-family/c-cppbuiltin.c +++ b/gcc/c-family/c-cppbuiltin.c @@ -888,6 +888,8 @@ c_cpp_builtins (cpp_reader *pfile) /* Make the choice of the stack protector runtime visible to source code. The macro names and values here were chosen for compatibility with an earlier implementation, i.e. ProPolice. 
*/ + if (flag_stack_protect == 3) +cpp_define (pfile, __SSP_STRONG__=3); if (flag_stack_protect == 2) cpp_define (pfile, __SSP_ALL__=2); else if (flag_stack_protect == 1) diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c index a651d8c..8728842 100644 --- a/gcc/cfgexpand.c +++ b/gcc/cfgexpand.c @@ -1291,6 +1291,10 @@ clear_tree_used (tree block) clear_tree_used (t); } +#define SPCT_FLAG_ALL 2 +#define SPCT_FLAG_DEFAULT 1 +#define SPCT_FLAG_STRONG 3 + /* Examine TYPE and determine a bit mask of the following features. */ #define SPCT_HAS_LARGE_CHAR_ARRAY 1 @@ -1360,7 +1364,8 @@ stack_protect_decl_phase (tree decl) if (bits SPCT_HAS_SMALL_CHAR_ARRAY) has_short_buffer = true; - if (flag_stack_protect == 2) + if (flag_stack_protect == SPCT_FLAG_ALL || + flag_stack_protect == SPCT_FLAG_STRONG) { if ((bits (SPCT_HAS_SMALL_CHAR_ARRAY | SPCT_HAS_LARGE_CHAR_ARRAY)) !(bits SPCT_HAS_AGGREGATE)) @@ -1514,6 +1519,29 @@ estimated_stack_frame_size (struct cgraph_node *node) return size; } +/* Helper routine to check if a record or union contains an array field. */ + +static int +record_or_union_type_has_array_p (const_tree tree_type) +{ + tree fields = TYPE_FIELDS (tree_type); + tree f; + + for (f = fields; f; f = DECL_CHAIN (f)) +{ + if (TREE_CODE (f) == FIELD_DECL) + { + tree field_type = TREE_TYPE (f); + if (RECORD_OR_UNION_TYPE_P (field_type) + record_or_union_type_has_array_p (field_type)) +return 1; + if (TREE_CODE (field_type) == ARRAY_TYPE) +return 1; + } +} + return 0; +} + /* Expand all variables used in the function. */ static rtx @@ -1525,6 +1553,7 @@ expand_used_vars (void) struct pointer_map_t *ssa_name_decls; unsigned i; unsigned len; + int gen_stack_protect_signal = 0; /* Compute the phase of the stack frame for this function. 
*/ { @@ -1576,6 +1605,23 @@ expand_used_vars (void) } pointer_map_destroy (ssa_name_decls); + FOR_EACH_LOCAL_DECL (cfun, i, var) +if (!is_global_var (var)) + { + tree var_type = TREE_TYPE (var); + /* Examine local referenced variables that have their addresses taken, + contain an array, or are arrays. */ + if (TREE_CODE (var) == VAR_DECL + (TREE_CODE (var_type) == ARRAY_TYPE + || TREE_ADDRESSABLE (var) + || (RECORD_OR_UNION_TYPE_P (var_type) + record_or_union_type_has_array_p (var_type + { +++gen_stack_protect_signal; +break; + } + } + /* At this point all variables on the local_decls with TREE_USED set are not associated with any block scope. Lay them out. */ @@ -1662,11 +1708,18 @@ expand_used_vars (void) dump_stack_var_partition (); } - /* There are several conditions under which we should create a - stack guard: protect-all, alloca used, protected decls present. */ - if (flag_stack_protect == 2 - || (flag_stack_protect - (cfun-calls_alloca || has_protected_decls))) + /* Create stack guard, if + a) -fstack-protector-all - always; + b) -fstack-protector-strong - if there are arrays, memory + references to local variables, alloca used, or protected decls present; + c) -fstack-protector - if alloca used, or protected decls present */ + if (flag_stack_protect == SPCT_FLAG_ALL + || (flag_stack_protect
Re: [Patch, Fortran] PR39505 - add support for !GCC$ attributes NO_ARG_CHECK
Am 15.04.2013 23:03, schrieb Thomas Koenig: Hi Tobias, Minor patch update due to Janus' gfc_explicit_interface_required patch. Build and regtested on x86-64-gnu-linux. OK for the trunk? I see that it can be useful, but I really dislike disabling the TKR checks. Can you put this behind an option so the user has to specify that he really means it? Well, it is difficult to accidentally write !GCC$ attributes NO_ARG_CHECK :: args Additionally, for the purpose of libraries - such as MPI - it makes sense to disable the TKR check without requiring the users to always compile their programs with special options. Regarding an option: Would -f(no-)directives (with default = on) be a suitable option, which also affects the other !GCC$ attributes, such as dllexport etc.? OK with this change; also OK if other people think that requiring such an option is a Bad Idea. Tobias
Re: [Patch, fortran] PR 40958 Compress module files with zlib
Le 09/04/2013 20:33, Janne Blomqvist a écrit : Regtested on x86_64-unknown-linux-gnu, Ok for trunk? Looks good. OK. Thanks. (IMHO the increase in compile time is modest enough that it's not worth doing the caching of uncompressed module files that I was previously thinking about, especially considering that large projects are invariably(?!) split over several source files, thereby reducing the effectiveness of such a caching scheme.) Yeah, well... I shouldn't have discouraged you with my previous unwelcoming comment about it. As -fwhole-file is the default, it would be best for the testcase at hand (single file CP2K) to load symbols directly from the module namespaces. Otherwise, caching module files would probably be an improvement in some cases and a degradation in others. Anyway, the runtime is dominated by middle-end optimizations as soon as they are enabled, so why bother? Mikael
Re: [Patch, Fortran] PR39505 - add support for !GCC$ attributes NO_ARG_CHECK
Tobias Burnus wrote: Am 15.04.2013 23:03, schrieb Thomas Koenig: I see that it can be useful, but I really dislike disabling the TKR checks. Side remark: TS29113 already allows disabling TK checks via TYPE(*). And I find disabling TKR checks very useful for certain applications - and MPI buffers are one. One just has to make sure that it is difficult to abuse it. The J3 team spent some time on it to ensure this for TYPE(*) - and the NO_ARG_CHECK makes use of that feature. Can you put this behind an option so the user has to specify that he really means it? Well, it is difficult to accidentally write !GCC$ attributes NO_ARG_CHECK :: args Additionally, for the purpose of libraries - such as MPI - it makes sense to disable the TKR check without requiring the users to always compile their programs with special options. Regarding an option: Would -f(no-)directives (with default = on) be a suitable option, which also affects the other !GCC$ attributes, such as dllexport etc.? Namely, the attached patch? Tobias 2013-04-16 Tobias Burnus bur...@net-b.de PR fortran/39505 * gfortran.h (gfc_option_t): Add flag_directives. * gfortran.texi (GCC Fortran directives): Mention -fno-directives. * invoke.texi (-fno-directives): Add. * lang.opt (fdirectives): Add. * options.c (gfc_init_options, gfc_handle_option): Handle it. * scanner.c (skip_gcc_attribute): Ditto. diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index a69cea2..bb4ec1b 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -2300,6 +2300,7 @@ typedef struct int flag_realloc_lhs; int flag_aggressive_function_elimination; int flag_frontend_optimize; + int flag_directives; int fpe; int rtcheck; diff --git a/gcc/fortran/gfortran.texi b/gcc/fortran/gfortran.texi index 61cb3bb..c2bb0d5 100644 --- a/gcc/fortran/gfortran.texi +++ b/gcc/fortran/gfortran.texi @@ -2663,6 +2663,9 @@ on the processor; see @ref{Top,,C Extensions,gcc,Using the GNU Compiler Collection (GCC)} for details. 
+Note that the parsing of the directives can be disabled via the +@option{-fno-directives} option. + For procedures and procedure pointers, the following attributes can be used to change the calling convention: diff --git a/gcc/fortran/invoke.texi b/gcc/fortran/invoke.texi index 12c200e..9f250ad 100644 --- a/gcc/fortran/invoke.texi +++ b/gcc/fortran/invoke.texi @@ -116,7 +116,7 @@ by type. Explanations are in the following sections. @xref{Fortran Dialect Options,,Options controlling Fortran dialect}. @gccoptlist{-fall-intrinsics -fbackslash -fcray-pointer -fd-lines-as-code @gol -fd-lines-as-comments -fdefault-double-8 -fdefault-integer-8 @gol --fdefault-real-8 -fdollar-ok -ffixed-line-length-@var{n} @gol +-fdefault-real-8 -fdirectives -fdollar-ok -ffixed-line-length-@var{n} @gol -ffixed-line-length-none -ffree-form -ffree-line-length-@var{n} @gol -ffree-line-length-none -fimplicit-none -finteger-4-integer-8 @gol -fmax-identifier-length -fmodule-private -fno-fixed-form -fno-range-check @gol @@ -250,6 +250,11 @@ the kind of non-double real constants like @code{1.0}, and does promote the default width of @code{DOUBLE PRECISION} to 16 bytes if possible, unless @code{-fdefault-double-8} is given, too. +@item -fno-directives +@opindex @code{fdirectives} +@opindex @code{fno-directives} +Disable the GNU Fortran Compiler Directives (@code{!GCC$}) directives. + @item -fdollar-ok @opindex @code{fdollar-ok} @cindex @code{$} diff --git a/gcc/fortran/lang.opt b/gcc/fortran/lang.opt index dbc3f6b..5fb1a13 100644 --- a/gcc/fortran/lang.opt +++ b/gcc/fortran/lang.opt @@ -389,6 +389,10 @@ fdollar-ok Fortran Allow dollar signs in entity names +fdirectives +Fortran +Enable !GCC$ directives + fdump-core Fortran Ignore Does nothing. Preserved for backward compatibility. 
diff --git a/gcc/fortran/options.c b/gcc/fortran/options.c index 3f5de03..00e6a50 100644 --- a/gcc/fortran/options.c +++ b/gcc/fortran/options.c @@ -122,6 +122,7 @@ gfc_init_options (unsigned int decoded_options_count, gfc_option.flag_integer4_kind = 0; gfc_option.flag_real4_kind = 0; gfc_option.flag_real8_kind = 0; + gfc_option.flag_directives = 1; gfc_option.flag_dollar_ok = 0; gfc_option.flag_underscoring = 1; gfc_option.flag_f2c = 0; @@ -766,6 +767,10 @@ gfc_handle_option (size_t scode, const char *arg, int value, gfc_option.flag_d_lines = 0; break; +case OPT_fdirectives: + gfc_option.flag_directives = value; + break; + case OPT_fdump_fortran_original: case OPT_fdump_parse_tree: gfc_option.dump_fortran_original = value; diff --git a/gcc/fortran/scanner.c b/gcc/fortran/scanner.c index fd8f284..8c95f90 100644 --- a/gcc/fortran/scanner.c +++ b/gcc/fortran/scanner.c @@ -718,6 +718,9 @@ skip_gcc_attribute (locus start) char c; locus old_loc = gfc_current_locus; + if (!gfc_option.flag_directives) +return false; + if ((c = next_char ()) == 'g' || c == 'G')
Re: RFA: enable LRA for rs6000
I built the spec 2006 suite with/without Vlad's patches for enabling using the LRA register allocator for the powerpc. Because of the bug with the count register that was in the version I checked out, I have built things with the -fno-branch-count-reg option. I created a branch off of subversion id 197925 and applied Vlad's initial patches: svn+ssh://gcc.gnu.org/svn/gcc/branches/ibm/meissner-lra I can't put the spec files in a general mailing list, but I will make them available to Vlad as needed. On the 64-bit side, the wrf benchmark does not build: /home/meissner/fsf-install-ppc64/meissner-lra/bin/gfortran -c -o module_diffusion_em.fppized.o -I. -I./netcdf/include -g -save-temps=obj -ffast-math -O3 -mveclibabi=mass -mcpu=power7 -mrecip=rsqrt -fpeel-loops -funroll-loops -ftree-vectorize -fvect-cost-model -msave-toc-indirect -fno-aggressive-loop-optimizations -fno-branch-count-reg -mno-pointers-to-nested-functions -mlra -m64 module_diffusion_em.fppized.f90 module_diffusion_em.fppized.f90: In function 'compute_diff_metrics': module_diffusion_em.fppized.f90:5069:0: internal compiler error: in check_rtl, at lra.c:1999 END SUBROUTINE compute_diff_metrics ^ 0x1055e1bf check_rtl /home/meissner/fsf-src/meissner-lra/gcc/lra.c:1999 0x105604c3 lra(_IO_FILE*) /home/meissner/fsf-src/meissner-lra/gcc/lra.c:2374 0x10512f4b do_reload /home/meissner/fsf-src/meissner-lra/gcc/ira.c:4619 0x10512f4b rest_of_handle_reload /home/meissner/fsf-src/meissner-lra/gcc/ira.c:4731 Please submit a full bug report, with preprocessed source if appropriate. Please include the complete backtrace with any bug report. See http://gcc.gnu.org/bugs.html for instructions. specmake: *** [module_diffusion_em.fppized.o] Error 1 specmake: *** Waiting for unfinished jobs On the 32-bit side, both wrf and dealII benchmarks do not build. The wrf failure looks like the 64-bit failure, but the file being compiled is different: /home/meissner/fsf-install-ppc64/meissner-lra/bin/gfortran -c -o ESMF_Alarm.fppized.o -I. 
-I./netcdf/include -g -save-temps=obj -ffast-math -Ofast -mveclibabi=mass -mcpu=power7 -mrecip=rsqrt -fpeel-loops -funroll-loops -ftree-vectorize -fvect-cost-model -fno-aggressive-loop-optimizations -fno-branch-count-reg -mlra -m32 ESMF_Alarm.fppized.f90 module_soil_pre.fppized.f90:1184:0: internal compiler error: in check_rtl, at lra.c:1999 END SUBROUTINE init_soil_3_real ^ 0x1055e1bf check_rtl /home/meissner/fsf-src/meissner-lra/gcc/lra.c:1999 0x105604c3 lra(_IO_FILE*) /home/meissner/fsf-src/meissner-lra/gcc/lra.c:2374 0x10512f4b do_reload /home/meissner/fsf-src/meissner-lra/gcc/ira.c:4619 0x10512f4b rest_of_handle_reload /home/meissner/fsf-src/meissner-lra/gcc/ira.c:4731 Please submit a full bug report, with preprocessed source if appropriate. Please include the complete backtrace with any bug report. See http://gcc.gnu.org/bugs.html for instructions. specmake: *** [module_soil_pre.fppized.o] Error 1 specmake: *** Waiting for unfinished jobs Error with make 'specmake -j40 build': check file '/home/meissner/spec-build/spec-2006-base-dev49-power7-vsx-svn197925-nocountreg-lra-shared-at6.0-32bit/benchspec/CPU2006/481.wrf/build/build_base_dev49-power7-vsx-32bit./make.err' Command returned exit code 2 Error with make! *** Error building 481.wrf In dealII, quadrature_lib.cc and polynomial.cc don't build. 
/home/meissner/fsf-install-ppc64/meissner-lra/bin/g++ -c -o quadrature_lib.o -DSPEC_CPU -DNDEBUG -Iinclude -DBOOST_DISABLE_THREADS -Ddeal_II_dimension=3 -g -save-temps=obj -ffast-math -Ofast -mveclibabi=mass -mcpu=power7 -mrecip=rsqrt -fpeel-loops -funroll-loops -ftree-vectorize -fvect-cost-model -fno-aggressive-loop-optimizations -fno-branch-count-reg -mlra -m32 -DSPEC_CPU_LINUX -include cstddef quadrature_lib.cc quadrature_lib.cc: In constructor 'QGaussdim::QGauss(unsigned int) [with int dim = 1]': quadrature_lib.cc:95:1: internal compiler error: in check_rtl, at lra.c:1999 } ^ 0x106cb2bf check_rtl /home/meissner/fsf-src/meissner-lra/gcc/lra.c:1999 0x106cd5c3 lra(_IO_FILE*) /home/meissner/fsf-src/meissner-lra/gcc/lra.c:2374 0x1068004b do_reload /home/meissner/fsf-src/meissner-lra/gcc/ira.c:4619 0x1068004b rest_of_handle_reload /home/meissner/fsf-src/meissner-lra/gcc/ira.c:4731 Please submit a full bug report, with preprocessed source if appropriate. Please include the complete backtrace with any bug report. See http://gcc.gnu.org/bugs.html for instructions. specmake: *** [quadrature_lib.o] Error 1 /home/meissner/fsf-install-ppc64/meissner-lra/bin/g++ -c -o polynomial.o -DSPEC_CPU -DNDEBUG -Iinclude -DBOOST_DISABLE_THREADS -Ddeal_II_dimension=3 -g -save-temps=obj -ffast-math -Ofast -mveclibabi=mass -mcpu=power7 -mrecip=rsqrt -fpeel-loops -funroll-loops -ftree-vectorize -fvect-cost-model -fno-aggressive-loop-optimizations -fno-branch-count-reg -mlra -m32
Re: RFA: enable LRA for rs6000
On Tue, Apr 16, 2013 at 12:48 AM, Michael Meissner wrote: 0x1055e1bf check_rtl /home/meissner/fsf-src/meissner-lra/gcc/lra.c:1999 These are all cases of insns not satisfying their constraints. There are no PRs for this, and there are no test suite failures of this kind in the logs of my powerpc lra-branch test bot. I hope you can extract test cases and file PRs... Ciao! Steven
Re: RFA: enable LRA for rs6000
On Tue, Apr 16, 2013 at 01:03:35AM +0200, Steven Bosscher wrote: On Tue, Apr 16, 2013 at 12:48 AM, Michael Meissner wrote: 0x1055e1bf check_rtl /home/meissner/fsf-src/meissner-lra/gcc/lra.c:1999 These are all cases of insns not satisfying their constraints. There are no PRs for this, and there are no test suite failures of this kind in the logs of my powerpc lra-branch test bot. I hope you can extract test cases and file PRs... Yes of course, but I wanted to give Vlad a heads up, ASAP. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Re: [PATCH] V2DI zero constant in GPR (PR target/56948)
On Sat, Apr 13, 2013 at 03:48:13PM -0400, David Edelsohn wrote: V2DI mode is allowed in GPRs and the pattern predicate allows easy vector constants but the pattern in vsx.md does not provide an alternative for that case, which can lead to an ICE where the insn does not satisfy its constraints. The following patch adds an alternative for this case. I also noticed that the VSX movti_64bit pattern does not handle loading constants into a GPR. And both the movti_64bit and movti_32bit patterns use j-wa instead of O-wa. The j constraint will work because it will accept any mode, but I think that an O constraint is more accurate for a scalar mode like TImode. Because the failure depends on the details of register allocation, I do not have a short testcase. Comments? This looks right. Too bad we probably can't combine j/O constraints, due to them being used in asm. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Re: [PATCH, tree-ssa] Avoid -Wuninitialized warning in try_unroll_loop_completely()
2013/4/15 Jeff Law l...@redhat.com: On 04/13/2013 07:17 PM, Chung-Ju Wu wrote: Hi, I noticed there is an uninitialized variable warning when compiling tree-ssa-loop-ivcanon.c file. Attached patch is a slight modification to avoid the warning and a plaintext ChangeLog is as below. Is it OK for trunk? 2013-04-14 Chung-Ju Wu jasonw...@gmail.com * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Avoid -Wuninitialized warning. If this is a false positive (and I think it is from a very quick scan of the code), can you mark the initialization as such? /* Avoid false positive -Wuninitialized warning. */ Ideally this will become standard practice. jeff You are right. After reading http://gcc.gnu.org/wiki/Better_Uninitialized_Warnings and the related discussion thread, I realized that this detection is complicated and that this is a false positive. I was using gcc-4.6.3, which is provided by Ubuntu 12.04, and the warning is displayed during the compilation process. When I built another native gcc myself from current trunk and used it to compile tree-ssa-loop-ivcanon.c again, there was no such warning at all. (See attachment for my console output.) So I am wondering if my patch is still valuable, since this false-positive warning is already fixed on trunk. Or do you think it is still good to have the comment in the patch, and is it then OK to commit it? :) Best regards, jasonwucj [jasonwucj@sw-compiler]$ g++ --version g++ (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3 Copyright (C) 2011 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
[jasonwucj@sw-compiler]$ g++ tree-ssa-loop-ivcanon.ii -S -O2 -Wall /home/jasonwucj/tmp/gcc-svn-trunk/gcc/tree-ssa-loop-ivcanon.c: In function ‘bool canonicalize_loop_induction_variables(loop*, bool, unroll_level, bool)’: /home/jasonwucj/tmp/gcc-svn-trunk/gcc/tree-ssa-loop-ivcanon.c:866:46: warning: ‘n_unroll’ may be used uninitialized in this function [-Wuninitialized] /home/jasonwucj/tmp/gcc-svn-trunk/gcc/tree-ssa-loop-ivcanon.c:655:17: note: ‘n_unroll’ was declared here [jasonwucj@sw-compiler]$ toolchain/bin/g++ --version g++ (20130416) 4.9.0 20130415 (experimental) Copyright (C) 2013 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. [jasonwucj@sw-compiler]$ toolchain/bin/g++ tree-ssa-loop-ivcanon.ii -S -O2 -Wall [jasonwucj@sw-compiler]$
RE: [Patch/ARM] Cortex-M4 core pipeline patch to tune LDR/STR pairs
Hello Ramana, Can you please review my patch at http://gcc.gnu.org/ml/gcc-patches/2013-03/msg01252.html. Thanks. Terry -Original Message- From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches-ow...@gcc.gnu.org] On Behalf Of Terry Guo Sent: Friday, March 29, 2013 6:00 PM To: gcc-patches@gcc.gnu.org Subject: [Patch/ARM] Cortex-M4 core pipeline patch to tune LDR/STR pairs Hello, The attached pipeline patch intends to turn the following code generation: ldr r5, [r4, #12]; adds r2, r2, #16; str r5, [r3, #8] into: ldr r5, [r4, #12]; str r5, [r3, #8]; adds r2, r2, #16 The reason is that the STR can be started from the second cycle of its preceding LDR, which takes 2 cycles, as long as the result of the LDR isn't used as the memory address of the STR. Tested with various benchmarks on Cortex-M4 MPS. Except for one regression caused by register allocation, the others either show performance improvement or no change. Is it OK for trunk? BR, Terry 2013-03-29 Terry Guo terry@arm.com * gcc/config/arm/cortex-m4.md: New bypass to tune LDR/STR pairs.