Re: [google] dump inline decisions to stderr under -fopt-info
There are a couple of problems with the patch (the patch is only compatible with 4_6 branch) 1) the dump_inline_decision should be called inside cgraph_mark_inline_edge when the edge is finally picked (and when the callee node is cloned) 2) The source location is not printed which makes the dumping less useful 3) the information line should clearly print out the final caller where the inlining happens, and the intermediate inline instance information should be clearly printed as additional information showing the inline context. For instance, foo1->foo2->foo3->foo4 If all callsites are inlined in top down order, the messages I expect to see are: a.c:5: note: 'foo2' is inlined into 'foo1' with call count a.c:10: note: 'foo3' is inlined into 'foo1' with call count (via inline instance 'foo2') a.c:20: note: 'foo4' is inlined into foo1' with call count (via inline instances 'foo3', 'foo2') If the decision is bottom up, the messages should look like: a.c:20: note: 'foo4' is inlined into 'foo3' with call count a.c:10: note: 'foo3' is inlined into 'foo2' with call count a.c:5: note: 'foo2' is inlined into 'foo1' with call count Ideally, the caller and callee should also be marked with 'entry count and max bb count' information -- but that probably should be controlled by opt-info level. 4) Also notice that for inline node clones, the right way to walk up the call context chain is via 'edge->caller->callers'. 5) there should be a test case. Thanks, David On Tue, Dec 13, 2011 at 5:13 PM, Dehao Chen wrote: > I've updated the patch to fix a bug in dump_inline_decision. > > Thanks, > Dehao > > On Thu, Dec 1, 2011 at 9:59 AM, Dehao Chen wrote: >> >> This patch is for google-{main|gcc_4.6} only. >> >> Tested with bootstrap and regression tests. >> >> Dump inline decisions, also output the inline chain. >> >> Dehao >> >> 2011-12-01 Dehao Chen >> >> * ipa-inline.c (dump_inline_decision): New function. >> (inline_small_functions): Use it to dump the inline decisions to >> stderr. >> >> Index: gcc/ipa-inline.c >> === >> --- gcc/ipa-inline.c (revision 181835) >> +++ gcc/ipa-inline.c (working copy) >> @@ -1377,6 +1377,45 @@ >> } >> >> >> +/* Dump the inline decision of EDGE to stderr. */ >> + >> +static void >> +dump_inline_decision (struct cgraph_edge *edge) >> +{ >> + location_t locus; >> + size_t buf_size = 4096; >> + size_t current_string_len = 0; >> + char *buf = (char *) xmalloc (buf_size); >> + struct cgraph_node *inlined_to; >> + gcov_type callee_count = edge->callee->count; >> + buf[0] = 0; >> + if (edge->inline_failed == CIF_OK && edge->callee->clone_of) >> + callee_count += edge->callee->clone_of->count; >> + for (inlined_to = edge->caller->global.inlined_to; >> + inlined_to; inlined_to = inlined_to->global.inlined_to) >> + { >> + const char *name = cgraph_node_name (inlined_to); >> + if (!name) >> + name = "unknown"; >> + current_string_len += (strlen (name) + 4); >> + while (current_string_len >= buf_size) >> + { >> + buf_size *= 2; >> + buf = (char *) xrealloc (buf, buf_size); >> + } >> + strcat (buf, "-->"); >> + strcat (buf, name); >> + } >> + locus = gimple_location (edge->call_stmt); >> + inform (locus, "%s ("HOST_WIDEST_INT_PRINT_DEC") --" >> + HOST_WIDEST_INT_PRINT_DEC"--> %s (" >> + HOST_WIDEST_INT_PRINT_DEC") %s : %s", >> + cgraph_node_name (edge->callee), callee_count, edge->count, >> + cgraph_node_name (edge->caller), edge->caller->count, buf, >> + edge->inline_failed == CIF_OK ? "INLINED": "IGNORED"); >> +} >> + >> + >> /* We use greedy algorithm for inlining of small functions: >> All inline candidates are put into prioritized heap ordered in >> increasing badness. >> @@ -1428,6 +1467,7 @@ >> overall_size = initial_size; >> max_size = compute_max_insns (overall_size); >> min_size = overall_size; >> + edge = NULL; >> >> /* Populate the heeap with all edges we might inline. */ >> >> @@ -1462,6 +1502,9 @@ >> int current_badness; >> int growth; >> >> + if (edge && flag_opt_info >= OPT_INFO_MIN) >> + dump_inline_decision (edge); >> + >> edge = (struct cgraph_edge *) fibheap_extract_min (heap); >> gcc_assert (edge->aux); >> edge->aux = NULL; >> @@ -1482,6 +1525,7 @@ >> if (current_badness != badness) >> { >> edge->aux = fibheap_insert (heap, current_badness, edge); >> + edge = NULL; >> continue; >> } >> >> @@ -1636,6 +1680,8 @@ >> fprintf (dump_file, "New minimal size reached: %i\n", min_size); >> } >> } >> + if (edge && flag_opt_info >= OPT_INFO_MIN) >> + dump_inline_decision (edge); >> >> free_growth_caches (); >> if (new_indirect_edges)
Re: [google] dump inline decisions to stderr under -fopt-info
Sorry, forgot to attach the patch... Dehao On Wed, Dec 14, 2011 at 9:13 AM, Dehao Chen wrote: > I've updated the patch to fix a bug in dump_inline_decision. > > Thanks, > Dehao > > On Thu, Dec 1, 2011 at 9:59 AM, Dehao Chen wrote: >> >> This patch is for google-{main|gcc_4.6} only. >> >> Tested with bootstrap and regression tests. >> >> Dump inline decisions, also output the inline chain. >> >> Dehao >> >> 2011-12-01 Dehao Chen >> >> * ipa-inline.c (dump_inline_decision): New function. >> (inline_small_functions): Use it to dump the inline decisions to >> stderr. >> >> Index: gcc/ipa-inline.c >> === >> --- gcc/ipa-inline.c (revision 181835) >> +++ gcc/ipa-inline.c (working copy) >> @@ -1377,6 +1377,45 @@ >> } >> >> >> +/* Dump the inline decision of EDGE to stderr. */ >> + >> +static void >> +dump_inline_decision (struct cgraph_edge *edge) >> +{ >> + location_t locus; >> + size_t buf_size = 4096; >> + size_t current_string_len = 0; >> + char *buf = (char *) xmalloc (buf_size); >> + struct cgraph_node *inlined_to; >> + gcov_type callee_count = edge->callee->count; >> + buf[0] = 0; >> + if (edge->inline_failed == CIF_OK && edge->callee->clone_of) >> + callee_count += edge->callee->clone_of->count; >> + for (inlined_to = edge->caller->global.inlined_to; >> + inlined_to; inlined_to = inlined_to->global.inlined_to) >> + { >> + const char *name = cgraph_node_name (inlined_to); >> + if (!name) >> + name = "unknown"; >> + current_string_len += (strlen (name) + 4); >> + while (current_string_len >= buf_size) >> + { >> + buf_size *= 2; >> + buf = (char *) xrealloc (buf, buf_size); >> + } >> + strcat (buf, "-->"); >> + strcat (buf, name); >> + } >> + locus = gimple_location (edge->call_stmt); >> + inform (locus, "%s ("HOST_WIDEST_INT_PRINT_DEC") --" >> + HOST_WIDEST_INT_PRINT_DEC"--> %s (" >> + HOST_WIDEST_INT_PRINT_DEC") %s : %s", >> + cgraph_node_name (edge->callee), callee_count, edge->count, >> + cgraph_node_name (edge->caller), edge->caller->count, buf, >> + edge->inline_failed == CIF_OK ? "INLINED": "IGNORED"); >> +} >> + >> + >> /* We use greedy algorithm for inlining of small functions: >> All inline candidates are put into prioritized heap ordered in >> increasing badness. >> @@ -1428,6 +1467,7 @@ >> overall_size = initial_size; >> max_size = compute_max_insns (overall_size); >> min_size = overall_size; >> + edge = NULL; >> >> /* Populate the heeap with all edges we might inline. */ >> >> @@ -1462,6 +1502,9 @@ >> int current_badness; >> int growth; >> >> + if (edge && flag_opt_info >= OPT_INFO_MIN) >> + dump_inline_decision (edge); >> + >> edge = (struct cgraph_edge *) fibheap_extract_min (heap); >> gcc_assert (edge->aux); >> edge->aux = NULL; >> @@ -1482,6 +1525,7 @@ >> if (current_badness != badness) >> { >> edge->aux = fibheap_insert (heap, current_badness, edge); >> + edge = NULL; >> continue; >> } >> >> @@ -1636,6 +1680,8 @@ >> fprintf (dump_file, "New minimal size reached: %i\n", min_size); >> } >> } >> + if (edge && flag_opt_info >= OPT_INFO_MIN) >> + dump_inline_decision (edge); >> >> free_growth_caches (); >> if (new_indirect_edges) Index: ipa-inline.c === --- ipa-inline.c(revision 181835) +++ ipa-inline.c(working copy) @@ -1073,6 +1073,44 @@ return false; } +/* Dump the inline decision of EDGE to stderr. */ + +static void +dump_inline_decision (struct cgraph_edge *edge) +{ + location_t locus; + size_t buf_size = 4096; + size_t current_string_len = 0; + char *buf = (char *) xmalloc (buf_size); + struct cgraph_node *inlined_to; + gcov_type callee_count = edge->callee->count; + buf[0] = 0; + if (edge->inline_failed == CIF_OK && edge->callee->clone_of) +callee_count += edge->callee->clone_of->count; + for (inlined_to = edge->caller->global.inlined_to; + inlined_to; inlined_to = inlined_to->global.inlined_to) +{ + const char *name = cgraph_node_name (inlined_to); + if (!name) + name = "unknown"; + current_string_len += (strlen (name) + 4); + while (current_string_len >= buf_size) + { + buf_size *= 2; + buf = (char *) xrealloc (buf, buf_size); + } + strcat (buf, "-->"); + strcat (buf, name); +} + locus = gimple_location (edge->call_stmt); + inform (locus, "%s ("HOST_WIDEST_INT_PRINT_DEC") --" + HOST_WIDEST_INT_PRINT_DEC"--> %s (" + HOST_WIDEST_INT_PRINT_DEC") %s : %s", + xstrdup (cgraph_node_name (edge->callee)), callee_count, edge->count, + xstrdup (cgraph_node_name (edge->caller)), edge->caller->count, buf, + edge->inline_failed == CIF
Re: [Patch,AVR] ad PR50931: Implement mulpsi3
2011/12/13 Georg-Johann Lay : > This adds missing 24-Bit multiplication. > > Besides implementing __mulpsi3 in libgcc the patch adds some minor tweaks to > multiply with small numbers and to represent the reduced register footprint of > asm implementations. > > With this patch PR50931 is complete from the target side. > (But there is still a segfault in the frontends, see PR51527) > > Ok for trunk? > > Johann > > libgcc/ > PR target/50931 > * config/avr/t-avr (LIB1ASMSRC): Add _mulpsi3, _mulsqipsi3. > * config/avr/lib1funcs.S (__mulpsi3, __mulsqipsi3): New functions. > gcc/ > PR target/50931 > * config/avr/avr.md (mulpsi3): New expander. > (*umulqihipsi3, *umulhiqipsi3): New insns. > (*mulsqipsi3.libgcc, *mulpsi3.libgcc): New insns. > (mulsqipsi3, *mulpsi3): New insn-and-splits. > (ashlpsi3): Turn to expander. Move insn code to... > (*ashlpsi3): ...this new insn. > testsuite/ > PR target/50931 > * gcc.target/avr/torture/int24-mul.c: New testcase. > Approved. Denis.
Re: Fix flags for edges from/to entry/exit basic blocks (issue5486043)
> From: Dmitry Vyukov > Date: Tue, 13 Dec 2011 11:43:18 +0100 > On Tue, Dec 13, 2011 at 3:04 AM, Hans-Peter Nilsson > wrote: > > Details in PR51521. > Sorry for the trouble. What should I do now? Do what Diego and others asked, roll back your patch immediately; I see it hasn't happened yet. Normally, there'd be a 48h grace period, but since you didn't follow protocol regarding testing, it should be reverted immediately. > Just in case I've prepared a patch that rolls it back: > http://codereview.appspot.com/5485054 I'm not going there, patches for trunk must be sent to gcc-patches@. But, rolling back your patch means only one thing wrt. the contents, no need to post it. You don't even have to ask for permission, see the docs: left as an exercise to find the exact sentence. brgds, H-P
C++ PATCH for c++/51406, 51161 (wrong-code with static cast to rvalue ref)
The code for casting to rvalue ref was assuming that no base adjustment would be necessary. This patch delegates to the normal lvalue binding code, and then changes the result to be an rvalue reference. Tested x86_64-pc-linux-gnu, applying to trunk. Will apply to 4.5/4.6 as well after testing. commit ed6825825765c8c02a53dc011e252ddfacc9f5e5 Author: Jason Merrill Date: Tue Dec 13 23:02:02 2011 -0500 PR c++/51406 PR c++/51161 * typeck.c (build_static_cast_1): Fix cast of lvalue to base rvalue reference. diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c index 4973d7d..b168963 100644 --- a/gcc/cp/typeck.c +++ b/gcc/cp/typeck.c @@ -5856,12 +5856,22 @@ build_static_cast_1 (tree type, tree expr, bool c_cast_p, cv2 T2 if cv2 T2 is reference-compatible with cv1 T1 (8.5.3)." */ if (TREE_CODE (type) == REFERENCE_TYPE && TYPE_REF_IS_RVALUE (type) - && lvalue_or_rvalue_with_address_p (expr) + && real_lvalue_p (expr) && reference_related_p (TREE_TYPE (type), intype) && (c_cast_p || at_least_as_qualified_p (TREE_TYPE (type), intype))) { - expr = build_typed_address (expr, type); - return convert_from_reference (expr); + /* Handle the lvalue case here by casting to lvalue reference and + then changing it to an rvalue reference. Casting an xvalue to + rvalue reference will be handled by the main code path. */ + tree lref = cp_build_reference_type (TREE_TYPE (type), false); + result = (perform_direct_initialization_if_possible + (lref, expr, c_cast_p, complain)); + result = cp_fold_convert (type, result); + /* Make sure we don't fold back down to a named rvalue reference, + because that would be an lvalue. */ + if (DECL_P (result)) + result = build1 (NON_LVALUE_EXPR, type, result); + return convert_from_reference (result); } /* Resolve overloaded address here rather than once in diff --git a/gcc/testsuite/g++.dg/cpp0x/rv-cast3.C b/gcc/testsuite/g++.dg/cpp0x/rv-cast3.C new file mode 100644 index 000..6c70324 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/rv-cast3.C @@ -0,0 +1,17 @@ +// PR c++/51406 +// { dg-do run { target c++11 } } + +extern "C" int printf(const char *,...); +extern "C" void abort(); + +struct A { int a; A() : a(1) {} }; +struct B { int b; B() : b(2) {} }; +struct X : A, B {}; + +int main() { +X x; +int a=static_cast(x).a; +int b=static_cast(x).b; +// printf ("%d %d\n", a, b); +if (a!=1 || b!=2) abort(); +} diff --git a/gcc/testsuite/g++.dg/cpp0x/rv-cast4.C b/gcc/testsuite/g++.dg/cpp0x/rv-cast4.C new file mode 100644 index 000..13f369d --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/rv-cast4.C @@ -0,0 +1,13 @@ +// PR c++/51161 +// { dg-do compile { target c++11 } } + +struct A{}; +struct B : A{}; +struct C : A{}; +struct D : B, C{}; + +int main() +{ + D d; + static_cast(d); // { dg-error "ambiguous" } +}
[PATCH] MIPS16: Fix truncated DWARF-2 line information
Hi, I've noticed in the presence of a specific MIPS16 function thunk, GCC fails to emit suitable DWARF-2 location directives, which in turn causes DWARF-2 line records to provide truncated information. This is probably best illustrated with an example. Given the following source code: $ cat sinfrob16.c int i; double sinfrob16(double d) { i++; return d; } we get this: $ mips-linux-gnu-gcc -mips16 -Wa,-call_nonpic -fPIC -G0 -g -S sinfrob16.c $ cat sinfrob16.s .section .mdebug.abi32 .previous .gnu_attribute 4, 1 .abicalls .text $Ltext0: .cfi_sections .debug_frame .comm i,4,4 .align 2 .globl sinfrob16 $LFB0 = . .file 1 "sinfrob16.c" .loc 1 4 0 .cfi_startproc # Stub function for sinfrob16 (double) .section.mips16.fn.sinfrob16,"ax",@progbits .align 2 .setnomips16 .ent__fn_stub_sinfrob16 .type __fn_stub_sinfrob16, @function __fn_stub_sinfrob16: .setnoreorder .cpload $25 .setreorder .reloc 0,R_MIPS_NONE,sinfrob16 la $25,__fn_local_sinfrob16 mfc1$5,$f12 mfc1$4,$f13 jr $25 .end__fn_stub_sinfrob16 __fn_local_sinfrob16 = sinfrob16 .text .setmips16 .entsinfrob16 .type sinfrob16, @function sinfrob16: .frame $17,8,$31 # vars= 0, regs= 2/0, args= 0, gp= 0 .mask 0x8002,-4 .fmask 0x,0 li $2,%hi(_gp_disp) addiu $3,$pc,%lo(_gp_disp) sll $2,16 addu$2,$3 save8,$17,$31 $LCFI0 = . .cfi_def_cfa_offset 8 .cfi_offset 31, -4 .cfi_offset 17, -8 move$17,$sp $LCFI1 = . .cfi_def_cfa_register 17 move$28,$2 .loc 1 5 0 move$2,$28 move$24,$2 .loc 1 4 0 sw $5,12($17) sw $4,8($17) .loc 1 5 0 move$3,$24 lw $2,%got(i)($3) lw $2,0($2) addiu $2,1 move$3,$24 lw $3,%got(i)($3) move$24,$3 move$3,$24 sw $2,0($3) .loc 1 6 0 lw $3,12($17) lw $2,8($17) move$25,$3 move$24,$2 move$3,$25 move$2,$24 move$25,$3 move$24,$2 .loc 1 7 0 move$3,$25 move$2,$24 move$6,$28 lw $6,%got(__mips16_ret_df)($6) jalr$6 lw $6,0($17) move$28,$6 move$sp,$17 $LCFI2 = . .cfi_def_cfa_register 29 restore 8,$17,$31 $LCFI3 = . .cfi_restore 17 .cfi_restore 31 .cfi_def_cfa_offset 0 j $31 .endsinfrob16 .cfi_endproc $LFE0: .size sinfrob16, .-sinfrob16 $Letext0: .section.debug_info,"",@progbits $Ldebug_info0: .4byte 0x6f .2byte 0x2 .4byte $Ldebug_abbrev0 .byte 0x4 .uleb128 0x1 .4byte $LASF1 .byte 0x1 .4byte $LASF2 .4byte $LASF3 .4byte $Ldebug_ranges0+0 .4byte 0 .4byte 0 .4byte $Ldebug_line0 .uleb128 0x2 .byte 0x1 .4byte $LASF4 .byte 0x1 .byte 0x3 .byte 0x1 .4byte 0x54 .4byte $LFB0 .4byte $LFE0 .4byte $LLST0 .byte 0x1 .4byte 0x54 .uleb128 0x3 .ascii "d\000" .byte 0x1 .byte 0x3 .4byte 0x54 .byte 0x2 .byte 0x91 .sleb128 0 .byte 0 .uleb128 0x4 .byte 0x8 .byte 0x4 .4byte $LASF0 .uleb128 0x5 .ascii "i\000" .byte 0x1 .byte 0x1 .4byte 0x6b .byte 0x1 .byte 0x5 .byte 0x3 .4byte i .uleb128 0x6 .byte 0x4 .byte 0x5 .ascii "int\000" .byte 0 .section.debug_abbrev,"",@progbits $Ldebug_abbrev0: .uleb128 0x1 .uleb128 0x11 .byte 0x1 .uleb128 0x25 .uleb128 0xe .uleb128 0x13 .uleb128 0xb .uleb128 0x3 .uleb128 0xe .uleb128 0x1b .uleb128 0xe .uleb128 0x55 .uleb128 0x6 .uleb128 0x11 .uleb128 0x1 .uleb128 0x52 .uleb128 0x1 .uleb128 0x10 .uleb128 0x6 .byte 0 .byte 0 .uleb128 0x2 .uleb128 0x2e .byte 0x1 .uleb128 0x3f .uleb128 0xc .uleb128 0x3 .uleb128 0xe .uleb128 0x3a .uleb128 0xb .uleb128 0x3b .uleb128 0xb .uleb128 0x27 .uleb128 0xc .uleb128 0x4
Go patch committed: Move reading export data to gcc interface
This patch moves reading export data from an object file out of the Go frontend proper into the gcc side of the interface. The purpose of this move is to let the gcc side entirely control the name of the Mach-O segment and section where the export data is stored. Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu. Committed to mainline. Ian 2011-12-13 Ian Lance Taylor * go-backend.c: #include "simple-object.h" and "intl.h". (GO_EXPORT_SEGMENT_NAME): Define if not defined. (GO_EXPORT_SECTION_NAME): Likewise. (go_write_export_data): Use GO_EXPORT_SECTION_NAME. (go_read_export_data): New function. * go-c.h (go_read_export_data): Declare. Index: gcc/go/gofrontend/import.h === --- gcc/go/gofrontend/import.h (revision 181813) +++ gcc/go/gofrontend/import.h (working copy) @@ -287,7 +287,7 @@ class Stream_from_string : public Import size_t pos_; }; -// Read import data from an allocated buffer. +// Read import data from a buffer allocated using malloc. class Stream_from_buffer : public Import::Stream { @@ -297,7 +297,7 @@ class Stream_from_buffer : public Import { } ~Stream_from_buffer() - { delete[] this->buf_; } + { free(this->buf_); } protected: bool Index: gcc/go/gofrontend/import.cc === --- gcc/go/gofrontend/import.cc (revision 181889) +++ gcc/go/gofrontend/import.cc (working copy) @@ -215,46 +215,24 @@ Import::find_object_export_data(const st off_t offset, Location location) { - const char* errmsg; + char *buf; + size_t len; int err; - simple_object_read* sobj = simple_object_start_read(fd, offset, - "__GNU_GO", - &errmsg, &err); - if (sobj == NULL) -return NULL; - - off_t sec_offset; - off_t sec_length; - int found = simple_object_find_section(sobj, ".go_export", &sec_offset, - &sec_length, &errmsg, &err); - - simple_object_release_read(sobj); - - if (!found) -return NULL; - - if (lseek(fd, offset + sec_offset, SEEK_SET) < 0) + const char *errmsg = go_read_export_data(fd, offset, &buf, &len, &err); + if (errmsg != NULL) { - error_at(location, "lseek %s failed: %m", filename.c_str()); + if (err == 0) + error_at(location, "%s: %s", filename.c_str(), errmsg); + else + error_at(location, "%s: %s: %s", filename.c_str(), errmsg, + xstrerror(err)); return NULL; } - char* buf = new char[sec_length]; - ssize_t c = read(fd, buf, sec_length); - if (c < 0) -{ - error_at(location, "read %s failed: %m", filename.c_str()); - delete[] buf; - return NULL; -} - if (c < sec_length) -{ - error_at(location, "%s: short read", filename.c_str()); - delete[] buf; - return NULL; -} + if (buf == NULL) +return NULL; - return new Stream_from_buffer(buf, sec_length); + return new Stream_from_buffer(buf, len); } // Class Import. Index: gcc/go/go-c.h === --- gcc/go/go-c.h (revision 181628) +++ gcc/go/go-c.h (working copy) @@ -69,6 +69,8 @@ extern void go_imported_unsafe (void); extern void go_write_export_data (const char *, unsigned int); +extern const char *go_read_export_data (int, off_t, char **, size_t *, int *); + #if defined(__cplusplus) && !defined(ENABLE_BUILD_WITH_CXX) } /* End extern "C". */ #endif Index: gcc/go/go-backend.c === --- gcc/go/go-backend.c (revision 181628) +++ gcc/go/go-backend.c (working copy) @@ -20,16 +20,31 @@ along with GCC; see the file COPYING3. #include "config.h" #include "system.h" #include "coretypes.h" +#include "simple-object.h" #include "tm.h" #include "rtl.h" #include "tree.h" #include "tm_p.h" +#include "intl.h" #include "output.h" #include "target.h" #include "common/common-target.h" #include "go-c.h" +/* The segment name we pass to simple_object_start_read to find Go + export data. */ + +#ifndef GO_EXPORT_SEGMENT_NAME +#define GO_EXPORT_SEGMENT_NAME "__GNU_GO" +#endif + +/* The section name we use when reading and writing export data. */ + +#ifndef GO_EXPORT_SECTION_NAME +#define GO_EXPORT_SECTION_NAME ".go_export" +#endif + /* This file holds all the cases where the Go frontend needs information from gcc's backend. */ @@ -95,7 +110,7 @@ go_imported_unsafe (void) } /* This is called by the Go frontend proper to add data to the - .go_export section. */ + section containing Go export data. */ void go_write_export_data (const char *bytes, unsigned int size) @@ -105,9 +120,87 @@ go_write_export_data (const char *bytes, if (sec == NULL) { gcc_assert (targetm_common.have_named_sections); - sec = get_section (".go_export", SECTION_DEBUG, NULL); + sec = get_section (GO_EXPORT_SECTION_NAME, SECTION_DEBUG, NULL); }
Re: [google] dump inline decisions to stderr under -fopt-info
I've updated the patch to fix a bug in dump_inline_decision. Thanks, Dehao On Thu, Dec 1, 2011 at 9:59 AM, Dehao Chen wrote: > > This patch is for google-{main|gcc_4.6} only. > > Tested with bootstrap and regression tests. > > Dump inline decisions, also output the inline chain. > > Dehao > > 2011-12-01 Dehao Chen > > * ipa-inline.c (dump_inline_decision): New function. > (inline_small_functions): Use it to dump the inline decisions to > stderr. > > Index: gcc/ipa-inline.c > === > --- gcc/ipa-inline.c (revision 181835) > +++ gcc/ipa-inline.c (working copy) > @@ -1377,6 +1377,45 @@ > } > > > +/* Dump the inline decision of EDGE to stderr. */ > + > +static void > +dump_inline_decision (struct cgraph_edge *edge) > +{ > + location_t locus; > + size_t buf_size = 4096; > + size_t current_string_len = 0; > + char *buf = (char *) xmalloc (buf_size); > + struct cgraph_node *inlined_to; > + gcov_type callee_count = edge->callee->count; > + buf[0] = 0; > + if (edge->inline_failed == CIF_OK && edge->callee->clone_of) > + callee_count += edge->callee->clone_of->count; > + for (inlined_to = edge->caller->global.inlined_to; > + inlined_to; inlined_to = inlined_to->global.inlined_to) > + { > + const char *name = cgraph_node_name (inlined_to); > + if (!name) > + name = "unknown"; > + current_string_len += (strlen (name) + 4); > + while (current_string_len >= buf_size) > + { > + buf_size *= 2; > + buf = (char *) xrealloc (buf, buf_size); > + } > + strcat (buf, "-->"); > + strcat (buf, name); > + } > + locus = gimple_location (edge->call_stmt); > + inform (locus, "%s ("HOST_WIDEST_INT_PRINT_DEC") --" > + HOST_WIDEST_INT_PRINT_DEC"--> %s (" > + HOST_WIDEST_INT_PRINT_DEC") %s : %s", > + cgraph_node_name (edge->callee), callee_count, edge->count, > + cgraph_node_name (edge->caller), edge->caller->count, buf, > + edge->inline_failed == CIF_OK ? "INLINED": "IGNORED"); > +} > + > + > /* We use greedy algorithm for inlining of small functions: > All inline candidates are put into prioritized heap ordered in > increasing badness. > @@ -1428,6 +1467,7 @@ > overall_size = initial_size; > max_size = compute_max_insns (overall_size); > min_size = overall_size; > + edge = NULL; > > /* Populate the heeap with all edges we might inline. */ > > @@ -1462,6 +1502,9 @@ > int current_badness; > int growth; > > + if (edge && flag_opt_info >= OPT_INFO_MIN) > + dump_inline_decision (edge); > + > edge = (struct cgraph_edge *) fibheap_extract_min (heap); > gcc_assert (edge->aux); > edge->aux = NULL; > @@ -1482,6 +1525,7 @@ > if (current_badness != badness) > { > edge->aux = fibheap_insert (heap, current_badness, edge); > + edge = NULL; > continue; > } > > @@ -1636,6 +1680,8 @@ > fprintf (dump_file, "New minimal size reached: %i\n", min_size); > } > } > + if (edge && flag_opt_info >= OPT_INFO_MIN) > + dump_inline_decision (edge); > > free_growth_caches (); > if (new_indirect_edges)
Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue 5488054)
On 2011/12/14 00:04:10, davidxl wrote: http://codereview.appspot.com/5488054/diff/4002/tree-vect-stmts.c File tree-vect-stmts.c (right): http://codereview.appspot.com/5488054/diff/4002/tree-vect-stmts.c#newcode3712 tree-vect-stmts.c:3712: } The check can be put into a helper function. Fixed. http://codereview.appspot.com/5488054/
Re: PR middle-end/51411: handle transaction_safe virtual inlined methods
On 12/13/2011 04:04 PM, Aldy Hernandez wrote: On 12/13/11 15:02, Richard Henderson wrote: On 12/13/2011 12:48 PM, Aldy Hernandez wrote: PR middle-end/51411 * trans-mem.c (ipa_tm_create_version): Do not zap DECL_EXTERNAL. ... /* ??? Is it worth trying to use make_decl_one_only? */ if (DECL_DECLARED_INLINE_P (new_decl)&& DECL_EXTERNAL (new_decl)) - { - DECL_EXTERNAL (new_decl) = 0; - TREE_PUBLIC (new_decl) = 0; - } + DECL_EXTERNAL (new_decl) = 0; Yes, that's what we had in mind. Though of course the changelog doesn't match. r~ Ok, my English typing privileges have been revoked until further notice. I'm obviously typing cross-eyed right now. Here's what I have, but keep in mind that I'm going to watch Sesame Street so I'll be away from my keyboard for a few hours... PR middle-end/51411 * trans-mem.c (ipa_tm_create_version): Do not zap TREE_PUBLIC. This causes multiple definitions. Extract of output with velox/glob2 benchmark: src/AINicowar.o: In function `_ZGTtNSsD2Ev': ./include/c++/4.7.0/bits/basic_string.h:535: multiple definition of `_ZGTtNSsD2Ev' src/AICastor.o:./include/c++/4.7.0/bits/basic_string.h:535: first defined here src/AINicowar.o: In function `_ZGTtNKSs13get_allocatorEv': ./include/c++/4.7.0/bits/basic_string.h:1814: multiple definition of `_ZGTtNKSs13get_allocatorEv' src/AICastor.o:./include/c++/4.7.0/bits/basic_string.h:1814: first defined here src/AINicowar.o: In function `_ZGTtNKSs6_M_repEv': ./include/c++/4.7.0/bits/basic_string.h:297: multiple definition of `_ZGTtNKSs6_M_repEv' src/AICastor.o:./include/c++/4.7.0/bits/basic_string.h:297: first defined here src/AINicowar.o: In function `_ZGTtNSs4_Rep10_M_disposeERKSaIcE': ./include/c++/4.7.0/bits/basic_string.h:234: multiple definition of `_ZGTtNSs4_Rep10_M_disposeERKSaIcE' src/AICastor.o:./include/c++/4.7.0/bits/basic_string.h:234: first defined here src/AINicowar.o: In function `_ZGTtNSaIcED2Ev': ./include/c++/4.7.0/bits/allocator.h:112: multiple definition of `_ZGTtNSaIcED1Ev' src/AICastor.o:./include/c++/4.7.0/bits/allocator.h:112: first defined here src/AINicowar.o: In function `_ZGTtNSaIcEC2ERKS_': ./include/c++/4.7.0/bits/allocator.h:106: multiple definition of `_ZGTtNSaIcEC1ERKS_' ... Without the patch, it is ok. If I remove completely this part: if (DECL_DECLARED_INLINE_P (new_decl) && DECL_EXTERNAL (new_decl)) { DECL_EXTERNAL (new_decl) = 0; TREE_PUBLIC (new_decl) = 0; } It ends with *only* one undefined symbol: src/UnitsSkins.o: In function `_ZGTtStltIcSt11char_traitsIcESaIcEEbRKSbIT_T0_T1_ES8_': ./include/c++/4.7.0/bits/basic_string.h:2568: undefined reference to `_ZGTtNKSs7compareERKSs' $ c++filt _ZNKSs7compareERKSs std::basic_string, std::allocator >::compare(std::basic_string, std::allocator > const&) const I am not sure the way to give you useful information. Patrick.
Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue 5488054)
http://codereview.appspot.com/5488054/diff/4002/tree-vect-stmts.c File tree-vect-stmts.c (right): http://codereview.appspot.com/5488054/diff/4002/tree-vect-stmts.c#newcode3712 tree-vect-stmts.c:3712: } The check can be put into a helper function. http://codereview.appspot.com/5488054/
Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)
I updated the patch to add the checks in vectorizable_load and vectorizable_store itself. Thanks, -Sri. On Tue, Dec 13, 2011 at 12:16 PM, Xinliang David Li wrote: > See instruction tables here: > http://www.agner.org/optimize/instruction_tables.pdf > > My brief reading of the table for core2 and corei7 suggest the following: > > 1. On core2 > > movdqu -- both load and store forms take up to 8 cycles to complete, > and store form produces 8 uops while load produces 4 uops > > movsd load: 1 uop, 2 cycle latency > movsd store: 1 uop, 3 cycle latency > > movhpd, movlpd load: 2 uops, 3 cycle latency > movhpd store: 2 uops, 5 cycle latency > movlpd store: 1uop, 3 cycle latency > > > 2. Core i7 > > movdqu load: 1 uop, 2 cycle latency > movdqu store: 1 uop, 3 cycle latency > > movsd load: 1 uop, 2 cycle latency > movsd store: 1uop, 3 cycle latency > > movhpd, movlpd load: 2 uop, 3 cycle latency > movhpd, movlpd sotre: 2 uop, 5 cycle latency > > > From the above, looks like a Sri's original simple heuristic should work fine > > 1) for corei7, if the load and stores can not be proved to be 128 bit > aligned, always use movdqu > > 2) for core2, experiment can be done to determine whether to look at > unaligned stores or both unaligned loads to disable vectorization. > > Yes, for longer term, a more precise cost model is probably needed -- > but require lots of work which may not work a lot better in practice. > > What is more important is to beef up gcc infrastructure to allow more > aggressive alignment (info) propagation. > > In 4.4, gcc does alignment (output array) based versioning -- Sri's > patch has the effect of doing the samething but only for selected > targets. > > thanks, > > David > > On Tue, Dec 13, 2011 at 10:56 AM, Richard Henderson wrote: >> On 12/13/2011 10:26 AM, Sriraman Tallam wrote: >>> Cool, this works for stores! It generates the movlps + movhps. I have >>> to also make a similar change to another call to gen_sse2_movdqu for >>> loads. Would it be ok to not do this when tune=core2? >> >> We can work something out. >> >> I'd like you to do the benchmarking to know if unaligned loads are really as >> expensive as unaligned stores, and whether there are reformatting penalties >> that make the movlps+movhps option for either load or store less attractive. >> >> >> r~
Re: Go patch committed: Multiplex goroutines onto OS threads
Uros Bizjak writes: >> This patch changes the Go library to multiplex goroutines onto operating >> system threads. Previously, each new goroutine ran in a separate >> thread. That is inefficient for programs with lots of goroutines. This >> patch changes the library such that it runs a certain numbers of >> threads, and lets each thread switch between goroutines. This is how >> the master Go library works, and this patch brings in code from the >> master Go library, adjusted for use by gccgo. > > For some reason I get this failure on alphaev68-pc-linux-gnu: > > --- FAIL: runtime_test.TestGcSys (4.64 seconds) > using 64 MB > using too much memory: 64 MB > > Raising the value in runtime/gc_test.go to 10e8 runs the test OK. Thanks for reporting this. I just committed the appended patch to both the master Go library and to libgo. I hope this will fix this problem. Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu. Ian diff -r 832d9ebdb3c6 libgo/go/runtime/gc_test.go --- a/libgo/go/runtime/gc_test.go Tue Dec 13 14:24:59 2011 -0800 +++ b/libgo/go/runtime/gc_test.go Tue Dec 13 15:13:43 2011 -0800 @@ -6,16 +6,24 @@ ) func TestGcSys(t *testing.T) { + runtime.GC() + runtime.UpdateMemStats() + sys := runtime.MemStats.Sys + for i := 0; i < 100; i++ { workthegc() } // Should only be using a few MB. runtime.UpdateMemStats() - sys := runtime.MemStats.Sys - t.Logf("using %d MB", sys>>20) - if sys > 10e6 { - t.Fatalf("using too much memory: %d MB", sys>>20) + if sys > runtime.MemStats.Sys { + sys = 0 + } else { + sys = runtime.MemStats.Sys - sys + } + t.Logf("used %d extra bytes", sys) + if sys > 2<<20 { + t.Fatalf("using too much memory: %d bytes", sys) } }
Re: [google] Add support for delete operator that takes the size of the object as a parameter
On Mon, Dec 12, 2011 at 1:52 PM, Easwaran Raman wrote: > On Mon, Dec 12, 2011 at 12:41 PM, Paolo Carlini > wrote: >> On 12/12/2011 09:37 PM, Easwaran Raman wrote: >>> >>> Thanks for the comments Paolo. I have attached the new patch. I have >>> bumped the version to 3.4.18 >> >> You shouldn't: 4.7 is not out yet, thus no reason to increase the minor >> version beyond the current 17. > Ok, I then don't understand your comment > "Note that backporting the patch as-is to 4_6-branch would be very > wrong in terms of ABI (in mainline we already have a 3.4.17)". > My original patch added the new symbol in version 3.4.17. Since we > don't want to add the symbol to 3.4.16 (if we have a situation where > the new runtime is not available when running a program compiled with > -fsized-delete) and you said I shouldn't be using 3.4.17, I assumed > I had to bump up the version. > >> >>> and used _ZdlPv[jmy] in gnu.ver. I have >>> also added the symbol to baseline_symbols.txt of other targets. >> >> You should not, just read again what I wrote. And you don't have to believe >> me: just browse the libstdc++ ChangeLogs and see if somebody ever does that >> when the linker map is touched. > > Sorry, I again misunderstood this as well (and still don't have a good > picture). Is the part which adds _ZdlPv[jmy] in gnu.ver ok? I added > that by mimicking the symbol _Znw[jmy] found in the same file. From > the log, it looks like the baseline_symbols.txt seems to be generated, > but I am not sure how that is to be done. For example, r145437 says a > bunch of these baseline_symbols.txt are regenerated, but I don't see > any other change from which this might be generated. > > Thanks, > Easwaran It looks like running 'make new-abi-baseline' under TARGET/libstdc++-v3/testsuite generates the baseline file. Should I check in that? What about other targets? Thanks, Easwaran > >> Paolo.
Re: [patch] add __is_final trait to fix libstdc++/51365
On Tue, Dec 13, 2011 at 12:02 PM, Jonathan Wakely wrote: > On 13 December 2011 17:01, Paolo Carlini wrote: >> Hi, >>> >>> This patch seems pretty simple and safe. Are you (Gaby and Paolo) arguing >>> that even so, it shouldn't go in? >> >> As far as I'm concerned, definetely not! I also think that it would be great >> if, for 4.7, Jon could handle the library issues with EBO by exploiting it. > > Yes, if this goes in for 4.7 I will definitely follow it with the > library changes to make use of it. > >> I only meant to say that something seems to me more fundamentally wrong at >> the design level about 'final' vs EBO, my hope is that for 4.8 we'll have a >> longer term stable solution based on a ISO Committee position. > > In one of the earlier bug report comments I proposed a > __gnu_cxx::is_final library trait to expose the __is_final(T) > intrinsic for users, but decided against it precisely because I don't > know how the committee will want to deal with the issue longer term. > > So the proposed patch just adds the __is_final intrinsic for use > internally by the library, to allow library changes so the test cases > in the bug report will pass. If preferred I won't even add __is_final > to the extend.texi docs, to leave it as an undocumented extension that > we could more easily remove (or deprecate) later if necessary. Leaving __is_final undocumented is probably a good compromise.
Re: [patch] add __is_final trait to fix libstdc++/51365
On Tue, Dec 13, 2011 at 10:43 AM, Jason Merrill wrote: > On 12/12/2011 09:14 AM, Gabriel Dos Reis wrote: >> >> On Mon, Dec 12, 2011 at 5:25 AM, Paolo Carlini >> wrote: >> I think being able to detect a final class is good enough for now, until we find out if there are real problems being encountered as people make more use of C++11. >>> >>> >>> Maybe. But in my opinion we should not rush. Something is wrong here at a >>> more fundamental level. >> >> >> I agree that we should wait a little bit for the dust to settle down. >> Users should >> avoid it, and implementors shouldn't go through hoops non commensurable >> with >> the benefits of "final". Maybe the "right" primitive is slightly >> different. > > > This patch seems pretty simple and safe. Are you (Gaby and Paolo) arguing > that even so, it shouldn't go in? My comment isn't about the technical aspect of the patch itself -- it is a simple patch. But we don't seem to understand yet the implications of "final". As was observed on the standard reflectors, the appropriate trait might actually need to be binary instead of unary as this patch implements -- for the purpose of empty base optimization which is where the problems pop up.
Re: [Tentative patch] -finit-real=snan - would it really be so simple for automatic arrays.
On 12/13/2011 08:41 PM, Thomas Koenig wrote: Hi Toon, (For gcc-patches: Patch at http://gcc.gnu.org/ml/fortran/2011-12/msg00080.html ) I would appreciate a review and a regression test by someone who can. Regression-test passed on trunk. This one really looks obvious. Unless somebody objects who knows this field better than I do, OK for trunk. Thanks - I'll wait two days for further comment and then apply. -- Toon Moene - e-mail: t...@moene.org - phone: +31 346 214290 Saturnushof 14, 3738 XG Maartensdijk, The Netherlands At home: http://moene.org/~toon/; weather: http://moene.org/~hirlam/ Progress of GNU Fortran: http://gcc.gnu.org/wiki/GFortran#news
Re: [gofrontend-dev] *-rtems Compilation Failures Advice Wanted
Joel Sherrill writes: > RTEMS does not currently have IPv6. > > /users/joel/test-gcc/gcc-svn/libgo/go/syscall/socket.go:299:51: error: > use of undefined type 'IPv6Mreq' This patch is intended to fix this problem. Bootstrapped on x86_64-unknown-linux-gnu, which proves little since the code is not exercised. Committed to mainline. Ian diff -r f332ea69a151 libgo/mksysinfo.sh --- a/libgo/mksysinfo.sh Tue Dec 13 14:07:07 2011 -0800 +++ b/libgo/mksysinfo.sh Tue Dec 13 14:23:23 2011 -0800 @@ -183,10 +183,12 @@ grep '^const _SHUT_' gen-sysinfo.go | sed -e 's/^\(const \)_\(SHUT[^= ]*\)\(.*\)$/\1\2 = _\2/' >> ${OUT} -# The net package requires a definition for IPV6ONLY. -if ! grep '^const IPV6_V6ONLY ' ${OUT} >/dev/null 2>&1; then - echo "const IPV6_V6ONLY = 0" >> ${OUT} -fi +# The net package requires some const definitions. +for m in IPV6_V6ONLY IPPROTO_IPV6 IPV6_JOIN_GROUP IPV6_LEAVE_GROUP; do + if ! grep "^const $m " ${OUT} >/dev/null 2>&1; then +echo "const $m = 0" >> ${OUT} + fi +done # pathconf constants. grep '^const __PC' gen-sysinfo.go | @@ -474,10 +476,13 @@ -e 's/_in_addr/[4]byte/g' \ >> ${OUT} +# We need IPMreq to compile the net package. +if ! grep 'type IPMreq ' ${OUT} >/dev/null 2>&1; then + echo 'type IPMreq struct { Multiaddr [4]byte; Interface [4]byte; }' >> ${OUT} +fi + # The size of the ip_mreq struct. -if grep 'type IPMreq ' ${OUT} > /dev/null 2>&1; then - echo 'var SizeofIPMreq = int(unsafe.Sizeof(IPMreq{}))' >> ${OUT} -fi +echo 'var SizeofIPMreq = int(unsafe.Sizeof(IPMreq{}))' >> ${OUT} # The ipv6_mreq struct. grep '^type _ipv6_mreq ' gen-sysinfo.go | \ @@ -487,6 +492,11 @@ -e 's/_in6_addr/[16]byte/' \ >> ${OUT} +# We need IPv6Mreq to compile the net package. +if ! grep 'type IPv6Mreq ' ${OUT} >/dev/null 2>&1; then + echo 'type IPv6Mreq struct { Multiaddr [16]byte; Interface uint32; }' >> ${OUT} +fi + # Try to guess the type to use for fd_set. fd_set=`grep '^type _fd_set ' gen-sysinfo.go || true` fds_bits_type="_C_long"
Re: RTEMS Go Patch
Joel Sherrill writes: > * libgo/go/syscall/wait.c: Conditionalize on WIFxxx macros > and SIGxxx being defined. I think I have fixed the wait issues as follows. Bootstrapped on x86_64-unknown-linux-gnu, which means little as the files are not compiled there. Committed to mainline. Ian diff -r e62855c629d3 libgo/Makefile.am --- a/libgo/Makefile.am Tue Dec 13 13:58:40 2011 -0800 +++ b/libgo/Makefile.am Tue Dec 13 14:06:40 2011 -0800 @@ -1450,11 +1450,22 @@ endif # Define Wait4. +if LIBGO_IS_RTEMS +syscall_wait_file = +else if HAVE_WAIT4 syscall_wait_file = go/syscall/libcall_wait4.go else syscall_wait_file = go/syscall/libcall_waitpid.go endif +endif + +# Support for pulling apart wait status. +if LIBGO_IS_RTEMS +syscall_wait_c_file = +else +syscall_wait_c_file = go/syscall/wait.c +endif # Define Sleep. if LIBGO_IS_RTEMS @@ -1564,7 +1575,7 @@ syscall_arch.go go_syscall_c_files = \ go/syscall/errno.c \ - go/syscall/wait.c + $(syscall_wait_c_file) libcalls.go: s-libcalls; @true s-libcalls: Makefile go/syscall/mksyscall.awk $(go_base_syscall_files) diff -r e62855c629d3 libgo/go/syscall/exec_stubs.go --- a/libgo/go/syscall/exec_stubs.go Tue Dec 13 13:58:40 2011 -0800 +++ b/libgo/go/syscall/exec_stubs.go Tue Dec 13 14:06:40 2011 -0800 @@ -7,17 +7,27 @@ package syscall func ForkExec(argv0 string, argv []string, envv []string, dir string, fd []int) (pid int, err int) { - return -1, ENOSYS; + return -1, ENOSYS } func Exec(argv0 string, argv []string, envv []string) (err int) { - return ENOSYS; + return ENOSYS } func Wait4(pid int, wstatus *WaitStatus, options int, rusage *Rusage) (wpid int, err error) { - return -1, ENOSYS; + return -1, ENOSYS } +func (w WaitStatus) Exited() bool{ return false } +func (w WaitStatus) Signaled() bool { return false } +func (w WaitStatus) Stopped() bool { return false } +func (w WaitStatus) Continued() bool { return false } +func (w WaitStatus) CoreDump() bool { return false } +func (w WaitStatus) ExitStatus() int { return 0 } +func (w WaitStatus) Signal() int { return 0 } +func (w WaitStatus) StopSignal() int { return 0 } +func (w WaitStatus) TrapCause() int { return 0 } + func raw_ptrace(request int, pid int, addr *byte, data *byte) Errno { return ENOSYS }
Re: RTEMS Go Patch
Joel Sherrill writes: > 2011-12-02 Joel Sherrill > > * runtime/go-signal.c: Add conditional on SIGPROF. > * runtime/mem_posix_memalign.c: Add USED directives. > * libgo/go/syscall/wait.c: Conditionalize on WIFxxx macros > and SIGxxx being defined. I committed the changes to runtime/go-signal.c and runtime/mem_posix_memalign.c. As I mentioned in an earlier message, we probably don't want to compile wait.c on RTEMS at all. I will work on that now. Ian
[PATCH] PR c++/51477 - ICE with invalid NSDMI
Hello, In the example of this patch, during the implicit declaration of the destructor, walk_field_subobs calls locate_fn_flags on the field invalid field 'x', to locate its destructor. That function pokes the BINFO of that field, which is NULL, and passes it along to lookup_fnfields. And we ICE because of that NULL base type. Just preventing lookup_member from crashing because of a NULL (or otherwise invalid) base type fixes the issue. Bootstrapped and tested on x86_64-unknown-linux-gnu against trunk. gcc/cp/ PR c++/51477 * search.c (lookup_member): Get out early on invalid base type. gcc/testsuite/ PR c++/51477 * g++.dg/cpp0x/nsdmi6.C: New test. --- gcc/cp/search.c |4 +++- gcc/testsuite/g++.dg/cpp0x/nsdmi6.C |8 2 files changed, 11 insertions(+), 1 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp0x/nsdmi6.C diff --git a/gcc/cp/search.c b/gcc/cp/search.c index 3894c68..0ceb5bc 100644 --- a/gcc/cp/search.c +++ b/gcc/cp/search.c @@ -1171,7 +1171,9 @@ lookup_member (tree xbasetype, tree name, int protect, bool want_type, const char *errstr = 0; - if (name == error_mark_node) + if (name == error_mark_node + || xbasetype == NULL_TREE + || xbasetype == error_mark_node) return NULL_TREE; gcc_assert (TREE_CODE (name) == IDENTIFIER_NODE); diff --git a/gcc/testsuite/g++.dg/cpp0x/nsdmi6.C b/gcc/testsuite/g++.dg/cpp0x/nsdmi6.C new file mode 100644 index 000..bb455e7 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/nsdmi6.C @@ -0,0 +1,8 @@ +// Origin PR c++/51477 +// { dg-options "-std=c++11" } + +struct A +{ +typedef int int T; // { dg-error "two or more data types in declaration" } +struct T x[1] = { 0 }; // { dg-error "invalid|forward" } +}; -- 1.7.6.4 -- Dodji
Re: [PATCH] PR c++/51476 - ICE on PTRMEM_CST as template argument in c++11
Jason Merrill writes: > But yes, the patch is OK after fixing the comment. Thanks. Here is what I am bootstrapping. gcc/cp/ PR c++/51476 * pt.c (convert_nontype_argument): Don't call maybe_constant_value for PTRMEM_CST nodes. gcc/testsuite/ PR c++/51476 * cpp0x/ptrmem-cst-arg1.C: New test. --- gcc/cp/pt.c | 10 +++--- gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C |9 + 2 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index bb5aa0c..3e958bc 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -5720,11 +5720,15 @@ convert_nontype_argument (tree type, tree expr, tsubst_flags_t complain) to a null value, but otherwise still need to be of a specific form. */ if (cxx_dialect >= cxx0x) { - if (INTEGRAL_OR_ENUMERATION_TYPE_P (type)) + if (TREE_CODE (expr) == PTRMEM_CST) + /* A PTRMEM_CST is already constant, and a valid template + argument for a parameter of pointer to member type, we just want + to leave it in that form rather than lower it to a + CONSTRUCTOR. */; + else if (INTEGRAL_OR_ENUMERATION_TYPE_P (type)) expr = maybe_constant_value (expr); else if (TYPE_PTR_P (type) - || (TYPE_PTR_TO_MEMBER_P (type) - && TREE_CODE (expr) != PTRMEM_CST)) + || TYPE_PTR_TO_MEMBER_P (type)) { tree folded = maybe_constant_value (expr); if (TYPE_PTR_P (type) ? integer_zerop (folded) diff --git a/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C b/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C new file mode 100644 index 000..b6c81d5 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C @@ -0,0 +1,9 @@ +// Origin PR c++/51476 +// { dg-options "-std=c++11" } + +template struct A {}; +struct B +{ +int i; +A<&B::i> a; // { dg-error "could not convert template argument" } +}; -- 1.7.6.4 -- Dodji
Re: PR middle-end/51411: handle transaction_safe virtual inlined methods
On 12/13/11 15:02, Richard Henderson wrote: On 12/13/2011 12:48 PM, Aldy Hernandez wrote: PR middle-end/51411 * trans-mem.c (ipa_tm_create_version): Do not zap DECL_EXTERNAL. ... /* ??? Is it worth trying to use make_decl_one_only? */ if (DECL_DECLARED_INLINE_P (new_decl)&& DECL_EXTERNAL (new_decl)) - { - DECL_EXTERNAL (new_decl) = 0; - TREE_PUBLIC (new_decl) = 0; - } + DECL_EXTERNAL (new_decl) = 0; Yes, that's what we had in mind. Though of course the changelog doesn't match. r~ Ok, my English typing privileges have been revoked until further notice. I'm obviously typing cross-eyed right now. Here's what I have, but keep in mind that I'm going to watch Sesame Street so I'll be away from my keyboard for a few hours... PR middle-end/51411 * trans-mem.c (ipa_tm_create_version): Do not zap TREE_PUBLIC.
Re: PR middle-end/51411: handle transaction_safe virtual inlined methods
On 12/13/2011 12:48 PM, Aldy Hernandez wrote: > PR middle-end/51411 > * trans-mem.c (ipa_tm_create_version): Do not zap DECL_EXTERNAL. ... >/* ??? Is it worth trying to use make_decl_one_only? */ >if (DECL_DECLARED_INLINE_P (new_decl) && DECL_EXTERNAL (new_decl)) > - { > - DECL_EXTERNAL (new_decl) = 0; > - TREE_PUBLIC (new_decl) = 0; > - } > + DECL_EXTERNAL (new_decl) = 0; Yes, that's what we had in mind. Though of course the changelog doesn't match. r~
Re: PR middle-end/51411: handle transaction_safe virtual inlined methods
On 12/13/2011 03:48 PM, Aldy Hernandez wrote: Be that as it may, I gather this is what you want? Yep, that's what I had in mind. Jason
Re: PR middle-end/51411: handle transaction_safe virtual inlined methods
On 12/13/11 14:42, Richard Henderson wrote: On 12/13/2011 12:41 PM, Aldy Hernandez wrote: The TM clone is marked weak as the original decl was weak, but it is no longer external or public. I suggested to just clear the DECL_WEAK bit on the clone, but Jason mentioned that unsetting DECL_EXTERNAL is more optimal-- so that the clone can still be public and comdat. No regressions. PR fixed. OK? curr PR middle-end/51411 * trans-mem.c (ipa_tm_create_version): Do not zap DECL_EXTERNAL. The actual patch is more or less the exact opposite of what Jason recommended. r~ Bah, you guys are speaking Bostoneese or something, cause reading back on the IRC log I still think I implemented what Jason said. Be that as it may, I gather this is what you want? OK pending another round of tests? PR middle-end/51411 * trans-mem.c (ipa_tm_create_version): Do not zap DECL_EXTERNAL. Index: testsuite/g++.dg/tm/pr51411.C === --- testsuite/g++.dg/tm/pr51411.C (revision 0) +++ testsuite/g++.dg/tm/pr51411.C (revision 0) @@ -0,0 +1,7 @@ +// { dg-do compile } +// { dg-options "-fgnu-tm -O" } + +struct A +{ + __attribute__ ((transaction_safe)) virtual void virtfoo () { } +}; Index: trans-mem.c === --- trans-mem.c (revision 182290) +++ trans-mem.c (working copy) @@ -4256,10 +4256,7 @@ ipa_tm_create_version (struct cgraph_nod /* Remap extern inline to static inline. */ /* ??? Is it worth trying to use make_decl_one_only? */ if (DECL_DECLARED_INLINE_P (new_decl) && DECL_EXTERNAL (new_decl)) - { - DECL_EXTERNAL (new_decl) = 0; - TREE_PUBLIC (new_decl) = 0; - } + DECL_EXTERNAL (new_decl) = 0; tree_function_versioning (old_decl, new_decl, NULL, false, NULL, NULL, NULL);
Re: PR middle-end/51411: handle transaction_safe virtual inlined methods
On 12/13/2011 12:41 PM, Aldy Hernandez wrote: > The TM clone is marked weak as the original decl was weak, but it is no > longer external or public. I suggested to just clear the DECL_WEAK bit on > the clone, but Jason mentioned that unsetting DECL_EXTERNAL is more optimal-- > so that the clone can still be public and comdat. > > No regressions. PR fixed. > > OK? > > curr > > > PR middle-end/51411 > * trans-mem.c (ipa_tm_create_version): Do not zap DECL_EXTERNAL. The actual patch is more or less the exact opposite of what Jason recommended. r~
PR middle-end/51411: handle transaction_safe virtual inlined methods
IPA's function_and_variable_visibility is dying, because for a C++ virtual inlined TM clone, we have zapped the DECL_EXTERNAL and TREE_PUBLIC bits while building the clone in ipa_tm_create_version. The assert fails here in function_and_variable_visibility(): gcc_assert ((!DECL_WEAK (node->decl) && !DECL_COMDAT (node->decl)) || TREE_PUBLIC (node->decl) || DECL_EXTERNAL (node->decl)); The TM clone is marked weak as the original decl was weak, but it is no longer external or public. I suggested to just clear the DECL_WEAK bit on the clone, but Jason mentioned that unsetting DECL_EXTERNAL is more optimal-- so that the clone can still be public and comdat. No regressions. PR fixed. OK? PR middle-end/51411 * trans-mem.c (ipa_tm_create_version): Do not zap DECL_EXTERNAL. Index: testsuite/g++.dg/tm/pr51411.C === --- testsuite/g++.dg/tm/pr51411.C (revision 0) +++ testsuite/g++.dg/tm/pr51411.C (revision 0) @@ -0,0 +1,7 @@ +// { dg-do compile } +// { dg-options "-fgnu-tm -O" } + +struct A +{ + __attribute__ ((transaction_safe)) virtual void virtfoo () { } +}; Index: trans-mem.c === --- trans-mem.c (revision 182290) +++ trans-mem.c (working copy) @@ -4256,10 +4256,7 @@ ipa_tm_create_version (struct cgraph_nod /* Remap extern inline to static inline. */ /* ??? Is it worth trying to use make_decl_one_only? */ if (DECL_DECLARED_INLINE_P (new_decl) && DECL_EXTERNAL (new_decl)) - { - DECL_EXTERNAL (new_decl) = 0; - TREE_PUBLIC (new_decl) = 0; - } + TREE_PUBLIC (new_decl) = 0; tree_function_versioning (old_decl, new_decl, NULL, false, NULL, NULL, NULL);
Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)
On Tue, Dec 13, 2011 at 10:56 AM, Richard Henderson wrote: > On 12/13/2011 10:26 AM, Sriraman Tallam wrote: >> Cool, this works for stores! It generates the movlps + movhps. I have >> to also make a similar change to another call to gen_sse2_movdqu for >> loads. Would it be ok to not do this when tune=core2? > > We can work something out. > > I'd like you to do the benchmarking to know if unaligned loads are really as > expensive as unaligned stores, and whether there are reformatting penalties > that make the movlps+movhps option for either load or store less attractive. I can confirm that movhps+movlps is *not at all* a good substitute for movdqu on core2. It makes it much worse. MOVHPS/MOVLPS has a very high penalty (~10x) for unaligned load/stores. > > > r~
Re: [RFC][libitm] Convert to c++11 atomics
On 12/13/2011 11:43 AM, Dominique Dhumieres wrote: >> I've committed the patch. > > It caused: > > ../../../work/libitm/config/posix/rwlock.cc: In member function 'bool > GTM::gtm_rwlock::write_lock_generic(GTM::gtm_thread*)': > ../../../work/libitm/config/posix/rwlock.cc:196:56: error: comparison between > signed and unsigned integer expressions [-Werror=sign-compare] > cc1plus: all warnings being treated as errors Fixed. Tested on linux with --disable-linux-futex. r~ Index: libitm/ChangeLog === --- libitm/ChangeLog(revision 182301) +++ libitm/ChangeLog(working copy) @@ -1,5 +1,8 @@ 2011-12-13 Richard Henderson + * config/posix/rwlock.cc (gtm_rwlock::write_lock_generic): Fix + signed/unsigned comparison werror. + * local_atomic: New file. * libitm_i.h: Include it. (gtm_thread::shared_state): Use atomic template. Index: libitm/config/posix/rwlock.cc === --- libitm/config/posix/rwlock.cc (revision 182301) +++ libitm/config/posix/rwlock.cc (working copy) @@ -193,7 +193,7 @@ it = it->next_thread) { // Don't count ourself if this is an upgrade. - if (it->shared_state.load(memory_order_relaxed) != -1) + if (it->shared_state.load(memory_order_relaxed) != (gtm_word)-1) readers++; }
Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)
See instruction tables here: http://www.agner.org/optimize/instruction_tables.pdf My brief reading of the table for core2 and corei7 suggest the following: 1. On core2 movdqu -- both load and store forms take up to 8 cycles to complete, and store form produces 8 uops while load produces 4 uops movsd load: 1 uop, 2 cycle latency movsd store: 1 uop, 3 cycle latency movhpd, movlpd load: 2 uops, 3 cycle latency movhpd store: 2 uops, 5 cycle latency movlpd store: 1uop, 3 cycle latency 2. Core i7 movdqu load: 1 uop, 2 cycle latency movdqu store: 1 uop, 3 cycle latency movsd load: 1 uop, 2 cycle latency movsd store: 1uop, 3 cycle latency movhpd, movlpd load: 2 uop, 3 cycle latency movhpd, movlpd sotre: 2 uop, 5 cycle latency >From the above, looks like a Sri's original simple heuristic should work fine 1) for corei7, if the load and stores can not be proved to be 128 bit aligned, always use movdqu 2) for core2, experiment can be done to determine whether to look at unaligned stores or both unaligned loads to disable vectorization. Yes, for longer term, a more precise cost model is probably needed -- but require lots of work which may not work a lot better in practice. What is more important is to beef up gcc infrastructure to allow more aggressive alignment (info) propagation. In 4.4, gcc does alignment (output array) based versioning -- Sri's patch has the effect of doing the samething but only for selected targets. thanks, David On Tue, Dec 13, 2011 at 10:56 AM, Richard Henderson wrote: > On 12/13/2011 10:26 AM, Sriraman Tallam wrote: >> Cool, this works for stores! It generates the movlps + movhps. I have >> to also make a similar change to another call to gen_sse2_movdqu for >> loads. Would it be ok to not do this when tune=core2? > > We can work something out. > > I'd like you to do the benchmarking to know if unaligned loads are really as > expensive as unaligned stores, and whether there are reformatting penalties > that make the movlps+movhps option for either load or store less attractive. > > > r~
Re: [patch] add __is_final trait to fix libstdc++/51365
On 12/13/2011 12:01 PM, Paolo Carlini wrote: As far as I'm concerned, definetely not! I also think that it would be great if, for 4.7, Jon could handle the library issues with EBO by exploiting it. The patch is OK, then. Jason
Re: [PATCH] [MIPS] Add Octeon2 cpu support to GCC
On Tue, Dec 13, 2011 at 11:19 AM, Richard Sandiford wrote: > Andrew Pinski writes: >> +(define_insn_reservation "octeon_imul3_o2" 6 >> + (and (eq_attr "cpu" "octeon2") >> + (eq_attr "type" "imul3,pop,clz")) >> + " octeon_pipe1 + octeon_mult") > > Excess space before ". > >> Index: config/mips/mips.h >> === >> --- config/mips/mips.h (revision 182183) >> +++ config/mips/mips.h (working copy) >> @@ -250,7 +252,9 @@ struct mips_cpu_info { >> #define TUNE_MIPS6000 (mips_tune == PROCESSOR_R6000) >> #define TUNE_MIPS7000 (mips_tune == PROCESSOR_R7000) >> #define TUNE_MIPS9000 (mips_tune == PROCESSOR_R9000) >> -#define TUNE_OCTEON (mips_tune == PROCESSOR_OCTEON) >> +#define TUNE_OCTEON (mips_tune == PROCESSOR_OCTEON \ >> + || mips_tune == PROCESSOR_OCTEON2) >> +#define TUNE_OCTEON2 (mips_tune == PROCESSOR_OCTEON2) >> #define TUNE_SB1 (mips_tune == PROCESSOR_SB1 >> \ >> || mips_tune == PROCESSOR_SB1A) > > Do any follow-on patches need TUNE_OCTEON2? If not, let's leave it out > for now. (I realise they'll use TARGET_OCTEON2.) > > OK for 4.7 with those (trivial) changes. Here is the patch which I committed, I had to add support to mips.exp for -fdump-* options (I also fixed the -g* options, the regexp was incorrect, it needed a . in front of the *). Thanks, Andrew Pinski gcc/ChangeLog: * config/mips/mips-cpus.def: Add Octeon2. * config/mips/mips-tables.opt: Regenerate. * config/mips/mips.md (define_attr "cpu"): Add Octeon2. * config/mips/driver-native.c (host_detect_local_cpu): Support Octeon2 also. * config/mips/octeon.md (octeon_arith): Add Octeon2. (octeon_condmove): Likewise. (octeon_load): Rename to .. (octeon_load_o1): this. (octeon_load_o2): New reserve. (octeon_cop_o2): New reserve. (octeon_store): Match Octeon2 also. (octeon_brj): Rename to .. (octeon_brj_o1): this. (octeon_brj_o2): New reserve. (octeon_imul3): Rename to ... (octeon_imul3_o1): this. (octeon_imul3_o2): New reserve. (octeon_imul): Rename to ... (octeon_imul_o1): this. (octeon_imul_o2): New reserve. (octeon_mfhilo): Rename to ... (octeon_mfhilo_o1): This. (octeon_mfhilo_o2): New reserve. (octeon_imadd): Rename to ... (octeon_imadd_o1): this. (octeon_imadd_o2): New reserve. (octeon_idiv): Rename to .. (octeon_idiv_o1): This. (octeon_idiv_o2_si): New reserve. (octeon_idiv_o2_di): Likewise. (octeon_unknown): Match Octeon2 also. * config/mips/mips.c (mips_rtx_cost_data): Add Octeon2 cost data. (mips_issue_rate): Octeon2 can issue 2 at a time. * config/mips/mips.h (TARGET_OCTEON): Match Octeon2 also. (TARGET_OCTEON2): New define. (TUNE_OCTEON): Match Octeon2 also. testsuite/ChangeLog: * gcc.target/mips/mips.exp (mips_option_groups): Fix debug. Add -fdump-* options. * gcc.target/mips/octeon2-pipe-1.c: New testcase. * gcc.target/mips/octeon-pipe-1.c: New testcase. Index: testsuite/gcc.target/mips/mips.exp === --- testsuite/gcc.target/mips/mips.exp (revision 182183) +++ testsuite/gcc.target/mips/mips.exp (working copy) @@ -226,7 +226,7 @@ set mips_option_groups { abi "-mabi=.*" addressing "addressing=.*" arch "-mips([1-5]|32.*|64.*)|-march=.*|isa(|_rev)(=|<=|>=).*" -debug "-g*" +debug "-g.*" dump_pattern "-dp" endianness "-E(L|B)|-me(l|b)" float "-m(hard|soft)-float" @@ -241,6 +241,7 @@ set mips_option_groups { profiling "-pg" small-data "-G[0-9]+" warnings "-w" +dump "-fdump-.*" } # Add -mfoo/-mno-foo options to mips_option_groups. Index: testsuite/gcc.target/mips/octeon2-pipe-1.c === --- testsuite/gcc.target/mips/octeon2-pipe-1.c (revision 0) +++ testsuite/gcc.target/mips/octeon2-pipe-1.c (revision 0) @@ -0,0 +1,11 @@ +/* Check that we use the octeon2 pipeline description. */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-rtl-sched2 -march=octeon2" } */ + +NOMIPS16 int f (int a, int b) +{ + return a / b; +} + +/* { dg-final { scan-rtl-dump "octeon_mult\\*17" "sched2" } } */ +/* { dg-final { cleanup-tree-dump "sched2" } } */ Index: testsuite/gcc.target/mips/octeon-pipe-1.c === --- testsuite/gcc.target/mips/octeon-pipe-1.c (revision 0) +++ testsuite/gcc.target/mips/octeon-pipe-1.c (revision 0) @@ -0,0 +1,11 @@ +/* Check that we use the octeon pipeline description. */ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=octeon -fdump-rtl-sched2" } */ + +NOMIPS16 int f (int a, int b) +{ + return a / b; +} + +/* { dg-final { scan-rtl-dump "octeon_mult\\*71" "sched2" } } */ +/* { dg-final { cleanup-tree-dump "sched2" } } */ Index: config/mips/mips-tables.opt ==
Re: [RFC][libitm] Convert to c++11 atomics
> I've committed the patch. It caused: ../../../work/libitm/config/posix/rwlock.cc: In member function 'bool GTM::gtm_rwlock::write_lock_generic(GTM::gtm_thread*)': ../../../work/libitm/config/posix/rwlock.cc:196:56: error: comparison between signed and unsigned integer expressions [-Werror=sign-compare] cc1plus: all warnings being treated as errors TIA Dominique
Re: [Tentative patch] -finit-real=snan - would it really be so simple for automatic arrays.
Hi Toon, (For gcc-patches: Patch at http://gcc.gnu.org/ml/fortran/2011-12/msg00080.html ) I would appreciate a review and a regression test by someone who can. Regression-test passed on trunk. This one really looks obvious. Unless somebody objects who knows this field better than I do, OK for trunk. Thanks a lot! Thomas
[Patch,AVR] ad PR50931: Implement mulpsi3
This adds missing 24-Bit multiplication. Besides implementing __mulpsi3 in libgcc the patch adds some minor tweaks to multiply with small numbers and to represent the reduced register footprint of asm implementations. With this patch PR50931 is complete from the target side. (But there is still a segfault in the frontends, see PR51527) Ok for trunk? Johann libgcc/ PR target/50931 * config/avr/t-avr (LIB1ASMSRC): Add _mulpsi3, _mulsqipsi3. * config/avr/lib1funcs.S (__mulpsi3, __mulsqipsi3): New functions. gcc/ PR target/50931 * config/avr/avr.md (mulpsi3): New expander. (*umulqihipsi3, *umulhiqipsi3): New insns. (*mulsqipsi3.libgcc, *mulpsi3.libgcc): New insns. (mulsqipsi3, *mulpsi3): New insn-and-splits. (ashlpsi3): Turn to expander. Move insn code to... (*ashlpsi3): ...this new insn. testsuite/ PR target/50931 * gcc.target/avr/torture/int24-mul.c: New testcase. Index: gcc/config/avr/avr.md === --- gcc/config/avr/avr.md (revision 182277) +++ gcc/config/avr/avr.md (working copy) @@ -2113,7 +2113,7 @@ (define_insn "*mulhi3_call" [(set_attr "type" "xcall") (set_attr "cc" "clobber")]) -;; To support widening multiplicatioon with constant we postpone +;; To support widening multiplication with constant we postpone ;; expanding to the implicit library call until post combine and ;; prior to register allocation. Clobber all hard registers that ;; might be used by the (widening) multiply until it is split and @@ -2575,6 +2575,132 @@ (define_insn "*udivmodhi4_call" (set_attr "cc" "clobber")]) ;; +;; 24-bit multiply + +;; To support widening multiplication with constant we postpone +;; expanding to the implicit library call until post combine and +;; prior to register allocation. Clobber all hard registers that +;; might be used by the (widening) multiply until it is split and +;; it's final register footprint is worked out. + +(define_expand "mulpsi3" + [(parallel [(set (match_operand:PSI 0 "register_operand" "") + (mult:PSI (match_operand:PSI 1 "register_operand" "") + (match_operand:PSI 2 "nonmemory_operand" ""))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL" + { +if (s8_operand (operands[2], PSImode)) + { +rtx reg = force_reg (QImode, gen_int_mode (INTVAL (operands[2]), QImode)); +emit_insn (gen_mulsqipsi3 (operands[0], reg, operands[1])); +DONE; + } + }) + +(define_insn "*umulqihipsi3" + [(set (match_operand:PSI 0 "register_operand" "=&r") +(mult:PSI (zero_extend:PSI (match_operand:QI 1 "register_operand" "r")) + (zero_extend:PSI (match_operand:HI 2 "register_operand" "r"] + "AVR_HAVE_MUL" + "mul %1,%A2 + movw %A0,r0 + mul %1,%B2 + clr %C0 + add %B0,r0 + adc %C0,r1 + clr __zero_reg__" + [(set_attr "length" "7") + (set_attr "cc" "clobber")]) + +(define_insn "*umulhiqipsi3" + [(set (match_operand:PSI 0 "register_operand" "=&r") +(mult:PSI (zero_extend:PSI (match_operand:HI 2 "register_operand" "r")) + (zero_extend:PSI (match_operand:QI 1 "register_operand" "r"] + "AVR_HAVE_MUL" + "mul %1,%A2 + movw %A0,r0 + mul %1,%B2 + add %B0,r0 + mov %C0,r1 + clr __zero_reg__ + adc %C0,__zero_reg__" + [(set_attr "length" "7") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "mulsqipsi3" + [(set (match_operand:PSI 0 "pseudo_register_operand" "=r") +(mult:PSI (sign_extend:PSI (match_operand:QI 1 "pseudo_register_operand" "r")) + (match_operand:PSI 2 "pseudo_register_or_const_int_operand""rn"))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:QI 25) +(match_dup 1)) + (set (reg:PSI 22) +(match_dup 2)) + (set (reg:PSI 18) +(mult:PSI (sign_extend:PSI (reg:QI 25)) + (reg:PSI 22))) + (set (match_dup 0) +(reg:PSI 18))]) + +(define_insn_and_split "*mulpsi3" + [(set (match_operand:PSI 0 "pseudo_register_operand" "=r") +(mult:PSI (match_operand:PSI 1 "pseudo_register_operand" "r") + (match_operand:PSI 2 "pseudo_register_or_const_int_operand" "rn"))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:PSI 18) +(match_dup 1)) + (set (reg:PSI 22) +(match_dup 2)) + (parallel [(set (reg:PSI 22) + (mult:PSI (reg:PSI 22) + (reg:PSI 18))) + (clobber (reg:QI 21)) + (clobber (reg:QI 25)) +
Re: [PATCH] regmove: Fix segfault when accessing call_used_regs
"Andreas Krebbel" writes: > Ok for mainline, 4.6, 4.5, and 4.4? OK everywhere, but (sorry to be picky)... > Index: gcc/regmove.c > === > *** gcc/regmove.c.orig > --- gcc/regmove.c > *** fixup_match_2 (rtx insn, rtx dst, rtx sr > *** 859,865 > if (REG_N_CALLS_CROSSED (REGNO (src)) == 0) > break; > > ! if (call_used_regs [REGNO (dst)] > || find_reg_fusage (p, CLOBBER, dst)) > break; > } > --- 859,865 > if (REG_N_CALLS_CROSSED (REGNO (src)) == 0) > break; > > ! if ((REGNO (dst) < FIRST_PSEUDO_REGISTER && call_used_regs [REGNO > (dst)]) > || find_reg_fusage (p, CLOBBER, dst)) > break; > } ...please use HARD_REGISTER_P (dst) instead. For one thing, it'll keep the line within the 80 column limit. Thanks, Richard
Re: [PATCH] [MIPS] Add Octeon2 cpu support to GCC
Andrew Pinski writes: > +(define_insn_reservation "octeon_imul3_o2" 6 > + (and (eq_attr "cpu" "octeon2") > + (eq_attr "type" "imul3,pop,clz")) > + " octeon_pipe1 + octeon_mult") Excess space before ". > Index: config/mips/mips.h > === > --- config/mips/mips.h(revision 182183) > +++ config/mips/mips.h(working copy) > @@ -250,7 +252,9 @@ struct mips_cpu_info { > #define TUNE_MIPS6000 (mips_tune == PROCESSOR_R6000) > #define TUNE_MIPS7000 (mips_tune == PROCESSOR_R7000) > #define TUNE_MIPS9000 (mips_tune == PROCESSOR_R9000) > -#define TUNE_OCTEON (mips_tune == PROCESSOR_OCTEON) > +#define TUNE_OCTEON (mips_tune == PROCESSOR_OCTEON \ > + || mips_tune == PROCESSOR_OCTEON2) > +#define TUNE_OCTEON2 (mips_tune == PROCESSOR_OCTEON2) > #define TUNE_SB1(mips_tune == PROCESSOR_SB1 > \ >|| mips_tune == PROCESSOR_SB1A) Do any follow-on patches need TUNE_OCTEON2? If not, let's leave it out for now. (I realise they'll use TARGET_OCTEON2.) OK for 4.7 with those (trivial) changes. Thanks, Richard
Re: Go patch committed: Multiplex goroutines onto OS threads
Rainer Orth writes: > Only a few minor compilation problems, fixed with the following patch. Thanks. Committed. Ian
Re: [RFC][libitm] Convert to c++11 atomics
On 11/30/2011 05:13 PM, Richard Henderson wrote: > The library is written in C++, so in theory we can use the real atomic<> > templates, etc. Except that we have the same horrid problem finding the C++ > headers as did for , so again we have a local copy of . > Blah. But given that it is a copy, the rest of the code is using the real > interfaces. > > This passes the testsuite on power7 (gcc110), which *is* picky about memory > barriers, but I seem to recall that the larger external tests that Velox used > were much better at picking out problems. And I've misplaced those... > > Torvald, if you'd be so kind as to cast another set of eyes across this, I'd > be grateful. I've committed the patch. r~
libgo patch committed: Update to weekly.2011-12-02
I have committed a patch to update libgo to the weekly.2011-12-02 release of the master library. In this message I have only included the diffs to files that are specific to the gccgo copy of the library. Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu. Committed to mainline. Ian diff -r 53056adb4c1e libgo/MERGE --- a/libgo/MERGE Tue Dec 13 10:51:45 2011 -0800 +++ b/libgo/MERGE Tue Dec 13 11:10:08 2011 -0800 @@ -1,4 +1,4 @@ -b4a91b693374 +0beb796b4ef8 The first line of this file holds the Mercurial revision number of the last merge done from the master library sources. diff -r 53056adb4c1e libgo/Makefile.am --- a/libgo/Makefile.am Tue Dec 13 10:51:45 2011 -0800 +++ b/libgo/Makefile.am Tue Dec 13 11:10:08 2011 -0800 @@ -233,7 +233,6 @@ toolexeclibgoexp_DATA = \ exp/ebnf.gox \ - exp/gui.gox \ $(exp_inotify_gox) \ exp/norm.gox \ exp/spdy.gox \ @@ -242,11 +241,6 @@ exp/terminal.gox \ exp/types.gox -toolexeclibgoexpguidir = $(toolexeclibgoexpdir)/gui - -toolexeclibgoexpgui_DATA = \ - exp/gui/x11.gox - toolexeclibgoexpsqldir = $(toolexeclibgoexpdir)/sql toolexeclibgoexpsql_DATA = \ @@ -447,6 +441,7 @@ runtime/go-map-len.c \ runtime/go-map-range.c \ runtime/go-nanotime.c \ + runtime/go-now.c \ runtime/go-new-map.c \ runtime/go-new.c \ runtime/go-panic.c \ @@ -576,6 +571,7 @@ go_html_files = \ go/html/const.go \ go/html/doc.go \ + go/html/doctype.go \ go/html/entity.go \ go/html/escape.go \ go/html/node.go \ @@ -888,7 +884,7 @@ go/time/sys_unix.go \ go/time/tick.go \ go/time/time.go \ - go/time/zoneinfo_posix.go \ + go/time/zoneinfo.go \ go/time/zoneinfo_unix.go go_unicode_files = \ @@ -1038,6 +1034,7 @@ go_crypto_x509_files = \ go/crypto/x509/cert_pool.go \ go/crypto/x509/pkcs1.go \ + go/crypto/x509/pkcs8.go \ go/crypto/x509/verify.go \ go/crypto/x509/x509.go go_crypto_xtea_files = \ @@ -1135,8 +1132,6 @@ go_exp_ebnf_files = \ go/exp/ebnf/ebnf.go \ go/exp/ebnf/parser.go -go_exp_gui_files = \ - go/exp/gui/gui.go go_exp_inotify_files = \ go/exp/inotify/inotify_linux.go go_exp_norm_files = \ @@ -1178,10 +1173,6 @@ go/exp/types/types.go \ go/exp/types/universe.go -go_exp_gui_x11_files = \ - go/exp/gui/x11/auth.go \ - go/exp/gui/x11/conn.go - go_exp_sql_driver_files = \ go/exp/sql/driver/driver.go \ go/exp/sql/driver/types.go @@ -1415,13 +1406,11 @@ go/text/template/exec.go \ go/text/template/funcs.go \ go/text/template/helper.go \ - go/text/template/parse.go \ - go/text/template/set.go + go/text/template/template.go go_text_template_parse_files = \ go/text/template/parse/lex.go \ go/text/template/parse/node.go \ - go/text/template/parse/parse.go \ - go/text/template/parse/set.go + go/text/template/parse/parse.go go_sync_atomic_files = \ go/sync/atomic/doc.go @@ -1725,14 +1714,12 @@ encoding/pem.lo \ encoding/xml.lo \ exp/ebnf.lo \ - exp/gui.lo \ exp/norm.lo \ exp/spdy.lo \ exp/sql.lo \ exp/ssh.lo \ exp/terminal.lo \ exp/types.lo \ - exp/gui/x11.lo \ exp/sql/driver.lo \ html/template.lo \ go/ast.lo \ @@ -2784,16 +2771,6 @@ @$(CHECK) .PHONY: exp/ebnf/check -@go_include@ exp/gui.lo.dep -exp/gui.lo.dep: $(go_exp_gui_files) - $(BUILDDEPS) -exp/gui.lo: $(go_exp_gui_files) - $(BUILDPACKAGE) -exp/gui/check: $(CHECK_DEPS) - @$(MKDIR_P) exp/gui - @$(CHECK) -.PHONY: exp/gui/check - @go_include@ exp/norm.lo.dep exp/norm.lo.dep: $(go_exp_norm_files) $(BUILDDEPS) @@ -2854,16 +2831,6 @@ @$(CHECK) .PHONY: exp/types/check -@go_include@ exp/gui/x11.lo.dep -exp/gui/x11.lo.dep: $(go_exp_gui_x11_files) - $(BUILDDEPS) -exp/gui/x11.lo: $(go_exp_gui_x11_files) - $(BUILDPACKAGE) -exp/gui/x11/check: $(CHECK_DEPS) - @$(MKDIR_P) exp/gui/x11 - @$(CHECK) -.PHONY: exp/gui/x11/check - @go_include@ exp/inotify.lo.dep exp/inotify.lo.dep: $(go_exp_inotify_files) $(BUILDDEPS) @@ -3686,8 +3653,6 @@ exp/ebnf.gox: exp/ebnf.lo $(BUILDGOX) -exp/gui.gox: exp/gui.lo - $(BUILDGOX) exp/inotify.gox: exp/inotify.lo $(BUILDGOX) exp/norm.gox: exp/norm.lo @@ -3703,9 +3668,6 @@ exp/types.gox: exp/types.lo $(BUILDGOX) -exp/gui/x11.gox: exp/gui/x11.lo - $(BUILDGOX) - exp/sql/driver.gox: exp/sql/driver.lo $(BUILDGOX) @@ -3950,6 +3912,7 @@ html/template/check \ go/ast/check \ $(go_build_check_omitted_since_it_calls_6g) \ + go/doc/check \ go/parser/check \ go/printer/check \ go/scanner/check \ diff -r 53056adb4c1e libgo/runtime/go-nanotime.c --- a/libgo/runtime/go-nanotime.c Tue Dec 13 10:51:45 2011 -0800 +++ b/libgo/runtime/go-nanotime.c Tue Dec 13 11:10:08 2011 -0800 @@ -6,17 +6,16 @@ #include -#include "go-assert.h" #include "runtime.h" +int64 runtime_nanotime (void) + __attribute__ ((no_split_stack)); + int64 runtime_nanotime (void) { - int i; struct timeval tv; - i = gettimeofday (&tv, NULL); - __go_assert (i == 0); - + gettimeofday (&tv, NULL); return (int64) tv.tv_sec * 10 + (int64) tv.tv_usec * 1000; } diff -r 53056adb4c1e libgo/runtime/go
Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)
On Tue, Dec 13, 2011 at 10:56 AM, Richard Henderson wrote: > On 12/13/2011 10:26 AM, Sriraman Tallam wrote: >> Cool, this works for stores! It generates the movlps + movhps. I have >> to also make a similar change to another call to gen_sse2_movdqu for >> loads. Would it be ok to not do this when tune=core2? > > We can work something out. > > I'd like you to do the benchmarking to know if unaligned loads are really as > expensive as unaligned stores, and whether there are reformatting penalties > that make the movlps+movhps option for either load or store less attractive. Ok thanks, I will get back with the data. > > > r~
Re: fix TM-clone calls in C++ dumps
On 12/10/2011 11:32 AM, Aldy Hernandez wrote: > * trans-mem.c (ipa_tm_create_version_alias): Set DECL_CONTEXT and > DECL_LANG_SPECIFIC. Ok. r~
Re: [PATCH] PR c++/51476 - ICE on PTRMEM_CST as template argument in c++11
On 12/13/2011 02:03 PM, Jason Merrill wrote: On 12/13/2011 12:43 PM, Dodji Seketeli wrote: + if (TREE_CODE (expr) == PTRMEM_CST) + /* We don't need to test if a PTRMEM_CST is a constant value + because maybe_constant_value might crash and because + [temp.arg.nontype]/1 says it's not allowed as a template + argument anyway. */; A PTRMEM_CST is already constant, and a valid template argument for a parameter of pointer to member type, we just want to leave it in that form rather than lower it to a CONSTRUCTOR. But yes, the patch is OK after fixing the comment. Jason
Re: [PATCH] PR c++/51476 - ICE on PTRMEM_CST as template argument in c++11
On 12/13/2011 12:43 PM, Dodji Seketeli wrote: + if (TREE_CODE (expr) == PTRMEM_CST) + /* We don't need to test if a PTRMEM_CST is a constant value + because maybe_constant_value might crash and because + [temp.arg.nontype]/1 says it's not allowed as a template + argument anyway. */; A PTRMEM_CST is already constant, and a valid template argument for a parameter of pointer to member type, we just want to leave it in that form rather than lower it to a CONSTRUCTOR. Jason
Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)
On 12/13/2011 10:26 AM, Sriraman Tallam wrote: > Cool, this works for stores! It generates the movlps + movhps. I have > to also make a similar change to another call to gen_sse2_movdqu for > loads. Would it be ok to not do this when tune=core2? We can work something out. I'd like you to do the benchmarking to know if unaligned loads are really as expensive as unaligned stores, and whether there are reformatting penalties that make the movlps+movhps option for either load or store less attractive. r~
Go patch committed: Fix last conversion patch
I came across a test case which showed that the conversion patch I just committed did not always use the correct types--it got a gimplification failure. The problem was that the builtin call was being recorded as returning the type for which it was first called, and that might be different from some later type. Since all types it will be used with will always look exactly the same, I just avoided the error by avoiding the caching. This code will be changed in the future to use the new Runtime::make_call interface anyhow. Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu. Committed to mainline. Ian diff -r 61ab46b28b40 go/expressions.cc --- a/go/expressions.cc Tue Dec 13 10:08:46 2011 -0800 +++ b/go/expressions.cc Tue Dec 13 10:38:21 2011 -0800 @@ -3669,7 +3669,7 @@ if (e->integer_type()->is_unsigned() && e->integer_type()->bits() == 8) { - static tree string_to_byte_array_fndecl; + tree string_to_byte_array_fndecl = NULL_TREE; ret = Gogo::call_builtin(&string_to_byte_array_fndecl, this->location(), "__go_string_to_byte_array", @@ -3681,7 +3681,7 @@ else { go_assert(e == Type::lookup_integer_type("int")); - static tree string_to_int_array_fndecl; + tree string_to_int_array_fndecl = NULL_TREE; ret = Gogo::call_builtin(&string_to_int_array_fndecl, this->location(), "__go_string_to_int_array",
Re: Go patch committed: Multiplex goroutines onto OS threads
Ian Lance Taylor writes: > Rainer Orth writes: > >> Ian Lance Taylor writes: >> >>> Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu. Tested >>> both with and without -fsplit-stack support. Committed to mainline. >> >> Once Go bootstrap works again on Solaris, I notice that there are many >> 64-bit testsuite failures, which have been introduced between 2025 >> (r181724) and 2030 (r181837), so this patch is the obvious culprit. > > I just committed another libgo update, which will no doubt lead to > further problems. Only a few minor compilation problems, fixed with the following patch. Rainer diff --git a/libgo/go/net/fd_select.go b/libgo/go/net/fd_select.go --- a/libgo/go/net/fd_select.go +++ b/libgo/go/net/fd_select.go @@ -85,7 +85,8 @@ func (p *pollster) WaitFD(s *pollServer, timeout = &tv } - var n, e int + var n int + var e error var tmpReadFds, tmpWriteFds syscall.FdSet for { // Temporary syscall.FdSet's into which the values are copied @@ -101,7 +102,7 @@ func (p *pollster) WaitFD(s *pollServer, break } } - if e != 0 { + if e != nil { return -1, 0, os.NewSyscallError("select", e) } if n == 0 { diff --git a/libgo/go/os/sys_uname.go b/libgo/go/os/sys_uname.go --- a/libgo/go/os/sys_uname.go +++ b/libgo/go/os/sys_uname.go @@ -10,7 +10,7 @@ import "syscall" func Hostname() (name string, err error) { var u syscall.Utsname - if errno := syscall.Uname(&u); errno != 0 { + if errno := syscall.Uname(&u); errno != nil { return "", NewSyscallError("uname", errno) } b := make([]byte, len(u.Nodename)) -- - Rainer Orth, Center for Biotechnology, Bielefeld University
Re: [patch] add __is_final trait to fix libstdc++/51365
Hi, > So the proposed patch just adds the __is_final intrinsic for use > internally by the library, to allow library changes so the test cases > in the bug report will pass. If preferred I won't even add __is_final > to the extend.texi docs, to leave it as an undocumented extension that > we could more easily remove (or deprecate) later if necessary. I think this makes absolutely sense. And documenting it seems fine: after all, for better and worse, 'final' itself is in C++11. For the internal library uses, on the other hand, I don't think we should overly document a trait like __is_empty_non_final (essentially an __and_) which, afaics, would usefully replace std::is_empty in various places for 4.7. Paolo
Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)
On Mon, Dec 12, 2011 at 9:54 PM, Ira Rosen wrote: > > > gcc-patches-ow...@gcc.gnu.org wrote on 13/12/2011 04:05:57 AM: > >> On core2, unaligned vector load/store using movdqu is a very slow > operation. >> Experiments show it is six times slower than movdqa (aligned) and this is >> irrespective of whether the resulting data happens to be aligned or not. >> For Corei7, there is no performance difference between the two and on > AMDs, >> movdqu is only about 10% slower. >> >> This patch does not vectorize loops that need to generate the slow > unaligned >> memory load/stores on core2. >> >> >> Do not vectorize loops on Core2 that need to use unaligned >> vector load/stores. >> * tree-vect-stmts.c (is_slow_vect_unaligned_load_store): New function. >> (vect_analyze_stmt): Check if the vectorizable load/store is slow. >> * target.def (TARGET_SLOW_UNALIGNED_VECTOR_MEMOP): New target hook. >> * doc/m.texi.in: Document new target hook: >> TARGET_SLOW_UNALIGNED_VECTOR_MEMOP >> * doc/m.texi: Regenerate. >> * config/i386/i386.c (ix86_slow_unaligned_vector_memop): New function. >> (TARGET_SLOW_UNALIGNED_VECTOR_MEMOP): New macro. >> > > >> @@ -5065,27 +5112,43 @@ vect_analyze_stmt (gimple stmt, bool > *need_to_vect >> if (!bb_vinfo >> && (STMT_VINFO_RELEVANT_P (stmt_info) >> || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)) >> + { >> ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL) >> || vectorizable_type_demotion (stmt, NULL, NULL, NULL) >> || vectorizable_conversion (stmt, NULL, NULL, NULL) >> || vectorizable_shift (stmt, NULL, NULL, NULL) >> || vectorizable_operation (stmt, NULL, NULL, NULL) >> || vectorizable_assignment (stmt, NULL, NULL, NULL) >> - || vectorizable_load (stmt, NULL, NULL, NULL, NULL) >> || vectorizable_call (stmt, NULL, NULL) >> - || vectorizable_store (stmt, NULL, NULL, NULL) >> - || vectorizable_reduction (stmt, NULL, NULL, NULL) >> + || vectorizable_reduction (stmt, NULL, NULL, NULL) >> || vectorizable_condition (stmt, NULL, NULL, NULL, 0)); >> + >> + if (!ok) >> + { >> + ok = (vectorizable_load (stmt, NULL, NULL, NULL, NULL) >> + || vectorizable_store (stmt, NULL, NULL, NULL)); >> + >> + if (ok && is_slow_vect_unaligned_load_store (stmt)) >> + ok = false; > > Why not call is_slow_vect_unaligned_load_store from > vectorizable_load/store? Yes, I should have done that. Somehow, I missed that! > > Ira > > >> + } >> + } >> else >> { >> if (bb_vinfo) >> - ok = (vectorizable_type_promotion (stmt, NULL, NULL, node) >> - || vectorizable_type_demotion (stmt, NULL, NULL, node) >> - || vectorizable_shift (stmt, NULL, NULL, node) >> - || vectorizable_operation (stmt, NULL, NULL, node) >> - || vectorizable_assignment (stmt, NULL, NULL, node) >> - || vectorizable_load (stmt, NULL, NULL, node, NULL) >> - || vectorizable_store (stmt, NULL, NULL, node)); >> + { >> + ok = (vectorizable_type_promotion (stmt, NULL, NULL, node) >> + || vectorizable_type_demotion (stmt, NULL, NULL, node) >> + || vectorizable_shift (stmt, NULL, NULL, node) >> + || vectorizable_operation (stmt, NULL, NULL, node) >> + || vectorizable_assignment (stmt, NULL, NULL, node)); >> + if (!ok) >> + { >> + ok = (vectorizable_load (stmt, NULL, NULL, node, NULL) >> + || vectorizable_store (stmt, NULL, NULL, node)); >> + if (ok && is_slow_vect_unaligned_load_store (stmt)) >> + ok = false; >> + } >> + } >> } >
Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)
On Mon, Dec 12, 2011 at 11:49 PM, Jakub Jelinek wrote: > On Mon, Dec 12, 2011 at 06:05:57PM -0800, Sriraman Tallam wrote: >> Do not vectorize loops on Core2 that need to use unaligned >> vector load/stores. >> * tree-vect-stmts.c (is_slow_vect_unaligned_load_store): New function. >> (vect_analyze_stmt): Check if the vectorizable load/store is slow. >> * target.def (TARGET_SLOW_UNALIGNED_VECTOR_MEMOP): New target hook. >> * doc/m.texi.in: Document new target hook: >> TARGET_SLOW_UNALIGNED_VECTOR_MEMOP >> * doc/m.texi: Regenerate. >> * config/i386/i386.c (ix86_slow_unaligned_vector_memop): New function. >> (TARGET_SLOW_UNALIGNED_VECTOR_MEMOP): New macro. > > IMHO it would be better if it didn't prevent vectorization of the loops > altogether, but lead to using aligned stores with an alignment check > before the vectorized loop if possible. > > Also, are unaligned loads equally expensive to unaligned stores? Unaligned stores are always expensive, irrespective of whether the data turns out to be aligned or not at run-time. Unaligned loads are only as expensive when the data item is unaligned. For unaligned loads of aligned data, the movdqu is still slow but by ~2x rather than 6x. > > See http://gcc.gnu.org/PR49442 for further info, this really should be done > using some cost model rather than a boolean hook. > > Jakub
Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)
On Tue, Dec 13, 2011 at 9:58 AM, Richard Henderson wrote: > On 12/12/2011 06:05 PM, Sriraman Tallam wrote: >> On core2, unaligned vector load/store using movdqu is a very slow operation. >> Experiments show it is six times slower than movdqa (aligned) and this is >> irrespective of whether the resulting data happens to be aligned or not. >> For Corei7, there is no performance difference between the two and on AMDs, >> movdqu is only about 10% slower. >> >> This patch does not vectorize loops that need to generate the slow unaligned >> memory load/stores on core2. > > What happens if you temporarily disable > > /* ??? Similar to above, only less clear because of quote > typeless stores unquote. */ > if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES > && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) > { > op0 = gen_lowpart (V16QImode, op0); > op1 = gen_lowpart (V16QImode, op1); > emit_insn (gen_sse2_movdqu (op0, op1)); > return; > } > > so that the unaligned store happens via movlps + movhps? Cool, this works for stores! It generates the movlps + movhps. I have to also make a similar change to another call to gen_sse2_movdqu for loads. Would it be ok to not do this when tune=core2? Thanks, -Sri. > > > r~
Re: [libjava] Support 64-bit multilib for i?86-linux
On 12/13/2011 05:51 PM, Rainer Orth wrote: > 2011-12-13 Rainer Orth > > * configure.ac (i?86-*-linux*): Set SIGNAL_HANDLER_AUX. > * configure: Regenerate. > * include/i386-signal.h: Wrap in __i386__, include > java-signal-aux.h otherwise. OK, thanks. Andrew.
Go patch committed: Permit conversions between string and named slice
The Go language permits converting between string and []byte or []rune. It was recently changed to permit converting between string a named type whose base type is []byte or []rune. This patch implements that in gccgo. Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu. Committed to mainline. Ian Index: gcc/go/gofrontend/types.cc === --- gcc/go/gofrontend/types.cc (revision 182143) +++ gcc/go/gofrontend/types.cc (working copy) @@ -662,7 +662,7 @@ Type::are_convertible(const Type* lhs, c { if (rhs->integer_type() != NULL) return true; - if (rhs->is_slice_type() && rhs->named_type() == NULL) + if (rhs->is_slice_type()) { const Type* e = rhs->array_type()->element_type()->forwarded(); if (e->integer_type() != NULL @@ -673,9 +673,7 @@ Type::are_convertible(const Type* lhs, c } // A string may be converted to []byte or []int. - if (rhs->is_string_type() - && lhs->is_slice_type() - && lhs->named_type() == NULL) + if (rhs->is_string_type() && lhs->is_slice_type()) { const Type* e = lhs->array_type()->element_type()->forwarded(); if (e->integer_type() != NULL Index: gcc/go/gofrontend/expressions.cc === --- gcc/go/gofrontend/expressions.cc (revision 182154) +++ gcc/go/gofrontend/expressions.cc (working copy) @@ -3322,7 +3322,7 @@ Type_conversion_expression::do_lower(Gog mpfr_clear(imag); } - if (type->is_slice_type() && type->named_type() == NULL) + if (type->is_slice_type()) { Type* element_type = type->array_type()->element_type()->forwarded(); bool is_byte = element_type == Type::lookup_integer_type("uint8"); @@ -3621,20 +3621,11 @@ Type_conversion_expression::do_get_tree( integer_type_node, fold_convert(integer_type_node, expr_tree)); } - else if (type->is_string_type() - && (expr_type->array_type() != NULL - || (expr_type->points_to() != NULL - && expr_type->points_to()->array_type() != NULL))) + else if (type->is_string_type() && expr_type->is_slice_type()) { - Type* t = expr_type; - if (t->points_to() != NULL) - { - t = t->points_to(); - expr_tree = build_fold_indirect_ref(expr_tree); - } if (!DECL_P(expr_tree)) expr_tree = save_expr(expr_tree); - Array_type* a = t->array_type(); + Array_type* a = expr_type->array_type(); Type* e = a->element_type()->forwarded(); go_assert(e->integer_type() != NULL); tree valptr = fold_convert(const_ptr_type_node, Index: gcc/testsuite/go.test/test/convlit.go === --- gcc/testsuite/go.test/test/convlit.go (revision 181963) +++ gcc/testsuite/go.test/test/convlit.go (working copy) @@ -36,7 +36,7 @@ var good3 int = 1e9 var good4 float64 = 1e20 // explicit conversion of string is okay -var _ = []int("abc") +var _ = []rune("abc") var _ = []byte("abc") // implicit is not @@ -47,20 +47,20 @@ var _ []byte = "abc" // ERROR "cannot us type Tstring string var ss Tstring = "abc" -var _ = []int(ss) +var _ = []rune(ss) var _ = []byte(ss) // implicit is still not -var _ []int = ss // ERROR "cannot use|incompatible|invalid" +var _ []rune = ss // ERROR "cannot use|incompatible|invalid" var _ []byte = ss // ERROR "cannot use|incompatible|invalid" -// named slice is not -type Tint []int +// named slice is now ok +type Trune []rune type Tbyte []byte -var _ = Tint("abc") // ERROR "convert|incompatible|invalid" -var _ = Tbyte("abc") // ERROR "convert|incompatible|invalid" +var _ = Trune("abc") // ok +var _ = Tbyte("abc") // ok // implicit is still not -var _ Tint = "abc" // ERROR "cannot use|incompatible|invalid" +var _ Trune = "abc" // ERROR "cannot use|incompatible|invalid" var _ Tbyte = "abc" // ERROR "cannot use|incompatible|invalid" Index: gcc/testsuite/go.test/test/named1.go === --- gcc/testsuite/go.test/test/named1.go (revision 181963) +++ gcc/testsuite/go.test/test/named1.go (working copy) @@ -41,7 +41,6 @@ func main() { asBool(i < j) // ERROR "cannot use.*type bool.*as type Bool" _, b = m[2] // ERROR "cannot .* bool.*type Bool" - m[2] = 1, b // ERROR "cannot use.*type Bool.*as type bool" var inter interface{} _, b = inter.(Map) // ERROR "cannot .* bool.*type Bool" @@ -55,8 +54,8 @@ func main() { _, bb := <-c asBool(bb) // ERROR "cannot use.*type bool.*as type Bool" - _, b = <-c // ERROR "cannot .* bool.*type Bool" + _, b = <-c // ERROR "cannot .* bool.*type Bool" _ = b - asString(String(slice)) // ERROR "cannot .*type Slice.*type String" + asString(String(slice)) // ok }
Re: [patch] add __is_final trait to fix libstdc++/51365
On 13 December 2011 17:01, Paolo Carlini wrote: > Hi, >> >> This patch seems pretty simple and safe. Are you (Gaby and Paolo) arguing >> that even so, it shouldn't go in? > > As far as I'm concerned, definetely not! I also think that it would be great > if, for 4.7, Jon could handle the library issues with EBO by exploiting it. Yes, if this goes in for 4.7 I will definitely follow it with the library changes to make use of it. > I only meant to say that something seems to me more fundamentally wrong at > the design level about 'final' vs EBO, my hope is that for 4.8 we'll have a > longer term stable solution based on a ISO Committee position. In one of the earlier bug report comments I proposed a __gnu_cxx::is_final library trait to expose the __is_final(T) intrinsic for users, but decided against it precisely because I don't know how the committee will want to deal with the issue longer term. So the proposed patch just adds the __is_final intrinsic for use internally by the library, to allow library changes so the test cases in the bug report will pass. If preferred I won't even add __is_final to the extend.texi docs, to leave it as an undocumented extension that we could more easily remove (or deprecate) later if necessary.
Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)
On 12/12/2011 06:05 PM, Sriraman Tallam wrote: > On core2, unaligned vector load/store using movdqu is a very slow operation. > Experiments show it is six times slower than movdqa (aligned) and this is > irrespective of whether the resulting data happens to be aligned or not. > For Corei7, there is no performance difference between the two and on AMDs, > movdqu is only about 10% slower. > > This patch does not vectorize loops that need to generate the slow unaligned > memory load/stores on core2. What happens if you temporarily disable /* ??? Similar to above, only less clear because of quote typeless stores unquote. */ if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) { op0 = gen_lowpart (V16QImode, op0); op1 = gen_lowpart (V16QImode, op1); emit_insn (gen_sse2_movdqu (op0, op1)); return; } so that the unaligned store happens via movlps + movhps? r~
Re: PR/51443: fix inline assembly in transaction_callable functions
On 12/13/11 11:49, Eric Botcazou wrote: This fixes the PR. You need to put the Component before the / in the ChangeLog, otherwise the commit won't be cross-referenced in Bugzilla. Funny, I just noticed. I will do so from now on. Thanks.
[libjava] Support 64-bit multilib for i?86-linux
This is the last patch necessary to have a full-blown i686-unknown-linux-gnu --enable-targets=all configuration to work. It requires http://gcc.gnu.org/ml/gcc-patches/2011-12/msg01011.html and fixes a compilation failure compiling the 64-bit libgcj multilib: $ /var/gcc/regression/trunk/2.6.18-gcc-gas-gld-32/build/./gcc/xgcc -shared-libgcc -B/var/gcc/regression/trunk/2.6.18-gcc-gas-gld-32/build/./gcc -nostdinc++ -L/var/gcc/regression/trunk/2.6.18-gcc-gas-gld-32/build/i686-unknown-linux-gnu/64/libstdc++-v3/src -L/var/gcc/regression/trunk/2.6.18-gcc-gas-gld-32/build/i686-unknown-linux-gnu/64/libstdc++-v3/src/.libs -B/vol/gcc/i686-unknown-linux-gnu/bin/ -B/vol/gcc/i686-unknown-linux-gnu/lib/ -isystem /vol/gcc/i686-unknown-linux-gnu/include -isystem /vol/gcc/i686-unknown-linux-gnu/sys-include -m64 -DHAVE_CONFIG_H -I. -I/vol/gcc/src/hg/trunk/local/libjava -I./include -I./gcj -I/vol/gcc/src/hg/trunk/local/libjava -Iinclude -I/vol/gcc/src/hg/trunk/local/libjava/include -I/vol/gcc/src/hg/trunk/local/libjava/classpath/include -Iclasspath/include -I/vol/gcc/src/hg/trunk/local/libjava/classpath/native/fdlibm -I/vol/gcc/src/hg/trunk/local/libjava/../boehm-gc/include -I../boehm-gc/include -I/vol/gcc/src/hg/trunk/local/libjava/libltdl -I/vol/gcc/src/hg/trunk/local/libjava/libltdl -I/vol/gcc/src/hg/trunk/local/libjava/.././libjava/../libgcc -I/vol/gcc/src/hg/trunk/local/libjava/../zlib -I/vol/gcc/src/hg/trunk/local/libjava/../libffi/include -I../libffi/include -fno-rtti -fnon-call-exceptions -fdollars-in-identifiers -Wswitch-enum -D_FILE_OFFSET_BITS=64 -ffloat-store -fomit-frame-pointer -Usun -Wextra -Wall -D_GNU_SOURCE -DPREFIX=\"/vol/gcc\" -DTOOLEXECLIBDIR=\"/vol/gcc/lib/../lib64\" -DJAVA_HOME=\"/vol/gcc\" -DBOOT_CLASS_PATH=\"/vol/gcc/share/java/libgcj-4.7.0.jar\" -DJAVA_EXT_DIRS=\"/vol/gcc/share/java/ext\" -DGCJ_ENDORSED_DIRS=\"/vol/gcc/share/java/gcj-endorsed\" -DGCJ_VERSIONED_LIBDIR=\"/vol/gcc/lib/../lib64/gcj-4.7.0-13\" -DPATH_SEPARATOR=\":\" -DECJ_JAR_FILE=\"\" -DLIBGCJ_DEFAULT_DATABASE=\"/vol/gcc/lib/../lib64/gcj-4.7.0-13/classmap.db\" -DLIBGCJ_DEFAULT_DATABASE_PATH_TAIL=\"gcj-4.7.0-13/classmap.db\" -fno-omit-frame-pointer -g -O2 -D_GNU_SOURCE -m64 -MT prims.lo -MD -MP -MF .deps/prims.Tpo -c /vol/gcc/src/hg/trunk/local/libjava/prims.cc -fPIC -DPIC -o .libs/prims.o /vol/gcc/src/hg/trunk/local/libjava/prims.cc: In function 'void _Jv_catch_fpe(int, siginfo_t*, void*)': /vol/gcc/src/hg/trunk/local/libjava/prims.cc:192:3: error: 'REG_EIP' was not declared in this scope /vol/gcc/src/hg/trunk/local/libjava/prims.cc:192:3: error: 'REG_EAX' was not declared in this scope /vol/gcc/src/hg/trunk/local/libjava/prims.cc:192:3: error: 'REG_EDX' was not declared in this scope It applies the same technique already used for the 32-bit multilib in the x86_64-*-linux* configuration. It allowed the i686-unknown-linux-gnu bootstrap to finish successfully and bootstrapped without regressions on x86_64-unknown-linux-gnu. Ok for mainline? Rainer 2011-12-13 Rainer Orth * configure.ac (i?86-*-linux*): Set SIGNAL_HANDLER_AUX. * configure: Regenerate. * include/i386-signal.h: Wrap in __i386__, include java-signal-aux.h otherwise. diff --git a/libjava/configure.ac b/libjava/configure.ac --- a/libjava/configure.ac +++ b/libjava/configure.ac @@ -1737,6 +1737,7 @@ case "${host}" in ;; i?86-*-linux*) SIGNAL_HANDLER=include/i386-signal.h +SIGNAL_HANDLER_AUX=include/x86_64-signal.h ;; # ia64-*) #SYSDEP_SOURCES=sysdep/ia64.c diff --git a/libjava/include/i386-signal.h b/libjava/include/i386-signal.h --- a/libjava/include/i386-signal.h +++ b/libjava/include/i386-signal.h @@ -1,7 +1,8 @@ // i386-signal.h - Catch runtime signals and turn them into exceptions // on an i386 based Linux system. -/* Copyright (C) 1998, 1999, 2001, 2002, 2006, 2007 Free Software Foundation +/* Copyright (C) 1998, 1999, 2001, 2002, 2006, 2007, 2011 + Free Software Foundation This file is part of libgcj. @@ -10,6 +11,8 @@ Libgcj License. Please consult the file details. */ +#ifdef __i386__ + #ifndef JAVA_SIGNAL_H #define JAVA_SIGNAL_H 1 @@ -165,3 +168,11 @@ while (0) #endif /* JAVA_SIGNAL_H */ +#else /* __i386__ */ + +/* This is for the 64-bit subsystem on i386. */ + +#define sigcontext_struct sigcontext +#include + +#endif /* __i386__ */ -- - Rainer Orth, Center for Biotechnology, Bielefeld University
Re: PR/51443: fix inline assembly in transaction_callable functions
> This fixes the PR. You need to put the Component before the / in the ChangeLog, otherwise the commit won't be cross-referenced in Bugzilla. -- Eric Botcazou
Re: [PATCH] PR c++/51476 - ICE on PTRMEM_CST as template argument in c++11
Jason Merrill writes: > Let's check for PTRMEM_CST separately, before any of the other tests. Like this? I still need to run it through bootstrap ... gcc/cp/ PR c++/51476 * pt.c (convert_nontype_argument): Don't call maybe_constant_value for PTRMEM_CST nodes. gcc/testsuite/ PR c++/51476 * cpp0x/ptrmem-cst-arg1.C: New test. --- gcc/cp/pt.c | 10 +++--- gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C |9 + 2 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index bb5aa0c..d127a7b 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -5720,11 +5720,15 @@ convert_nontype_argument (tree type, tree expr, tsubst_flags_t complain) to a null value, but otherwise still need to be of a specific form. */ if (cxx_dialect >= cxx0x) { - if (INTEGRAL_OR_ENUMERATION_TYPE_P (type)) + if (TREE_CODE (expr) == PTRMEM_CST) + /* We don't need to test if a PTRMEM_CST is a constant value + because maybe_constant_value might crash and because + [temp.arg.nontype]/1 says it's not allowed as a template + argument anyway. */; + else if (INTEGRAL_OR_ENUMERATION_TYPE_P (type)) expr = maybe_constant_value (expr); else if (TYPE_PTR_P (type) - || (TYPE_PTR_TO_MEMBER_P (type) - && TREE_CODE (expr) != PTRMEM_CST)) + || TYPE_PTR_TO_MEMBER_P (type)) { tree folded = maybe_constant_value (expr); if (TYPE_PTR_P (type) ? integer_zerop (folded) diff --git a/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C b/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C new file mode 100644 index 000..b6c81d5 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C @@ -0,0 +1,9 @@ +// Origin PR c++/51476 +// { dg-options "-std=c++11" } + +template struct A {}; +struct B +{ +int i; +A<&B::i> a; // { dg-error "could not convert template argument" } +}; -- 1.7.6.4 -- Dodji
[PATCH] regmove: Fix segfault when accessing call_used_regs
Hi, a very high pseudo register number is needed here to actually cause a segfault. I observed it on s390 with a high value in max-completely-peeled-insns when compiling the href benchmark on s390. Ok for mainline, 4.6, 4.5, and 4.4? Bye, -Andreas- 2011-12-13 Andreas Krebbel * regmove.c (fixup_match_2): Only access call_used_regs with hard regs. Index: gcc/regmove.c === *** gcc/regmove.c.orig --- gcc/regmove.c *** fixup_match_2 (rtx insn, rtx dst, rtx sr *** 859,865 if (REG_N_CALLS_CROSSED (REGNO (src)) == 0) break; ! if (call_used_regs [REGNO (dst)] || find_reg_fusage (p, CLOBBER, dst)) break; } --- 859,865 if (REG_N_CALLS_CROSSED (REGNO (src)) == 0) break; ! if ((REGNO (dst) < FIRST_PSEUDO_REGISTER && call_used_regs [REGNO (dst)]) || find_reg_fusage (p, CLOBBER, dst)) break; }
[libffi] Build 64-bit multilib for i?86-linux
I've recently tried a i686-unknown-linux-gnu --enable-targets=all bootstrap, which failed to link the 64-bit jv-convert etc.: ./.libs/libgcj.so: undefined reference to `ffi_raw_call' ./.libs/libgcj.so: undefined reference to `ffi_prep_raw_closure_loc' ./.libs/libgcj.so: undefined reference to `ffi_prep_cif_machdep' ./.libs/libgcj.so: undefined reference to `ffi_prep_closure_loc' ./.libs/libgcj.so: undefined reference to `ffi_call' collect2: error: ld returned 1 exit status make[5]: *** [jv-convert] Error 1 This happens because libffi doesn't build the x86_64 objects in this configuration. The following patch fixes this and allowed (together with a libjava patch to be submitted shortly) the bootstrap to finish. I've also bootstrapped x86_64-unknown-linux-gnu without regressions. The same issue still exists in upstream libffi, and should be harmless without --enable-targets=all since the added files are empty then. Ok for mainline gcc? Rainer 2011-12-13 Rainer Orth * configure.ac (i?86-*-*): Set TARGET to X86_64. * configure: Regenerate. diff --git a/libffi/configure.ac b/libffi/configure.ac --- a/libffi/configure.ac +++ b/libffi/configure.ac @@ -99,7 +99,7 @@ case "$host" in TARGET=X86_64; TARGETDIR=x86 ;; i?86-*-*) - TARGET=X86; TARGETDIR=x86 + TARGET=X86_64; TARGETDIR=x86 ;; ia64*-*-*) -- - Rainer Orth, Center for Biotechnology, Bielefeld University
[Patch, Fortran] Some fixes for polymorphic coarrays
Two small fixes: a) There was an ICE when simplifying "THIS_IMAGE(caf)" (for -fcoarray=single); solution: Simply use internally lcobound(), which is identically (for a single image). b) There was an segfault of the compiled program when running "this_image(caf)" where "caf" is a corank-1 coarray. Calculating the extend of an assumed size array should be avoided ... The patch has been build and regtested on x86-64-linux. OK for the trunk? Tobias PS: There are still some other issues with polymorphic coarrays, see "Next steps" in http://users.physik.fu-berlin.de/~tburnus/coarray/README.txt for a list. For instance, there is an ICE if one tries to explicitly deallocate scalar polymorphic coarrays. 2011-12-13 Tobias Burnus * simplify.c (gfc_simplify_image_index): Directly call simplify_cobound. * trans-intrinsic.c (trans_this_image): Fix handling of corank = 1 arrays. 2011-12-13 Tobias Burnus * gfortran.dg/coarray/poly_run_3.f90: New. diff --git a/gcc/fortran/simplify.c b/gcc/fortran/simplify.c index e82753a..282d88d 100644 --- a/gcc/fortran/simplify.c +++ b/gcc/fortran/simplify.c @@ -6227,10 +6227,6 @@ gfc_simplify_image_index (gfc_expr *coarray, gfc_expr *sub) gfc_expr * gfc_simplify_this_image (gfc_expr *coarray, gfc_expr *dim) { - gfc_ref *ref; - gfc_array_spec *as; - int d; - if (gfc_option.coarray != GFC_FCOARRAY_SINGLE) return NULL; @@ -6244,74 +6240,8 @@ gfc_simplify_this_image (gfc_expr *coarray, gfc_expr *dim) return result; } - gcc_assert (coarray->expr_type == EXPR_VARIABLE); - - /* Follow any component references. */ - as = coarray->symtree->n.sym->as; - for (ref = coarray->ref; ref; ref = ref->next) -if (ref->type == REF_COMPONENT) - as = ref->u.ar.as; - - if (as->type == AS_DEFERRED) -return NULL; - - if (dim == NULL) -{ - /* Multi-dimensional bounds. */ - gfc_expr *bounds[GFC_MAX_DIMENSIONS]; - gfc_expr *e; - - /* Simplify the bounds for each dimension. */ - for (d = 0; d < as->corank; d++) - { - bounds[d] = simplify_bound_dim (coarray, NULL, d + as->rank + 1, 0, - as, NULL, true); - if (bounds[d] == NULL || bounds[d] == &gfc_bad_expr) - { - int j; - - for (j = 0; j < d; j++) - gfc_free_expr (bounds[j]); - - return bounds[d]; - } - } - - /* Allocate the result expression. */ - e = gfc_get_expr (); - e->where = coarray->where; - e->expr_type = EXPR_ARRAY; - e->ts.type = BT_INTEGER; - e->ts.kind = gfc_default_integer_kind; - - e->rank = 1; - e->shape = gfc_get_shape (1); - mpz_init_set_ui (e->shape[0], as->corank); - - /* Create the constructor for this array. */ - for (d = 0; d < as->corank; d++) -gfc_constructor_append_expr (&e->value.constructor, - bounds[d], &e->where); - - return e; -} - else -{ - /* A DIM argument is specified. */ - if (dim->expr_type != EXPR_CONSTANT) - return NULL; - - d = mpz_get_si (dim->value.integer); - - if (d < 1 || d > as->corank) - { - gfc_error ("DIM argument at %L is out of bounds", &dim->where); - return &gfc_bad_expr; - } - - return simplify_bound_dim (coarray, NULL, d + as->rank, 0, as, NULL, - true); - } + /* For -fcoarray=single, this_image(A) is the same as lcobound(A). */ + return simplify_cobound (coarray, dim, NULL, 0); } diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c index 58112e3..5c964c1 100644 --- a/gcc/fortran/trans-intrinsic.c +++ b/gcc/fortran/trans-intrinsic.c @@ -1054,6 +1054,11 @@ trans_this_image (gfc_se * se, gfc_expr *expr) one always has a dim_arg argument. m = this_images() - 1 + if (corank == 1) + { + sub(1) = m + lcobound(corank) + return; + } i = rank min_var = min (rank + corank - 2, rank + dim_arg - 1) for (;;) @@ -1070,15 +1075,29 @@ trans_this_image (gfc_se * se, gfc_expr *expr) : m + lcobound(corank) */ + /* this_image () - 1. */ + tmp = fold_convert (type, gfort_gvar_caf_this_image); + tmp = fold_build2_loc (input_location, MINUS_EXPR, type, tmp, + build_int_cst (type, 1)); + if (corank == 1) +{ + /* sub(1) = m + lcobound(corank). */ + lbound = gfc_conv_descriptor_lbound_get (desc, + build_int_cst (TREE_TYPE (gfc_array_index_type), + corank+rank-1)); + lbound = fold_convert (type, lbound); + tmp = fold_build2_loc (input_location, PLUS_EXPR, type, tmp, lbound); + + se->expr = tmp; + return; +} + m = gfc_create_var (type, NULL); ml = gfc_create_var (type, NULL); loop_var = gfc_create_var (integer_type_node, NULL); min_var = gfc_create_var (integer_type_node, NULL); /* m = this_image () - 1. */ - tmp = fold_convert (type, gfort_gvar_caf_this_image); - tmp = fold_build2_loc (input_location, MINUS_EXPR, type, tmp, - build_int_cst (type, 1));
Re: [RFC] Port libitm to powerpc
On 12/12/2011 07:08 PM, David Edelsohn wrote: > If you don't want to grab the L2 cache line size directly, could you > default to 32 bytes on PPC32 and 128 bytes on PPC64 (__powerpc64__) ? Done. r~
[ada] Support 64-bit libgnat multilib on i?86-linux
I recently set up a bi-arch environment on i686-unknown-linux-gnu with --enable-targets=all to check which of a couple of problems I'm seeing on i386-pc-solaris2.1? are Solaris specific and which are generic. When trying to bootstrap such a compiler with all languages, Ada bootstrap failed compiling the 64-bit libada: $ /var/gcc/regression/trunk/2.6.18-gcc-gas-gld-32/build/./gcc/xgcc -B/var/gcc/regression/trunk/2.6.18-gcc-gas-gld-32/build/./gcc/ -B/vol/gcc/i686-unknown-linux-gnu/bin/ -B/vol/gcc/i686-unknown-linux-gnu/lib/ -isystem /vol/gcc/i686-unknown-linux-gnu/include -isystem /vol/gcc/i686-unknown-linux-gnu/sys-include-c -g -O2 -m64 -fpic -W -Wall -gnatpg -nostdinc -m64 a-finali.adb -o a-finali.o a-finali.ads:64:09: alignment for "Controlledb64s" must be at least 8 a-finali.ads:64:09: alignment for "Controlledr62s" must be at least 8 a-finali.ads:64:09: alignment for "Controlledt59s" must be at least 8 a-finali.ads:70:09: alignment for "Limited_Controlledb98s" must be at least 8 a-finali.ads:70:09: alignment for "Limited_Controlledr96s" must be at least 8 a-finali.ads:70:09: alignment for "Limited_Controlledt93s" must be at least 8 It turned out that this configuration incorrectly used system-linux-x86.ads for both multilibs. The following patch fixes this and allowed (together with a libffi and libjava patch to be submitted shortly) the bootstrap to complete. Ok for mainline? Rainer 2011-12-13 Rainer Orth * gcc-interface/Makefile.in (%86 linux%): (LIBGNAT_TARGET_PAIRS_32): Split off from LIBGNAT_TARGET_PAIRS. (LIBGNAT_TARGET_PAIRS_64): New. (LIBGNAT_TARGET_PAIRS): Add either depending on multilib. diff --git a/gcc/ada/gcc-interface/Makefile.in b/gcc/ada/gcc-interface/Makefile.in --- a/gcc/ada/gcc-interface/Makefile.in +++ b/gcc/ada/gcc-interface/Makefile.in @@ -1102,9 +1102,21 @@ ifeq ($(strip $(filter-out %86 linux%,$( a-exetim.ads -- - Rainer Orth, Center for Biotechnology, Bielefeld University
Re: [4.7][google] Adding a new option -fstack-protector-strong. (issue5461043)
Hi, further comments? Or ok for submit? And as suggested by Diego, I'd like to make it upstream and google branch. Thanks, -Han On Thu, Dec 8, 2011 at 4:55 PM, Han Shen(沈涵) wrote: > Hi, Jakub, thanks! Fixed! > > Hi, Andrew, it's good suggestion. Done. Also modified foo10. > > A small c++ test case was added also. > > Patches (also on http://codereview.appspot.com/5461043) > > diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c > index 8684721..0a7a9f7 100644 > --- a/gcc/cfgexpand.c > +++ b/gcc/cfgexpand.c > @@ -1507,15 +1507,38 @@ estimated_stack_frame_size (struct cgraph_node *node) > return size; > } > > +/* Helper routine to check if a record or union contains an array field. */ > + > +static int > +record_or_union_type_has_array (const_tree tree_type) > +{ > + tree fields = TYPE_FIELDS (tree_type); > + tree f; > + for (f = fields; f; f = DECL_CHAIN (f)) > + { > + if (TREE_CODE (f) == FIELD_DECL) > + { > + tree field_type = TREE_TYPE (f); > + if (RECORD_OR_UNION_TYPE_P (field_type)) > + return record_or_union_type_has_array (field_type); > + if (TREE_CODE (field_type) == ARRAY_TYPE) > + return 1; > + } > + } > + return 0; > +} > + > /* Expand all variables used in the function. */ > > static void > expand_used_vars (void) > { > tree var, outer_block = DECL_INITIAL (current_function_decl); > + referenced_var_iterator rvi; > VEC(tree,heap) *maybe_local_decls = NULL; > unsigned i; > unsigned len; > + int gen_stack_protect_signal = 0; > > /* Compute the phase of the stack frame for this function. */ > { > @@ -1548,6 +1571,28 @@ expand_used_vars (void) > } > } > > + FOR_EACH_REFERENCED_VAR (cfun, var, rvi) > + if (!is_global_var (var)) > + { > + tree var_type = TREE_TYPE (var); > + /* Examine local variables that have been address taken. */ > + if (TREE_ADDRESSABLE (var)) > + { > + ++gen_stack_protect_signal; > + break; > + } > + /* Examine local referenced variables that contain an array or are > + arrays. */ > + if (TREE_CODE (var) == VAR_DECL > + && (TREE_CODE (var_type) == ARRAY_TYPE > + || (RECORD_OR_UNION_TYPE_P (var_type) > + && record_or_union_type_has_array (var_type > + { > + ++gen_stack_protect_signal; > + break; > + } > + } > + > /* At this point all variables on the local_decls with TREE_USED > set are not associated with any block scope. Lay them out. */ > > @@ -1638,12 +1683,17 @@ expand_used_vars (void) > dump_stack_var_partition (); > } > > - /* There are several conditions under which we should create a > - stack guard: protect-all, alloca used, protected decls present. */ > + /* There are several conditions under which we should create a stack guard: > + protect-all, alloca used, protected decls present or a positive > + gen_stack_protect_signal. */ > if (flag_stack_protect == 2 > - || (flag_stack_protect > + || (flag_stack_protect == 1 > && (cfun->calls_alloca || has_protected_decls))) > create_stack_guard (); > + else if (flag_stack_protect == 3 > + && (gen_stack_protect_signal > + || cfun->calls_alloca || has_protected_decls)) > + create_stack_guard (); > > /* Assign rtl to each variable based on these partitions. */ > if (stack_vars_num > 0) > diff --git a/gcc/common.opt b/gcc/common.opt > index 55d3f2d..1ad9717 100644 > --- a/gcc/common.opt > +++ b/gcc/common.opt > @@ -1848,6 +1848,10 @@ fstack-protector-all > Common Report RejectNegative Var(flag_stack_protect, 2) > Use a stack protection method for every function > > +fstack-protector-strong > +Common Report RejectNegative Var(flag_stack_protect, 3) > +Use a smart stack protection method for certain functions > + > fstack-usage > Common RejectNegative Var(flag_stack_usage) > Output stack usage information on a per-function basis > diff --git a/gcc/testsuite/g++.dg/fstack-protector-strong.C > b/gcc/testsuite/g++.dg/fstack-protector-strong.C > new file mode 100644 > index 000..a4f0f81 > --- /dev/null > +++ b/gcc/testsuite/g++.dg/fstack-protector-strong.C > @@ -0,0 +1,35 @@ > +/* Test that stack protection is done on chosen functions. */ > + > +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ > +/* { dg-options "-O2 -fstack-protector-strong" } */ > + > +class A > +{ > +public: > + A() {} > + ~A() {} > + void method(); > + int state; > +}; > + > +/* Frame address exposed to A::method via "this". */ > +int > +foo1 () > +{ > + A a; > + a.method (); > + return a.state; > +} > + > +/* Possible destroying foo2's stack via &a. */ > +int > +global_func (A& a); > + > +/* Frame address exposed to global_func. */ > +int foo2 () > +{ > + A a; > + return global_func (a); > +} > + > +/* { dg-final { scan-assembler-times "
Re: [RFC] Port libitm to powerpc
On 12/13/2011 01:51 AM, Iain Sandoe wrote: > works for me - modulo a couple of typos - probably already fixed in... > >> git://repo.or.cz/gcc/rth.git rth/tm-next > > ... but I haven't got a chance to check that out and rebuild this morning.. Thanks, no I didn't have all of these fixed. Applied to that branch and starting another run on linux. r~
Re: [Patch] Adjust diag-scans in vect-tests to fix fails on AVX/AVX2
On 12/12/2011 09:44 PM, Michael Zolotukhin wrote: > Should we introduce checks for each possible vector datatype (e.g. > vect_8byte_int_available, vect_16byte_int_available, > vect_32byte_int_available, vect_16byte_float_available, > vect_32byte_float_available etc.) along with a check for > prefer_something (e.g. vect_prefer_16byte)? Having such info, we'll be > able to distinguish all the cases, right? Meh. This seems excruciatingly error prone. I think we should simply re-write the test cases so that it doesn't matter how many times we see a particular string. Don't combine so many (potentially vectorizable) loops within a test case that it matters. r~
Re: PR/51443: fix inline assembly in transaction_callable functions
On 12/13/2011 07:37 AM, Aldy Hernandez wrote: > PR/51443 > * trans-mem.c (struct diagnose_tm): Remove saw_unsafe. > (diagnose_tm_1): Same. > (ipa_tm_execute): Do not test tm_may_enter_irr before we set it. > (ipa_tm_scan_irr_function): Return gracefully when no > DECL_STRUCT_FUNCTION. > (ipa_tm_scan_irr_block): Believe the user on TM attributes. Ok. r~
Re: [patch] add __is_final trait to fix libstdc++/51365
Hi, > > This patch seems pretty simple and safe. Are you (Gaby and Paolo) arguing > that even so, it shouldn't go in? As far as I'm concerned, definetely not! I also think that it would be great if, for 4.7, Jon could handle the library issues with EBO by exploiting it. I only meant to say that something seems to me more fundamentally wrong at the design level about 'final' vs EBO, my hope is that for 4.8 we'll have a longer term stable solution based on a ISO Committee position. Paolo
Re: [patch] add __is_final trait to fix libstdc++/51365
On 12/12/2011 09:14 AM, Gabriel Dos Reis wrote: On Mon, Dec 12, 2011 at 5:25 AM, Paolo Carlini wrote: I think being able to detect a final class is good enough for now, until we find out if there are real problems being encountered as people make more use of C++11. Maybe. But in my opinion we should not rush. Something is wrong here at a more fundamental level. I agree that we should wait a little bit for the dust to settle down. Users should avoid it, and implementors shouldn't go through hoops non commensurable with the benefits of "final". Maybe the "right" primitive is slightly different. This patch seems pretty simple and safe. Are you (Gaby and Paolo) arguing that even so, it shouldn't go in? Jason
Re: [PATCH] PR c++/51476 - ICE on PTRMEM_CST as template argument in c++11
On 12/13/2011 10:50 AM, Dodji Seketeli wrote: - if (INTEGRAL_OR_ENUMERATION_TYPE_P (type)) + if (INTEGRAL_OR_ENUMERATION_TYPE_P (type) + && TREE_CODE (expr) != PTRMEM_CST) expr = maybe_constant_value (expr); else if (TYPE_PTR_P (type) || (TYPE_PTR_TO_MEMBER_P (type) This could still break for a parameter of object pointer type. Let's check for PTRMEM_CST separately, before any of the other tests. Jason
[COMMITTED, PR 50628] More careful SRA sub-access propagation accross assignments
Hi, Richi approved the following patch to fix 50628 on IRC. Details about the problem and this particular patch are also in bugzilla. Of course, I bootstrapped and tested the patch today on x86_64-linux. Thanks, Martin 2011-12-13 Martin Jambor PR middle-end/50628 * tree-sra.c (propagate_subaccesses_across_link): Do not propagate sub-accesses of scalar accesses. Index: src/gcc/tree-sra.c === --- src.orig/gcc/tree-sra.c +++ src/gcc/tree-sra.c @@ -2234,21 +2234,23 @@ propagate_subaccesses_across_link (struc || racc->grp_unscalarizable_region) return false; - if (!lacc->first_child && !racc->first_child - && is_gimple_reg_type (racc->type)) + if (is_gimple_reg_type (racc->type)) { - tree t = lacc->base; - - lacc->type = racc->type; - if (build_user_friendly_ref_for_offset (&t, TREE_TYPE (t), lacc->offset, - racc->type)) - lacc->expr = t; - else + if (!lacc->first_child && !racc->first_child) { - lacc->expr = build_ref_for_model (EXPR_LOCATION (lacc->base), - lacc->base, lacc->offset, - racc, NULL, false); - lacc->grp_no_warning = true; + tree t = lacc->base; + + lacc->type = racc->type; + if (build_user_friendly_ref_for_offset (&t, TREE_TYPE (t), + lacc->offset, racc->type)) + lacc->expr = t; + else + { + lacc->expr = build_ref_for_model (EXPR_LOCATION (lacc->base), + lacc->base, lacc->offset, + racc, NULL, false); + lacc->grp_no_warning = true; + } } return false; }
Re: [PATCH, PR 51362] Make IPA-CP not ICE if estimate_ipcp_clone_size_and_time returns zero size
> On Tue, Dec 13, 2011 at 2:38 PM, Martin Jambor wrote: > > Hi, > > > > IPA-CP currently assumes that cloning estimates always have some > > positive size cost. However, there are apparently situations in which > > estimate_ipcp_clone_size_and_time does return zero size and which then > > mostly lead to divisions by zero or failed asserts. This patch avoids > > that by simply bumping the sizes to 1 in those cases. > > > > Bootstrapped and tested on x86_64-linux. OK for trunk? > > Ok. It sounds like some bug earlier, because offline functions are accounted to have size of 1, see estimate_function_body_sizes: /* Estimate static overhead for function prologue/epilogue and alignment. */ int size = 2; (later the value is divied by two) I will try to debug why this value is no longer accounted. Honza
[PATCH] Tidy replace_uses_by
After fixing PR51481 I noticed replace_uses_by does quite some unnecessary work despite being (one of) the core propagator routine(s). The following patch cleans it up to avoid doing useless work. LTO bootstrapped on x86_64-unknown-linux-gnu, testing in progress. I'll apply this tomorrow. Thanks, Richard. 2011-12-13 Richard Guenther * tree-cfg.c (replace_uses_by): Only mark blocks altered that will make a difference. Only recompute ADDR_EXPR invariantness if it could possibly have changed. Do so before folding the statement. Index: gcc/tree-cfg.c === --- gcc/tree-cfg.c (revision 182285) +++ gcc/tree-cfg.c (working copy) @@ -1592,7 +1596,7 @@ replace_uses_by (tree name, tree val) /* This can only occur for virtual operands, since for the real ones SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name)) would prevent replacement. */ - gcc_assert (!is_gimple_reg (name)); + gcc_checking_assert (!is_gimple_reg (name)); SSA_NAME_OCCURS_IN_ABNORMAL_PHI (val) = 1; } } @@ -1604,28 +1608,37 @@ replace_uses_by (tree name, tree val) gimple orig_stmt = stmt; size_t i; - fold_stmt (&gsi); - stmt = gsi_stmt (gsi); - if (cfgcleanup_altered_bbs && !is_gimple_debug (stmt)) + /* Mark the block if we changed the last stmt in it. */ + if (cfgcleanup_altered_bbs + && stmt_ends_bb_p (stmt)) bitmap_set_bit (cfgcleanup_altered_bbs, gimple_bb (stmt)->index); - /* FIXME. This should go in update_stmt. */ - for (i = 0; i < gimple_num_ops (stmt); i++) + /* FIXME. It shouldn't be required to keep TREE_CONSTANT +on ADDR_EXPRs up-to-date on GIMPLE. Propagation will +only change sth from non-invariant to invariant, and only +when propagating integer constants. */ + if (TREE_CODE (val) == INTEGER_CST) + for (i = 0; i < gimple_num_ops (stmt); i++) + { + tree op = gimple_op (stmt, i); + /* Operands may be empty here. For example, the labels + of a GIMPLE_COND are nulled out following the creation + of the corresponding CFG edges. */ + if (op && TREE_CODE (op) == ADDR_EXPR) + recompute_tree_invariant_for_addr_expr (op); + } + + if (fold_stmt (&gsi)) { - tree op = gimple_op (stmt, i); - /* Operands may be empty here. For example, the labels - of a GIMPLE_COND are nulled out following the creation - of the corresponding CFG edges. */ - if (op && TREE_CODE (op) == ADDR_EXPR) - recompute_tree_invariant_for_addr_expr (op); + stmt = gsi_stmt (gsi); + maybe_clean_or_replace_eh_stmt (orig_stmt, stmt); } - maybe_clean_or_replace_eh_stmt (orig_stmt, stmt); update_stmt (stmt); } } - gcc_assert (has_zero_uses (name)); + gcc_checking_assert (has_zero_uses (name)); /* Also update the trees stored in loop structures. */ if (current_loops)
Re: [C++ Patch] PR 51464
OK. Jason
[PATCH] PR c++/51476 - ICE on PTRMEM_CST as template argument in c++11
Hello, In the example of the patch, we crash because convert_nontype_argument calls maybe_constant_value with the PTRMEM_CST for &B::i (in cxx0x mode) which ends up trying to poke at the offset of i, but struct B is not yet laid out. My understanding is that using a pointer to non-static member as an argument of the template A (in this context) is invalid anyway, so we could avoid trying to see if it's a constant value altogether, similar to what's done in the 'else if' right after the offending 'if'. Thus the patch below, bootstrapped and tested on x86_64-unknown-linux-gnu against trunk. gcc/cp/ PR c++/51476 * pt.c (convert_nontype_argument): Don't call maybe_constant_value for PTRMEM_CST nodes. gcc/testsuite/ PR c++/51476 * cpp0x/ptrmem-cst-arg1.C: New test. --- gcc/cp/pt.c |3 ++- gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C |9 + 2 files changed, 11 insertions(+), 1 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index bb5aa0c..8c9f97b 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -5720,7 +5720,8 @@ convert_nontype_argument (tree type, tree expr, tsubst_flags_t complain) to a null value, but otherwise still need to be of a specific form. */ if (cxx_dialect >= cxx0x) { - if (INTEGRAL_OR_ENUMERATION_TYPE_P (type)) + if (INTEGRAL_OR_ENUMERATION_TYPE_P (type) + && TREE_CODE (expr) != PTRMEM_CST) expr = maybe_constant_value (expr); else if (TYPE_PTR_P (type) || (TYPE_PTR_TO_MEMBER_P (type) diff --git a/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C b/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C new file mode 100644 index 000..b6c81d5 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C @@ -0,0 +1,9 @@ +// Origin PR c++/51476 +// { dg-options "-std=c++11" } + +template struct A {}; +struct B +{ +int i; +A<&B::i> a; // { dg-error "could not convert template argument" } +}; -- 1.7.6.4 -- Dodji
PR/51443: fix inline assembly in transaction_callable functions
A while back we redesigned how the may go irrevocable bit was set. In particular, we redesigned it so that the analysis was done after IPA-inline was done, so we didn't make incorrect assumptions about irrevocability. This means that any set or use of the tm_may_enter_irr bit before we calculate it in the IPA-tm pass is incorrect. The following patch removes such references. As is customary, this unearthed a few other buglets and cleanups, which I have also addressed. First, saw_unsafe is no longer needed. I removed it. Second, ipa_tm_scan_irr_function is now called with builtin operators such as new, so we need to exit gracefully when no cfun or CFG exists. Lastly, since we are being slightly more aggressive in determining irrevocability, we need to make sure that user annotated safe functions are respected. This fixes the PR. OK? PR/51443 * trans-mem.c (struct diagnose_tm): Remove saw_unsafe. (diagnose_tm_1): Same. (ipa_tm_execute): Do not test tm_may_enter_irr before we set it. (ipa_tm_scan_irr_function): Return gracefully when no DECL_STRUCT_FUNCTION. (ipa_tm_scan_irr_block): Believe the user on TM attributes. Index: testsuite/gcc.dg/tm/asm-1.c === --- testsuite/gcc.dg/tm/asm-1.c (revision 0) +++ testsuite/gcc.dg/tm/asm-1.c (revision 0) @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-fgnu-tm -O1" } */ + +static inline void asmfunc() +{ +__asm__(""); +} + +__attribute__((transaction_callable)) +void push() +{ +asmfunc(); +} Index: testsuite/g++.dg/tm/asm-1.c === --- testsuite/g++.dg/tm/asm-1.c (revision 0) +++ testsuite/g++.dg/tm/asm-1.c (revision 0) @@ -0,0 +1,22 @@ +// { dg-do compile } +// { dg-options "-fgnu-tm -O1" } + +template class shared_ptr { +public: +shared_ptr() { + __asm__ (""); +} +}; +template class deque { +public: +void push_back() { + ::new _Tp(); +} +}; +class Bar { + __attribute__((transaction_callable)) void push(); + deque > events; +}; +void Bar::push() { + events.push_back(); +} Index: trans-mem.c === --- trans-mem.c (revision 182244) +++ trans-mem.c (working copy) @@ -544,7 +544,6 @@ struct diagnose_tm unsigned int summary_flags : 8; unsigned int block_flags : 8; unsigned int func_flags : 8; - unsigned int saw_unsafe : 1; unsigned int saw_volatile : 1; gimple stmt; }; @@ -695,8 +694,6 @@ diagnose_tm_1 (gimple_stmt_iterator *gsi else if (d->func_flags & DIAG_TM_SAFE) error_at (gimple_location (stmt), "asm not allowed in % function"); - else - d->saw_unsafe = true; break; case GIMPLE_TRANSACTION: @@ -711,8 +708,6 @@ diagnose_tm_1 (gimple_stmt_iterator *gsi else if (d->func_flags & DIAG_TM_SAFE) error_at (gimple_location (stmt), "relaxed transaction in % function"); - else - d->saw_unsafe = true; inner_flags = DIAG_TM_RELAXED; } else if (gimple_transaction_subcode (stmt) & GTMA_IS_OUTER) @@ -727,8 +722,6 @@ diagnose_tm_1 (gimple_stmt_iterator *gsi else if (d->func_flags & DIAG_TM_SAFE) error_at (gimple_location (stmt), "outer transaction in % function"); - else - d->saw_unsafe = true; inner_flags |= DIAG_TM_OUTER; } @@ -748,8 +741,6 @@ diagnose_tm_1 (gimple_stmt_iterator *gsi walk_gimple_seq (gimple_transaction_body (stmt), diagnose_tm_1, diagnose_tm_1_op, &wi_inner); - - d->saw_unsafe |= d_inner.saw_unsafe; } } break; @@ -780,11 +771,6 @@ diagnose_tm_blocks (void) walk_gimple_seq (gimple_body (current_function_decl), diagnose_tm_1, diagnose_tm_1_op, &wi); - /* If we saw something other than a call that makes this function - unsafe, remember it so that the IPA pass only needs to scan calls. */ - if (d.saw_unsafe && !is_tm_safe_or_pure (current_function_decl)) -cgraph_local_info (current_function_decl)->tm_may_enter_irr = 1; - return 0; } @@ -3696,7 +3682,11 @@ ipa_tm_scan_irr_block (basic_block bb) break; d = get_cg_data (cgraph_get_node (fn)); - if (d->is_irrevocable) + + /* Return true if irrevocable, but above all, believe +the user. */ + if (d->is_irrevocable + && !is_tm_safe_or_pure (fn)) return true; } break; @@ -3880,6 +3870,11 @@ ipa_tm_scan_irr_function (struct cgraph_ VEC (basic_block, heap) *queue; bool ret = false; + /* Builtin operators (operator new, and such). */ + if (DECL_STRUCT_FUNCTION (node->decl) == NULL +
[PATCH, i386]: Fix PR testsuite/51524
Hello! Recent RA change disturbs delicate balance of register allocation, so a couple of scan tests begins to fail. Attached patch fixes these scan failures by forcing arguments of test function in registers. 2011-12-13 Uros Bizjak PR testsuite/51524 * gcc.target/i386/bmi2-mulx32-1.c (gen_mulx): Add attribute regparm(2). * gcc.target/i386/bmi2-mulx32-2.c (calc_mulx_u32): Ditto. Tested on x86_64-pc-linux-gnu {,-m32}, committed to mainline SVN. Uros. Index: gcc.target/i386/bmi2-mulx32-1.c === --- gcc.target/i386/bmi2-mulx32-1.c (revision 182192) +++ gcc.target/i386/bmi2-mulx32-1.c (working copy) @@ -15,7 +15,7 @@ return res; } -__attribute__((noinline)) +__attribute__((noinline, regparm (2))) unsigned long long gen_mulx (unsigned a, unsigned b) { Index: gcc.target/i386/bmi2-mulx32-2.c === --- gcc.target/i386/bmi2-mulx32-2.c (revision 182192) +++ gcc.target/i386/bmi2-mulx32-2.c (working copy) @@ -17,7 +17,7 @@ return res; } -__attribute__((noinline)) +__attribute__((noinline, regparm (2))) unsigned calc_mulx_u32 (unsigned x, unsigned y, unsigned *res_h) { return (unsigned) _mulx_u32 (x, y, res_h);
[PATCH] Fix PR48354
This fixes PR48354 by streaming DECL_ORIGINAL_TYPE. LTO bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2011-12-13 Richard Guenther PR lto/48354 * tree.c (find_decls_types_r): Also walk DECL_ORIGINAL_TYPE. * tree-streamer-in.c (lto_input_ts_decl_non_common_tree_pointers): Stream DECL_ORIGINAL_TYPE. * tree-streamer-out.c (write_ts_decl_non_common_tree_pointers): Likewise. lto/ * lto.c (lto_ft_decl_non_common): When we merged DECL_ORIGINAL_TYPE with the type of the TYPE_DECL clear DECL_ORIGINAL_TYPE. * g++.dg/lto/pr48354-1_0.C: New testcase. Index: gcc/tree.c === *** gcc/tree.c.orig 2011-12-13 13:36:34.0 +0100 --- gcc/tree.c 2011-12-13 13:41:24.0 +0100 *** find_decls_types_r (tree *tp, int *ws, v *** 4796,4801 --- 4796,4802 { fld_worklist_push (DECL_ARGUMENT_FLD (t), fld); fld_worklist_push (DECL_VINDEX (t), fld); + fld_worklist_push (DECL_ORIGINAL_TYPE (t), fld); } else if (TREE_CODE (t) == FIELD_DECL) { Index: gcc/testsuite/g++.dg/lto/pr48354-1_0.C === *** /dev/null 1970-01-01 00:00:00.0 + --- gcc/testsuite/g++.dg/lto/pr48354-1_0.C 2011-12-13 13:41:24.0 +0100 *** *** 0 --- 1,16 + // { dg-lto-do link } + // { dg-lto-options { { -g -flto } } } + // { dg-extra-ld-options "-r -nostdlib" } + + template struct Identity { typedef T type; }; + struct S { + typedef void (S::*FP)(); + FP fp; + }; + void g(); + void f() { + typedef Identity::type Dummy; + S s; + g(); + } + Index: gcc/tree-streamer-in.c === *** gcc/tree-streamer-in.c.orig 2011-12-13 13:36:34.0 +0100 --- gcc/tree-streamer-in.c 2011-12-13 13:41:24.0 +0100 *** lto_input_ts_decl_non_common_tree_pointe *** 602,607 --- 602,609 DECL_ARGUMENTS (expr) = stream_read_tree (ib, data_in); DECL_RESULT (expr) = stream_read_tree (ib, data_in); } + else if (TREE_CODE (expr) == TYPE_DECL) + DECL_ORIGINAL_TYPE (expr) = stream_read_tree (ib, data_in); DECL_VINDEX (expr) = stream_read_tree (ib, data_in); } Index: gcc/tree-streamer-out.c === *** gcc/tree-streamer-out.c.orig2011-12-13 13:36:34.0 +0100 --- gcc/tree-streamer-out.c 2011-12-13 13:41:24.0 +0100 *** write_ts_decl_non_common_tree_pointers ( *** 508,513 --- 508,515 stream_write_tree (ob, DECL_ARGUMENTS (expr), ref_p); stream_write_tree (ob, DECL_RESULT (expr), ref_p); } + else if (TREE_CODE (expr) == TYPE_DECL) + stream_write_tree (ob, DECL_ORIGINAL_TYPE (expr), ref_p); stream_write_tree (ob, DECL_VINDEX (expr), ref_p); } Index: gcc/lto/lto.c === *** gcc/lto/lto.c.orig 2011-12-12 16:55:19.0 +0100 --- gcc/lto/lto.c 2011-12-13 13:48:19.0 +0100 *** lto_ft_decl_non_common (tree t) *** 381,386 --- 381,393 LTO_FIXUP_TREE (DECL_ARGUMENT_FLD (t)); LTO_FIXUP_TREE (DECL_RESULT_FLD (t)); LTO_FIXUP_TREE (DECL_VINDEX (t)); + /* The C frontends may create exact duplicates for DECL_ORIGINAL_TYPE + like for 'typedef enum foo foo'. We have no way of avoiding to + merge them and dwarf2out.c cannot deal with this, + so fix this up by clearing DECL_ORIGINAL_TYPE in this case. */ + if (TREE_CODE (t) == TYPE_DECL + && DECL_ORIGINAL_TYPE (t) == TREE_TYPE (t)) + DECL_ORIGINAL_TYPE (t) = NULL_TREE; } /* Fix up fields of a decl_non_common T. */
Re: Fix flags for edges from/to entry/exit basic blocks (issue5486043)
On 11-12-13 09:22 , Diego Novillo wrote: When testing the patch, you should just run 'make -jN -k check' from the toplevel build directory, do not just test a subset of the testsuite. Additionally, you will need to configure and build with all the default languages. Your change affects everything. Diego.
Re: Fix flags for edges from/to entry/exit basic blocks (issue5486043)
On Tue, Dec 13, 2011 at 05:43, Dmitry Vyukov wrote: > Sorry for the trouble. What should I do now? > Just in case I've prepared a patch that rolls it back: > http://codereview.appspot.com/5485054 Dmitry, please roll back the patch for now. You should be able to see the failures if you run full checking (I missed the fact that you had not run full checking in your initial patch, sorry). When testing the patch, you should just run 'make -jN -k check' from the toplevel build directory, do not just test a subset of the testsuite. Diego.
Re: Go patch committed: Multiplex goroutines onto OS threads
Rainer Orth writes: > Ian Lance Taylor writes: > >> of any differences between Solaris and GNU/Linux when it comes to the >> getcontext, setcontext, and makecontext functions? > > I've just found something in Solaris 11 makecontext(3C) (NOTES section), > but that doesn't explain why 32-bit Solaris/x86 works, while 64-bit > doesn't: > > http://docs.oracle.com/cd/E23824_01/html/821-1465/makecontext-3c.html Wow, that's annoying. You're right, though, it seems that that ought to affect both x86 and x86_64. Ian
Re: [PATCH, PR 51362] Make IPA-CP not ICE if estimate_ipcp_clone_size_and_time returns zero size
On Tue, Dec 13, 2011 at 2:38 PM, Martin Jambor wrote: > Hi, > > IPA-CP currently assumes that cloning estimates always have some > positive size cost. However, there are apparently situations in which > estimate_ipcp_clone_size_and_time does return zero size and which then > mostly lead to divisions by zero or failed asserts. This patch avoids > that by simply bumping the sizes to 1 in those cases. > > Bootstrapped and tested on x86_64-linux. OK for trunk? Ok. Thanks, Richard. > Thanks, > > Martin > > > 2011-12-12 Martin Jambor > > PR tree-optimization/51362 > * ipa-cp.c (estimate_local_effects): When estimated size of a > specialized clone is zero, bump it to one. > > * testsuite/gcc.dg/ipa/pr51362.c: New test. > > Index: src/gcc/ipa-cp.c > === > --- src.orig/gcc/ipa-cp.c > +++ src/gcc/ipa-cp.c > @@ -1409,6 +1409,14 @@ estimate_local_effects (struct cgraph_no > + devirtualization_time_bonus (node, known_csts, known_binfos) > + removable_params_cost + emc; > > + gcc_checking_assert (size >=0); > + /* The inliner-heuristics based estimates may think that in certain > + contexts some functions do not have any size at all but we want > + all specializations to have at least a tiny cost, not least not > to > + divide by zero. */ > + if (size == 0) > + size = 1; > + > if (dump_file && (dump_flags & TDF_DETAILS)) > { > fprintf (dump_file, " - estimates for value "); > Index: src/gcc/testsuite/gcc.dg/ipa/pr51362.c > === > --- /dev/null > +++ src/gcc/testsuite/gcc.dg/ipa/pr51362.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O -fipa-cp -fipa-cp-clone" } */ > + > +int > +baz (void) > +{ > + return 0; > +} > + > +int make_mess; > + > +__attribute__ ((noinline)) > +int bar (int x, int (*f) (void)) > +{ > + return f (); > +} > + > +int > +foo (void) > +{ > + return bar (1, baz); > +}
[Patch]: Guard the call to begin_epilogue
Hi, currently, the call to begin_epilogue debug hook in final_scan_insn is unconditional and therefore occurs even when DECL_IGNORED_P is true. This is not what is done for other debug hooks, and leads to ICE on VMS (the only platform that defines the hook) as last_filename is NULL. Tested on alpha64-dec-openvms, committed to trunk as obvious. Tristan. 2011-12-13 Tristan Gingold * final.c (final_scan_insn): Guard the call to begin_epilogue debug hook. Index: final.c === --- final.c (revision 182280) +++ final.c (working copy) @@ -1973,7 +1973,8 @@ break; case NOTE_INSN_EPILOGUE_BEG: - (*debug_hooks->begin_epilogue) (last_linenum, last_filename); + if (!DECL_IGNORED_P (current_function_decl)) +(*debug_hooks->begin_epilogue) (last_linenum, last_filename); targetm.asm_out.function_begin_epilogue (file); break;
[PATCH, PR 51362] Make IPA-CP not ICE if estimate_ipcp_clone_size_and_time returns zero size
Hi, IPA-CP currently assumes that cloning estimates always have some positive size cost. However, there are apparently situations in which estimate_ipcp_clone_size_and_time does return zero size and which then mostly lead to divisions by zero or failed asserts. This patch avoids that by simply bumping the sizes to 1 in those cases. Bootstrapped and tested on x86_64-linux. OK for trunk? Thanks, Martin 2011-12-12 Martin Jambor PR tree-optimization/51362 * ipa-cp.c (estimate_local_effects): When estimated size of a specialized clone is zero, bump it to one. * testsuite/gcc.dg/ipa/pr51362.c: New test. Index: src/gcc/ipa-cp.c === --- src.orig/gcc/ipa-cp.c +++ src/gcc/ipa-cp.c @@ -1409,6 +1409,14 @@ estimate_local_effects (struct cgraph_no + devirtualization_time_bonus (node, known_csts, known_binfos) + removable_params_cost + emc; + gcc_checking_assert (size >=0); + /* The inliner-heuristics based estimates may think that in certain +contexts some functions do not have any size at all but we want +all specializations to have at least a tiny cost, not least not to +divide by zero. */ + if (size == 0) + size = 1; + if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, " - estimates for value "); Index: src/gcc/testsuite/gcc.dg/ipa/pr51362.c === --- /dev/null +++ src/gcc/testsuite/gcc.dg/ipa/pr51362.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fipa-cp -fipa-cp-clone" } */ + +int +baz (void) +{ + return 0; +} + +int make_mess; + +__attribute__ ((noinline)) +int bar (int x, int (*f) (void)) +{ + return f (); +} + +int +foo (void) +{ + return bar (1, baz); +}
[vms/committed]: fix crash in vmsdbgout.c
Hi, there were calls in vmsdbgout.c to dwarf2out_source_line outside the normal flow: in end_prologue, begin_epilogue, end_epilogue. This is useless and not not expected by dwarf2out. This patch removes these extra calls, and fixes a crash. Tested by building c+ada for alpha64-dec-openvms. Committed to trunk. Tristan. 2011-12-13 Tristan Gingold * vmsdbgout.c (vmsdbgout_write_source_line): New function. (vmsdbgout_end_prologue): Call vmsdbgout_write_source_line. (vmsdbgout_begin_epilogue): Likewise. (vmsdbgout_end_epilogue): Likewise. (vmsdbgout_source_line): Move code to vmsdbgout_write_source_line. Index: vmsdbgout.c === --- vmsdbgout.c (revision 182280) +++ vmsdbgout.c (working copy) @@ -158,6 +158,7 @@ static void vmsdbgout_end_block (unsigned int, unsigned int); static bool vmsdbgout_ignore_block (const_tree); static void vmsdbgout_source_line (unsigned int, const char *, int, bool); +static void vmsdbgout_write_source_line (unsigned, const char *, int , bool); static void vmsdbgout_begin_prologue (unsigned int, const char *); static void vmsdbgout_end_prologue (unsigned int, const char *); static void vmsdbgout_end_function (unsigned int); @@ -1162,7 +1163,7 @@ ASM_OUTPUT_LABEL (asm_out_file, label); /* VMS PCA expects every PC range to correlate to some line and file. */ - vmsdbgout_source_line (line, file, 0, true); + vmsdbgout_write_source_line (line, file, 0, true); } } @@ -1202,7 +1203,7 @@ } @@ -1202,7 +1203,7 @@ /* VMS PCA expects every PC range to correlate to some line and file. */ - vmsdbgout_source_line (line, file, 0, true); + vmsdbgout_write_source_line (line, file, 0, true); } } } @@ -1228,7 +1229,7 @@ ASM_OUTPUT_LABEL (asm_out_file, label); /* VMS PCA expects every PC range to correlate to some line and file. */ - vmsdbgout_source_line (line, file, 0, true); + vmsdbgout_write_source_line (line, file, 0, true); } } @@ -1388,6 +1389,31 @@ 'line_info_table' for later output of the .debug_line section. */ static void +vmsdbgout_write_source_line (unsigned line, const char *filename, + int discriminator, bool is_stmt) +{ + dst_line_info_ref line_info; + + targetm.asm_out.internal_label (asm_out_file, LINE_CODE_LABEL, + line_info_table_in_use); + + /* Expand the line info table if necessary. */ + if (line_info_table_in_use == line_info_table_allocated) +{ + line_info_table_allocated += LINE_INFO_TABLE_INCREMENT; + line_info_table = XRESIZEVEC (dst_line_info_entry, line_info_table, +line_info_table_allocated); +} + + /* Add the new entry at the end of the line_info_table. */ + line_info = &line_info_table[line_info_table_in_use++]; + line_info->dst_file_num = lookup_filename (filename); + line_info->dst_line_num = line; + if (line > file_info_table[line_info->dst_file_num].max_line) +file_info_table[line_info->dst_file_num].max_line = line; +} + +static void - - targetm.asm_out.internal_label (asm_out_file, LINE_CODE_LABEL, - line_info_table_in_use); - - /* Expand the line info table if necessary. */ - if (line_info_table_in_use == line_info_table_allocated) - { - line_info_table_allocated += LINE_INFO_TABLE_INCREMENT; - line_info_table = XRESIZEVEC (dst_line_info_entry, line_info_table, - line_info_table_allocated); - } - - /* Add the new entry at the end of the line_info_table. */ - line_info = &line_info_table[line_info_table_in_use++]; - line_info->dst_file_num = lookup_filename (filename); - line_info->dst_line_num = line; - if (line > file_info_table[line_info->dst_file_num].max_line) - file_info_table[line_info->dst_file_num].max_line = line; -} +vmsdbgout_write_source_line (line, filename, discriminator, is_stmt); } /* Record the beginning of a new source file, for later output.
Re: [PATCH, PR51491] add CLOBBER before __builtin_stack_restore when converting vla alloca_with_align to array decl
On Tue, Dec 13, 2011 at 01:26:42PM +0100, Tom de Vries wrote: > 2011-12-13 Tom de Vries > > PR tree-optimization/51491 > * tree-ssa-ccp.c (insert_clobber_before_stack_restore): New function. > (ccp_fold_stmt): Use insert_clobber_before_stack_restore after a > successful fold_builtin_alloca_with_align. I don't think this is safe. You don't want to look for any following __builtin_stack_restore, but for the matching one. Consider: int g (int *); int f (int n) { int tt = 0; int t = 4; { int a[t #ifdef DIFFERENT_BB2 + (tt != 0 ? 6 : 0) #endif ]; tt = g (a); { int b[n]; tt += g (b); #ifdef DIFFERENT_BB if (n > 20) tt += 148 * g (b); #endif tt += b[0]; } tt += a[0]; } { int a[4]; tt += g (a); tt += a[0]; } return tt; } Without any defines, this shows that looking for the first BUILT_IN_STACK_RESTORE is wrong if you ignore BUILT_IN_STACK_SAVE calls. And with the various defines it shows that neither the corresponding __builtin_stack_save nor __builtin_stack_restore have to be in the same bb as __builtin_alloca_with_align that is being folded. Perhaps you want to look for the closest enclosing __builtin_stack_save (search backwards in current basic block, its immediate dominator etc.?), remember what SSA_NAME it stores its result into and then just look at where is the (single?) user of that, which ought to be __builtin_stack_restore. Jakub
[PATCH] Fix PR51481
This fixes the maybe_clean_or_replace_eh_stmt call in replace_uses_by. Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. Richard. 2011-12-13 Richard Guenther PR middle-end/51481 * tree-cfg.c (replace_uses_by): Pass proper arguments to maybe_clean_or_replace_eh_stmt. Index: gcc/tree-cfg.c === --- gcc/tree-cfg.c (revision 182220) +++ gcc/tree-cfg.c (working copy) @@ -1601,6 +1605,7 @@ replace_uses_by (tree name, tree val) if (gimple_code (stmt) != GIMPLE_PHI) { gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + gimple orig_stmt = stmt; size_t i; fold_stmt (&gsi); @@ -1619,7 +1624,7 @@ replace_uses_by (tree name, tree val) recompute_tree_invariant_for_addr_expr (op); } - maybe_clean_or_replace_eh_stmt (stmt, stmt); + maybe_clean_or_replace_eh_stmt (orig_stmt, stmt); update_stmt (stmt); } }
[PATCH] Fix PR51519
This fixes PR51519. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2011-12-13 Richard Guenther PR tree-optimization/51519 * ipa-inline.c (edge_badness): Use edge growth in non-guessed branch probability case as well. * gcc.dg/pr51519.c: New testcase. Index: gcc/ipa-inline.c === --- gcc/ipa-inline.c(revision 182220) +++ gcc/ipa-inline.c(working copy) @@ -861,7 +861,7 @@ edge_badness (struct cgraph_edge *edge, else { int nest = MIN (inline_edge_summary (edge)->loop_depth, 8); - badness = estimate_growth (callee) * 256; + badness = growth * 256; /* Decrease badness if call is nested. */ if (badness > 0) Index: gcc/testsuite/gcc.dg/pr51519.c === --- gcc/testsuite/gcc.dg/pr51519.c (revision 0) +++ gcc/testsuite/gcc.dg/pr51519.c (revision 0) @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fno-guess-branch-probability -findirect-inlining" } */ + +void fe (void); +int i; + +static inline void +FX (void (*f) (void)) +{ + fe (); + (*f) (); +} + +static inline void +f4 () +{ + if (i) +FX (fe); +} + +static inline void +f3 (void) +{ + f4 (); + if (i) +FX (f4); +} + +static inline void +f2 (void) +{ + FX (&f3); +} + +void +f1 (void) +{ + FX (&f2); +}
[PATCH] New testcase for -flto -g
Committed. Richard. 2011-12-13 Richard Guenther * gcc.dg/lto/20111213-1_0.c: New testcase. Index: gcc/testsuite/gcc.dg/lto/20111213-1_0.c === --- gcc/testsuite/gcc.dg/lto/20111213-1_0.c (revision 0) +++ gcc/testsuite/gcc.dg/lto/20111213-1_0.c (revision 0) @@ -0,0 +1,8 @@ +/* { dg-lto-do link } */ +/* { dg-lto-options { { -flto -g } } } */ +/* { dg-extra-ld-options {-r -nostdlib} } */ + +void gfc_be_parse_file (void) +{ + typedef enum builtin_type builtin_type; +}
[PATCH, PR51491] add CLOBBER before __builtin_stack_restore when converting vla alloca_with_align to array decl
Jakub, I have a patch for PR51491. Consider the following test-case, compiled with gcc -O2 -S f.c -fdump-tree-all: ... int f(void) { int tt = 0; int t = 4; { int a[t]; tt = g(a); tt += a[0]; } { int a[4]; tt += g(a); tt += a[0]; } return tt; } ... The vla a[t] is gimplified to an alloca_with_align, with a __builtin_stack_save/__builtin_stack_restore pair inserted at the scope boundaries. This alloca_with_align is folded by ccp into an array declaration, resulting in this representation at f.c.149t.optimized: ... f () { D.1726[16]; int a[4]; int tt; int D.1722; int D.1721; void * saved_stack.2; int D.1719; : saved_stack.2_3 = __builtin_stack_save (); tt_18 = g (&D.1726); D.1719_19 = MEM[(int[0:D.1713] *)&D.1726][0]; tt_20 = D.1719_19 + tt_18; __builtin_stack_restore (saved_stack.2_3); D.1721_21 = g (&a); tt_22 = D.1721_21 + tt_20; D.1722_23 = a[0]; tt_24 = D.1722_23 + tt_22; a ={v} {CLOBBER}; return tt_24; } ... The patch inserts this clobber of D.1726 when alloca_with_align is folded: ... @@ -13,6 +13,7 @@ tt_18 = g (&D.1726); D.1719_19 = MEM[(int[0:D.1713] *)&D.1726][0]; tt_20 = D.1719_19 + tt_18; + D.1726 ={v} {CLOBBER}; __builtin_stack_restore (saved_stack.2_3); D.1721_21 = g (&a); tt_22 = D.1721_21 + tt_20; ... This allows D.1726 and a[4] to be mapped onto the same stack space, as can be seen in expand dump: ... Partition 0: size 16 align 16 D.1726 a ... Bootstrapped and reg-tested (ada inclusive) on x86_64. OK for stage3? Thanks, - Tom 2011-12-13 Tom de Vries PR tree-optimization/51491 * tree-ssa-ccp.c (insert_clobber_before_stack_restore): New function. (ccp_fold_stmt): Use insert_clobber_before_stack_restore after a successful fold_builtin_alloca_with_align. * gcc.dg/pr51491.c: New test. Index: gcc/tree-ssa-ccp.c === --- gcc/tree-ssa-ccp.c (revision 182098) +++ gcc/tree-ssa-ccp.c (working copy) @@ -1690,6 +1690,36 @@ evaluate_stmt (gimple stmt) return val; } +/* Try to find a BUILT_IN_STACK_RESTORE after gsi_stmt (I) and insert a clobber + of T before it. */ + +static void +insert_clobber_before_stack_restore (gimple_stmt_iterator i, tree t) +{ + bool found = false; + tree clobber; + + for (; !gsi_end_p (i); gsi_next (&i)) +{ + gimple stmt = gsi_stmt (i); + + if (!gimple_call_builtin_p (stmt, BUILT_IN_STACK_RESTORE)) + continue; + + found = true; + break; +} + + /* We should try harder, but for now, just return. */ + if (!found) +return; + + clobber = build_constructor (TREE_TYPE (t), NULL); + TREE_THIS_VOLATILE (clobber) = 1; + gsi_insert_before (&i, gimple_build_assign (t, clobber), + GSI_SAME_STMT); +} + /* Detects a __builtin_alloca_with_align with constant size argument. Declares fixed-size array and returns the address, if found, otherwise returns NULL_TREE. */ @@ -1824,7 +1854,9 @@ ccp_fold_stmt (gimple_stmt_iterator *gsi if (new_rhs) { bool res = update_call_from_tree (gsi, new_rhs); + tree var = TREE_OPERAND (TREE_OPERAND (new_rhs, 0),0); gcc_assert (res); + insert_clobber_before_stack_restore (*gsi, var); return true; } } Index: gcc/testsuite/gcc.dg/pr51491.c === --- /dev/null (new file) +++ gcc/testsuite/gcc.dg/pr51491.c (revision 0) @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-rtl-expand" } */ + + +int g(int*); + +int f(void) +{ + int tt = 0; + int t = 4; + { +int a[t]; +tt = g(a); +tt += a[0]; + } + { +int a[4]; +tt += g(a); +tt += a[0]; + } + return tt; +} + +/* { dg-final { scan-rtl-dump-times "Partition" 1 "expand"} } */ +/* { dg-final { cleanup-rtl-dump "expand" } } */
Re: [patch] Remove occurrences of int64_t (and int32_t)
> Please simply switch it to unconditional use of tree_to_double_int > (DECL_SIZE_UNIT (t)).low and make 'size' a HOST_WIDE_INT (we properly > require 64 bit hwi for targets that have 64bit sizes/pointers). Sure, but we need a 64-bit container for 'size' at least, because 8 bytes are read from it. So I presume HOST_WIDEST_INT should simply be used there too. > +#if HOST_BITS_PER_WIDE_INT >= 64 > +# define host_int64 HOST_WIDE_INT > +#elif HOST_BITS_PER_WIDEST_INT >= 64 > +# define host_int64 HOST_WIDEST_INT > +#else > +# error "host has no 64-bit type" > +#endif > > well, as previous communication has shown we should use > HOST_WIDEST_INT unconditionally for a 64-bit type (allowing > the code to compile when no such type is available during stage1). > If we really need a true 64bit type then we should amend hwint.h > accordingly. OK. -- Eric Botcazou
Re: [PATCH] Call maybe_clean_or_redirect_eh_stmt in gimple_fold_call (PR tree-optimization/51481)
On Tue, Dec 13, 2011 at 1:15 PM, Jakub Jelinek wrote: > On Tue, Dec 13, 2011 at 01:05:30PM +0100, Richard Guenther wrote: >> Yeah, I'm testing a followup patch that fixes the call in replace_uses_by >> (that alone fixes the testcase in the PR). > > That patch looks good, but then forwprop has the same > static void > tidy_after_forward_propagate_addr (gimple stmt) > { > /* We may have turned a trapping insn into a non-trapping insn. */ > if (maybe_clean_or_replace_eh_stmt (stmt, stmt) > && gimple_purge_dead_eh_edges (gimple_bb (stmt))) > cfg_changed = true; > ... > > Other spots are maybe ok, so if you want, the > maybe_cleanup_or_replace_eh_stmt call from gimple_fold_call can be nuked > afterwards. But what about that lookup_*/tree_could_throw_p check? > Should it stay? I don't think so, if we want to prevent fold_stmt from running into such situation we should do it centrally (similar to the in-place variant). But I wouldn't go down this route without further evidence (read: a testcase that we cannot fix otherwise). Richard. > Jakub
Re: [PATCH] Call maybe_clean_or_redirect_eh_stmt in gimple_fold_call (PR tree-optimization/51481)
On Tue, Dec 13, 2011 at 01:05:30PM +0100, Richard Guenther wrote: > Yeah, I'm testing a followup patch that fixes the call in replace_uses_by > (that alone fixes the testcase in the PR). That patch looks good, but then forwprop has the same static void tidy_after_forward_propagate_addr (gimple stmt) { /* We may have turned a trapping insn into a non-trapping insn. */ if (maybe_clean_or_replace_eh_stmt (stmt, stmt) && gimple_purge_dead_eh_edges (gimple_bb (stmt))) cfg_changed = true; ... Other spots are maybe ok, so if you want, the maybe_cleanup_or_replace_eh_stmt call from gimple_fold_call can be nuked afterwards. But what about that lookup_*/tree_could_throw_p check? Should it stay? Jakub
Re: [PATCH] Call maybe_clean_or_redirect_eh_stmt in gimple_fold_call (PR tree-optimization/51481)
On Tue, Dec 13, 2011 at 1:00 PM, Jakub Jelinek wrote: > On Tue, Dec 13, 2011 at 11:52:38AM +0100, Richard Guenther wrote: >> On Mon, Dec 12, 2011 at 6:01 PM, Jakub Jelinek wrote: >> > In gimple_fold_call (called from fold_stmt from replace_uses_by from cfg >> > cleanup) we weren't calling maybe_clean_or_replace_eh_stmt, so when >> > we've replaced a printf call (which can throw) with puts (which can throw >> > too), nothing would update EH stmts. It would be problematic calling >> > gimple_purge_dead_eh_edges, because callers might be surprised by that, >> > especially when this happens during cfg cleanup, so instead I just assert >> > it is not needed and don't try to fold if a throwing stmt would be replaced >> > by non-throwing. FAB pass can handle that instead. No folding >> > has been actually disabled because of that check during bootstrap/regtest, >> > so it is there just in case. > > Note, I've already committed the patch yesterday. Yes, I've seen that. I think it's ugly and inconsistent - we can certaily fold non-call stmts from throwing to non-throwing. Think of -fnon-call-exceptions and x + 1.3 and folding after propagating 1.0 to x. It is not fold_stmts business to care about EH info or edges (it works on statements in isolation). >> I think that all callers of fold_stmt are supposed to handle EH >> updating themselves, similar to how they are supposed to >> call update_stmt. I see that replace_uses_by does call > > Some do something, but others don't. Do you think it is preferable when > the callers do that (all of them)? Even if that is chosen, while some could > purge dead eh edges, other places can't do that IMHO, so either we need to > simply not do transformations that remove EH edges in fold_stmt, or have an > argument that is passed down whether it is ok to do so. I think all callers can (should be able to) do that. In the light of -fnon-call-exceptions we probably miss quite a few - but we should fix them. [Unfortunately some of our propagation helpers do not, or do not have a good way of returning whether EH/CFG cleanup is necessary, notably gsi_remove, which removes the stmt from the EH tables but does not purge edges.] Richard.
Re: [PATCH] Call maybe_clean_or_redirect_eh_stmt in gimple_fold_call (PR tree-optimization/51481)
On Tue, Dec 13, 2011 at 1:00 PM, Jakub Jelinek wrote: > On Tue, Dec 13, 2011 at 11:52:38AM +0100, Richard Guenther wrote: >> On Mon, Dec 12, 2011 at 6:01 PM, Jakub Jelinek wrote: >> > In gimple_fold_call (called from fold_stmt from replace_uses_by from cfg >> > cleanup) we weren't calling maybe_clean_or_replace_eh_stmt, so when >> > we've replaced a printf call (which can throw) with puts (which can throw >> > too), nothing would update EH stmts. It would be problematic calling >> > gimple_purge_dead_eh_edges, because callers might be surprised by that, >> > especially when this happens during cfg cleanup, so instead I just assert >> > it is not needed and don't try to fold if a throwing stmt would be replaced >> > by non-throwing. FAB pass can handle that instead. No folding >> > has been actually disabled because of that check during bootstrap/regtest, >> > so it is there just in case. > > Note, I've already committed the patch yesterday. > >> I think that all callers of fold_stmt are supposed to handle EH >> updating themselves, similar to how they are supposed to >> call update_stmt. I see that replace_uses_by does call > > Some do something, but others don't. Do you think it is preferable when > the callers do that (all of them)? Even if that is chosen, while some could > purge dead eh edges, other places can't do that IMHO, so either we need to > simply not do transformations that remove EH edges in fold_stmt, or have an > argument that is passed down whether it is ok to do so. > >> maybe_clean_or_replace_eh_stmt - are you sure it is this path >> the issue triggers on? > > Yes, I'm sure this was in replace_uses_by. Apparently it calls > maybe_clean_or_replace_eh_stmt, but incorrectly (similarly forwprop in some > cases doesn't call it at all, in other places incorrectly): > > maybe_clean_or_replace_eh_stmt (stmt, stmt); > > This does nothing. It must be called with the old stmt and new stmt it was > replaced with, otherwise, when it is called just with new stmt as in this > place lookup_stmt_eh_lp will just return 0 (unless fold_stmt did call it > already > properly). Yeah, I'm testing a followup patch that fixes the call in replace_uses_by (that alone fixes the testcase in the PR). Richard. > Jakub
Re: [PATCH] _GCC_PICFLAG: use -fPIC for s390x targets
On 12/07/2011 10:33 AM, Rainer Orth wrote: > Mike Frysinger writes: >> s390*-*-*) >> $1=-fpic >> ;; > > Perhaps it's better to remove both s390* cases and use the -fPIC default > everywhere, as does libtool. picflag.m4 is supposed to be usable > everywhere. >From a performance perspective the "fix after breakage" approach still would >make sense since GCC generates slightly faster code with -fpic compared to -fPIC. On the other hand the user visible shared libs like libgomp, libgfortran, and libstdc++ are already built with -fPIC anyway. We recently also enabled -fPIC for libgcc_s.so. And now we are about to do the same for libiberty. So I think there does not remain much anyway - or am I missing something?! So I agree that removing the s390*-*-* case entirely in picflag.m4 will be the better way to fix this. We then also should revert that one: http://gcc.gnu.org/ml/gcc-patches/2011-11/msg00643.html Bye, -Andreas-
Re: [PATCH] Call maybe_clean_or_redirect_eh_stmt in gimple_fold_call (PR tree-optimization/51481)
On Tue, Dec 13, 2011 at 11:52:38AM +0100, Richard Guenther wrote: > On Mon, Dec 12, 2011 at 6:01 PM, Jakub Jelinek wrote: > > In gimple_fold_call (called from fold_stmt from replace_uses_by from cfg > > cleanup) we weren't calling maybe_clean_or_replace_eh_stmt, so when > > we've replaced a printf call (which can throw) with puts (which can throw > > too), nothing would update EH stmts. It would be problematic calling > > gimple_purge_dead_eh_edges, because callers might be surprised by that, > > especially when this happens during cfg cleanup, so instead I just assert > > it is not needed and don't try to fold if a throwing stmt would be replaced > > by non-throwing. FAB pass can handle that instead. No folding > > has been actually disabled because of that check during bootstrap/regtest, > > so it is there just in case. Note, I've already committed the patch yesterday. > I think that all callers of fold_stmt are supposed to handle EH > updating themselves, similar to how they are supposed to > call update_stmt. I see that replace_uses_by does call Some do something, but others don't. Do you think it is preferable when the callers do that (all of them)? Even if that is chosen, while some could purge dead eh edges, other places can't do that IMHO, so either we need to simply not do transformations that remove EH edges in fold_stmt, or have an argument that is passed down whether it is ok to do so. > maybe_clean_or_replace_eh_stmt - are you sure it is this path > the issue triggers on? Yes, I'm sure this was in replace_uses_by. Apparently it calls maybe_clean_or_replace_eh_stmt, but incorrectly (similarly forwprop in some cases doesn't call it at all, in other places incorrectly): maybe_clean_or_replace_eh_stmt (stmt, stmt); This does nothing. It must be called with the old stmt and new stmt it was replaced with, otherwise, when it is called just with new stmt as in this place lookup_stmt_eh_lp will just return 0 (unless fold_stmt did call it already properly). Jakub
[Ada] Add more vectorization (sub-)tests
Tested on i586-suse-linux, applied on the mainline. 2011-12-13 Eric Botcazou * gnat.dg/vect1.ad[sb]: Add more tests. * gnat.dg/vect2.ad[sb]: Likewise. * gnat.dg/vect3.ad[sb]: Likewise. * gnat.dg/vect4.ad[sb]: Likewise. * gnat.dg/vect5.ad[sb]: Likewise. * gnat.dg/vect6.ad[sb]: Likewise. -- Eric Botcazou Index: gnat.dg/vect4.adb === --- gnat.dg/vect4.adb (revision 182203) +++ gnat.dg/vect4.adb (working copy) @@ -12,6 +12,13 @@ package body Vect4 is return R; end; + procedure Add (X : Varray; Y : Long_Float; R : out Varray) is + begin + for I in X'Range loop + R(I) := X(I) + Y; + end loop; + end; + procedure Add (X : not null access Varray; Y : Long_Float; R : not null access Varray) is begin for I in X'Range loop @@ -29,6 +36,13 @@ package body Vect4 is return R; end; + procedure Add (X : Sarray; Y : Long_Float; R : out Sarray) is + begin + for I in Sarray'Range loop + R(I) := X(I) + Y; + end loop; + end; + procedure Add (X : not null access Sarray; Y : Long_Float; R : not null access Sarray) is begin for I in Sarray'Range loop @@ -46,6 +60,13 @@ package body Vect4 is return R; end; + procedure Add (X : Darray1; Y : Long_Float; R : out Darray1) is + begin + for I in Darray1'Range loop + R(I) := X(I) + Y; + end loop; + end; + procedure Add (X : not null access Darray1; Y : Long_Float; R : not null access Darray1) is begin for I in Darray1'Range loop @@ -63,6 +84,13 @@ package body Vect4 is return R; end; + procedure Add (X : Darray2; Y : Long_Float; R : out Darray2) is + begin + for I in Darray2'Range loop + R(I) := X(I) + Y; + end loop; + end; + procedure Add (X : not null access Darray2; Y : Long_Float; R : not null access Darray2) is begin for I in Darray2'Range loop @@ -80,6 +108,13 @@ package body Vect4 is return R; end; + procedure Add (X : Darray3; Y : Long_Float; R : out Darray3) is + begin + for I in Darray3'Range loop + R(I) := X(I) + Y; + end loop; + end; + procedure Add (X : not null access Darray3; Y : Long_Float; R : not null access Darray3) is begin for I in Darray3'Range loop @@ -89,5 +124,5 @@ package body Vect4 is end Vect4; --- { dg-final { scan-tree-dump-times "vectorized 1 loops" 10 "vect" } } +-- { dg-final { scan-tree-dump-times "vectorized 1 loops" 15 "vect" } } -- { dg-final { cleanup-tree-dump "vect" } } Index: gnat.dg/vect4.ads === --- gnat.dg/vect4.ads (revision 182203) +++ gnat.dg/vect4.ads (working copy) @@ -8,6 +8,7 @@ package Vect4 is for Varray'Alignment use 16; function "+" (X : Varray; Y : Long_Float) return Varray; + procedure Add (X : Varray; Y : Long_Float; R : out Varray); procedure Add (X : not null access Varray; Y : Long_Float; R : not null access Varray); @@ -16,6 +17,7 @@ package Vect4 is for Sarray'Alignment use 16; function "+" (X : Sarray; Y : Long_Float) return Sarray; + procedure Add (X : Sarray; Y : Long_Float; R : out Sarray); procedure Add (X : not null access Sarray; Y : Long_Float; R : not null access Sarray); @@ -23,6 +25,7 @@ package Vect4 is for Darray1'Alignment use 16; function "+" (X : Darray1; Y : Long_Float) return Darray1; + procedure Add (X : Darray1; Y : Long_Float; R : out Darray1); procedure Add (X : not null access Darray1; Y : Long_Float; R : not null access Darray1); @@ -30,6 +33,7 @@ package Vect4 is for Darray2'Alignment use 16; function "+" (X : Darray2; Y : Long_Float) return Darray2; + procedure Add (X : Darray2; Y : Long_Float; R : out Darray2); procedure Add (X : not null access Darray2; Y : Long_Float; R : not null access Darray2); @@ -37,6 +41,7 @@ package Vect4 is for Darray3'Alignment use 16; function "+" (X : Darray3; Y : Long_Float) return Darray3; + procedure Add (X : Darray3; Y : Long_Float; R : out Darray3); procedure Add (X : not null access Darray3; Y : Long_Float; R : not null access Darray3); end Vect4; Index: gnat.dg/vect5.adb === --- gnat.dg/vect5.adb (revision 182203) +++ gnat.dg/vect5.adb (working copy) @@ -12,6 +12,13 @@ package body Vect5 is return R; end; + procedure Add (X : Varray; Y : Long_Float; R : out Varray) is + begin + for I in X'Range loop + R(I) := X(I) + Y; + end loop; + end; + procedure Add (X : not null access Varray; Y : Long_Float; R : not null access Varray) is begin for I in X'Range loop @@ -29,6 +36,13 @@ package body Vect5 is return R; end; + procedure Add (X : Sarray; Y : Long_Float; R : out Sarray) is + begin + for