Re: [google] dump inline decisions to stderr under -fopt-info

2011-12-13 Thread Xinliang David Li
There are a couple of problems with the patch (the patch is only
compatible with 4_6 branch)

1) the dump_inline_decision should be called inside
cgraph_mark_inline_edge when the edge is finally picked (and when the
callee node is cloned)
2) The source location is not printed which makes the dumping less useful
3) the information line should clearly print out the final caller
where the inlining happens, and the intermediate inline instance
information should be clearly printed as additional information
showing the inline context. For instance,

  foo1->foo2->foo3->foo4

If all callsites are inlined in top down order, the messages I expect
to see are:

a.c:5: note: 'foo2' is inlined into 'foo1' with call count 
a.c:10: note: 'foo3' is inlined into 'foo1' with call count  (via
inline instance 'foo2')
a.c:20: note: 'foo4' is inlined into foo1' with call count  (via
inline instances 'foo3', 'foo2')


If the decision is bottom up, the messages should look like:

a.c:20: note: 'foo4' is inlined into 'foo3' with call count 
a.c:10: note: 'foo3' is inlined into 'foo2' with call count 
a.c:5: note: 'foo2' is inlined into 'foo1' with call count 

Ideally, the caller and callee should also be marked with 'entry count
and max bb count' information -- but that probably should be
controlled by opt-info level.

4) Also notice that for inline node clones, the right way to walk up
the call context chain is via 'edge->caller->callers'.
5) there should be a test case.

Thanks,

David

On Tue, Dec 13, 2011 at 5:13 PM, Dehao Chen  wrote:
> I've updated the patch to fix a bug in dump_inline_decision.
>
> Thanks,
> Dehao
>
> On Thu, Dec 1, 2011 at 9:59 AM, Dehao Chen  wrote:
>>
>> This patch is for google-{main|gcc_4.6} only.
>>
>> Tested with bootstrap and regression tests.
>>
>> Dump inline decisions, also output the inline chain.
>>
>> Dehao
>>
>> 2011-12-01  Dehao Chen  
>>
>>        * ipa-inline.c (dump_inline_decision): New function.
>>        (inline_small_functions): Use it to dump the inline decisions to 
>> stderr.
>>
>> Index: gcc/ipa-inline.c
>> ===
>> --- gcc/ipa-inline.c    (revision 181835)
>> +++ gcc/ipa-inline.c    (working copy)
>> @@ -1377,6 +1377,45 @@
>>  }
>>
>>
>> +/* Dump the inline decision of EDGE to stderr.  */
>> +
>> +static void
>> +dump_inline_decision (struct cgraph_edge *edge)
>> +{
>> +  location_t locus;
>> +  size_t buf_size = 4096;
>> +  size_t current_string_len = 0;
>> +  char *buf = (char *) xmalloc (buf_size);
>> +  struct cgraph_node *inlined_to;
>> +  gcov_type callee_count = edge->callee->count;
>> +  buf[0] = 0;
>> +  if (edge->inline_failed == CIF_OK && edge->callee->clone_of)
>> +    callee_count += edge->callee->clone_of->count;
>> +  for (inlined_to = edge->caller->global.inlined_to;
>> +       inlined_to; inlined_to = inlined_to->global.inlined_to)
>> +    {
>> +      const char *name = cgraph_node_name (inlined_to);
>> +      if (!name)
>> +       name = "unknown";
>> +      current_string_len += (strlen (name) + 4);
>> +      while (current_string_len >= buf_size)
>> +       {
>> +         buf_size *= 2;
>> +         buf = (char *) xrealloc (buf, buf_size);
>> +       }
>> +      strcat (buf, "-->");
>> +      strcat (buf, name);
>> +    }
>> +  locus = gimple_location (edge->call_stmt);
>> +  inform (locus, "%s ("HOST_WIDEST_INT_PRINT_DEC") --"
>> +         HOST_WIDEST_INT_PRINT_DEC"--> %s ("
>> +         HOST_WIDEST_INT_PRINT_DEC") %s : %s",
>> +         cgraph_node_name (edge->callee), callee_count, edge->count,
>> +         cgraph_node_name (edge->caller), edge->caller->count, buf,
>> +         edge->inline_failed == CIF_OK ? "INLINED": "IGNORED");
>> +}
>> +
>> +
>>  /* We use greedy algorithm for inlining of small functions:
>>    All inline candidates are put into prioritized heap ordered in
>>    increasing badness.
>> @@ -1428,6 +1467,7 @@
>>   overall_size = initial_size;
>>   max_size = compute_max_insns (overall_size);
>>   min_size = overall_size;
>> +  edge = NULL;
>>
>>   /* Populate the heeap with all edges we might inline.  */
>>
>> @@ -1462,6 +1502,9 @@
>>       int current_badness;
>>       int growth;
>>
>> +      if (edge && flag_opt_info >= OPT_INFO_MIN)
>> +       dump_inline_decision (edge);
>> +
>>       edge = (struct cgraph_edge *) fibheap_extract_min (heap);
>>       gcc_assert (edge->aux);
>>       edge->aux = NULL;
>> @@ -1482,6 +1525,7 @@
>>       if (current_badness != badness)
>>        {
>>          edge->aux = fibheap_insert (heap, current_badness, edge);
>> +         edge = NULL;
>>          continue;
>>        }
>>
>> @@ -1636,6 +1680,8 @@
>>            fprintf (dump_file, "New minimal size reached: %i\n", min_size);
>>        }
>>     }
>> +  if (edge && flag_opt_info >= OPT_INFO_MIN)
>> +    dump_inline_decision (edge);
>>
>>   free_growth_caches ();
>>   if (new_indirect_edges)


Re: [google] dump inline decisions to stderr under -fopt-info

2011-12-13 Thread Dehao Chen
Sorry, forgot to attach the patch...

Dehao

On Wed, Dec 14, 2011 at 9:13 AM, Dehao Chen  wrote:
> I've updated the patch to fix a bug in dump_inline_decision.
>
> Thanks,
> Dehao
>
> On Thu, Dec 1, 2011 at 9:59 AM, Dehao Chen  wrote:
>>
>> This patch is for google-{main|gcc_4.6} only.
>>
>> Tested with bootstrap and regression tests.
>>
>> Dump inline decisions, also output the inline chain.
>>
>> Dehao
>>
>> 2011-12-01  Dehao Chen  
>>
>>        * ipa-inline.c (dump_inline_decision): New function.
>>        (inline_small_functions): Use it to dump the inline decisions to 
>> stderr.
>>
>> Index: gcc/ipa-inline.c
>> ===
>> --- gcc/ipa-inline.c    (revision 181835)
>> +++ gcc/ipa-inline.c    (working copy)
>> @@ -1377,6 +1377,45 @@
>>  }
>>
>>
>> +/* Dump the inline decision of EDGE to stderr.  */
>> +
>> +static void
>> +dump_inline_decision (struct cgraph_edge *edge)
>> +{
>> +  location_t locus;
>> +  size_t buf_size = 4096;
>> +  size_t current_string_len = 0;
>> +  char *buf = (char *) xmalloc (buf_size);
>> +  struct cgraph_node *inlined_to;
>> +  gcov_type callee_count = edge->callee->count;
>> +  buf[0] = 0;
>> +  if (edge->inline_failed == CIF_OK && edge->callee->clone_of)
>> +    callee_count += edge->callee->clone_of->count;
>> +  for (inlined_to = edge->caller->global.inlined_to;
>> +       inlined_to; inlined_to = inlined_to->global.inlined_to)
>> +    {
>> +      const char *name = cgraph_node_name (inlined_to);
>> +      if (!name)
>> +       name = "unknown";
>> +      current_string_len += (strlen (name) + 4);
>> +      while (current_string_len >= buf_size)
>> +       {
>> +         buf_size *= 2;
>> +         buf = (char *) xrealloc (buf, buf_size);
>> +       }
>> +      strcat (buf, "-->");
>> +      strcat (buf, name);
>> +    }
>> +  locus = gimple_location (edge->call_stmt);
>> +  inform (locus, "%s ("HOST_WIDEST_INT_PRINT_DEC") --"
>> +         HOST_WIDEST_INT_PRINT_DEC"--> %s ("
>> +         HOST_WIDEST_INT_PRINT_DEC") %s : %s",
>> +         cgraph_node_name (edge->callee), callee_count, edge->count,
>> +         cgraph_node_name (edge->caller), edge->caller->count, buf,
>> +         edge->inline_failed == CIF_OK ? "INLINED": "IGNORED");
>> +}
>> +
>> +
>>  /* We use greedy algorithm for inlining of small functions:
>>    All inline candidates are put into prioritized heap ordered in
>>    increasing badness.
>> @@ -1428,6 +1467,7 @@
>>   overall_size = initial_size;
>>   max_size = compute_max_insns (overall_size);
>>   min_size = overall_size;
>> +  edge = NULL;
>>
>>   /* Populate the heeap with all edges we might inline.  */
>>
>> @@ -1462,6 +1502,9 @@
>>       int current_badness;
>>       int growth;
>>
>> +      if (edge && flag_opt_info >= OPT_INFO_MIN)
>> +       dump_inline_decision (edge);
>> +
>>       edge = (struct cgraph_edge *) fibheap_extract_min (heap);
>>       gcc_assert (edge->aux);
>>       edge->aux = NULL;
>> @@ -1482,6 +1525,7 @@
>>       if (current_badness != badness)
>>        {
>>          edge->aux = fibheap_insert (heap, current_badness, edge);
>> +         edge = NULL;
>>          continue;
>>        }
>>
>> @@ -1636,6 +1680,8 @@
>>            fprintf (dump_file, "New minimal size reached: %i\n", min_size);
>>        }
>>     }
>> +  if (edge && flag_opt_info >= OPT_INFO_MIN)
>> +    dump_inline_decision (edge);
>>
>>   free_growth_caches ();
>>   if (new_indirect_edges)
Index: ipa-inline.c
===
--- ipa-inline.c(revision 181835)
+++ ipa-inline.c(working copy)
@@ -1073,6 +1073,44 @@
   return false;
 }
 
+/* Dump the inline decision of EDGE to stderr.  */
+
+static void
+dump_inline_decision (struct cgraph_edge *edge)
+{
+  location_t locus;
+  size_t buf_size = 4096;
+  size_t current_string_len = 0;
+  char *buf = (char *) xmalloc (buf_size);
+  struct cgraph_node *inlined_to;
+  gcov_type callee_count = edge->callee->count;
+  buf[0] = 0;
+  if (edge->inline_failed == CIF_OK && edge->callee->clone_of)
+callee_count += edge->callee->clone_of->count;
+  for (inlined_to = edge->caller->global.inlined_to;
+   inlined_to; inlined_to = inlined_to->global.inlined_to)
+{
+  const char *name = cgraph_node_name (inlined_to);
+  if (!name)
+   name = "unknown";
+  current_string_len += (strlen (name) + 4);
+  while (current_string_len >= buf_size)
+   {
+ buf_size *= 2;
+ buf = (char *) xrealloc (buf, buf_size);
+   }
+  strcat (buf, "-->");
+  strcat (buf, name);
+}
+  locus = gimple_location (edge->call_stmt);
+  inform (locus, "%s ("HOST_WIDEST_INT_PRINT_DEC") --"
+ HOST_WIDEST_INT_PRINT_DEC"--> %s ("
+ HOST_WIDEST_INT_PRINT_DEC") %s : %s",
+ xstrdup (cgraph_node_name (edge->callee)), callee_count, edge->count,
+ xstrdup (cgraph_node_name (edge->caller)), edge->caller->count, buf,
+ edge->inline_failed == CIF

Re: [Patch,AVR] ad PR50931: Implement mulpsi3

2011-12-13 Thread Denis Chertykov
2011/12/13 Georg-Johann Lay :
> This adds missing 24-Bit multiplication.
>
> Besides implementing __mulpsi3 in libgcc the patch adds some minor tweaks to
> multiply with small numbers and to represent the reduced register footprint of
> asm implementations.
>
> With this patch PR50931 is complete from the target side.
> (But there is still a segfault in the frontends, see PR51527)
>
> Ok for trunk?
>
> Johann
>
> libgcc/
>        PR target/50931
>        * config/avr/t-avr (LIB1ASMSRC): Add _mulpsi3, _mulsqipsi3.
>        * config/avr/lib1funcs.S (__mulpsi3, __mulsqipsi3): New functions.
> gcc/
>        PR target/50931
>        * config/avr/avr.md (mulpsi3): New expander.
>        (*umulqihipsi3, *umulhiqipsi3): New insns.
>        (*mulsqipsi3.libgcc, *mulpsi3.libgcc): New insns.
>        (mulsqipsi3, *mulpsi3): New insn-and-splits.
>        (ashlpsi3): Turn to expander.  Move insn code to...
>        (*ashlpsi3): ...this new insn.
> testsuite/
>        PR target/50931
>        * gcc.target/avr/torture/int24-mul.c: New testcase.
>

Approved.

Denis.


Re: Fix flags for edges from/to entry/exit basic blocks (issue5486043)

2011-12-13 Thread Hans-Peter Nilsson
> From: Dmitry Vyukov 
> Date: Tue, 13 Dec 2011 11:43:18 +0100

> On Tue, Dec 13, 2011 at 3:04 AM, Hans-Peter Nilsson
>  wrote:
> > Details in PR51521.
> Sorry for the trouble. What should I do now?

Do what Diego and others asked, roll back your patch
immediately; I see it hasn't happened yet.  Normally, there'd be
a 48h grace period, but since you didn't follow protocol
regarding testing, it should be reverted immediately.

> Just in case I've prepared a patch that rolls it back:
> http://codereview.appspot.com/5485054

I'm not going there, patches for trunk must be sent to
gcc-patches@.  But, rolling back your patch means only one thing
wrt. the contents, no need to post it.  You don't even have to
ask for permission, see the docs: left as an exercise to find
the exact sentence.

brgds, H-P


C++ PATCH for c++/51406, 51161 (wrong-code with static cast to rvalue ref)

2011-12-13 Thread Jason Merrill
The code for casting to rvalue ref was assuming that no base adjustment 
would be necessary.  This patch delegates to the normal lvalue binding 
code, and then changes the result to be an rvalue reference.


Tested x86_64-pc-linux-gnu, applying to trunk.  Will apply to 4.5/4.6 as 
well after testing.
commit ed6825825765c8c02a53dc011e252ddfacc9f5e5
Author: Jason Merrill 
Date:   Tue Dec 13 23:02:02 2011 -0500

	PR c++/51406
	PR c++/51161
	* typeck.c (build_static_cast_1): Fix cast of lvalue to
	base rvalue reference.

diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
index 4973d7d..b168963 100644
--- a/gcc/cp/typeck.c
+++ b/gcc/cp/typeck.c
@@ -5856,12 +5856,22 @@ build_static_cast_1 (tree type, tree expr, bool c_cast_p,
  cv2 T2 if cv2 T2 is reference-compatible with cv1 T1 (8.5.3)."  */
   if (TREE_CODE (type) == REFERENCE_TYPE
   && TYPE_REF_IS_RVALUE (type)
-  && lvalue_or_rvalue_with_address_p (expr)
+  && real_lvalue_p (expr)
   && reference_related_p (TREE_TYPE (type), intype)
   && (c_cast_p || at_least_as_qualified_p (TREE_TYPE (type), intype)))
 {
-  expr = build_typed_address (expr, type);
-  return convert_from_reference (expr);
+  /* Handle the lvalue case here by casting to lvalue reference and
+	 then changing it to an rvalue reference.  Casting an xvalue to
+	 rvalue reference will be handled by the main code path.  */
+  tree lref = cp_build_reference_type (TREE_TYPE (type), false);
+  result = (perform_direct_initialization_if_possible
+		(lref, expr, c_cast_p, complain));
+  result = cp_fold_convert (type, result);
+  /* Make sure we don't fold back down to a named rvalue reference,
+	 because that would be an lvalue.  */
+  if (DECL_P (result))
+	result = build1 (NON_LVALUE_EXPR, type, result);
+  return convert_from_reference (result);
 }
 
   /* Resolve overloaded address here rather than once in
diff --git a/gcc/testsuite/g++.dg/cpp0x/rv-cast3.C b/gcc/testsuite/g++.dg/cpp0x/rv-cast3.C
new file mode 100644
index 000..6c70324
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/rv-cast3.C
@@ -0,0 +1,17 @@
+// PR c++/51406
+// { dg-do run { target c++11 } }
+
+extern "C" int printf(const char *,...);
+extern "C" void abort();
+
+struct A { int a; A() : a(1) {} };
+struct B { int b; B() : b(2) {} };
+struct X : A, B {};
+
+int main() {
+X x;
+int a=static_cast(x).a;
+int b=static_cast(x).b;
+// printf ("%d %d\n", a, b);
+if (a!=1 || b!=2) abort();
+}
diff --git a/gcc/testsuite/g++.dg/cpp0x/rv-cast4.C b/gcc/testsuite/g++.dg/cpp0x/rv-cast4.C
new file mode 100644
index 000..13f369d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/rv-cast4.C
@@ -0,0 +1,13 @@
+// PR c++/51161
+// { dg-do compile { target c++11 } }
+
+struct A{};
+struct B : A{};
+struct C : A{};
+struct D : B, C{};
+
+int main()
+{
+  D d;
+  static_cast(d);		// { dg-error "ambiguous" }
+}


[PATCH] MIPS16: Fix truncated DWARF-2 line information

2011-12-13 Thread Maciej W. Rozycki
Hi,

 I've noticed in the presence of a specific MIPS16 function thunk, GCC 
fails to emit suitable DWARF-2 location directives, which in turn causes 
DWARF-2 line records to provide truncated information.  This is probably 
best illustrated with an example.

 Given the following source code:

$ cat sinfrob16.c
int i;

double sinfrob16(double d)
{
i++;
return d;
}

we get this:

$ mips-linux-gnu-gcc -mips16 -Wa,-call_nonpic -fPIC -G0 -g -S sinfrob16.c
$ cat sinfrob16.s
.section .mdebug.abi32
.previous
.gnu_attribute 4, 1
.abicalls
.text
$Ltext0:
.cfi_sections   .debug_frame

.comm   i,4,4
.align  2
.globl  sinfrob16
$LFB0 = .
.file 1 "sinfrob16.c"
.loc 1 4 0
.cfi_startproc
# Stub function for sinfrob16 (double)
.section.mips16.fn.sinfrob16,"ax",@progbits
.align  2
.setnomips16
.ent__fn_stub_sinfrob16
.type   __fn_stub_sinfrob16, @function
__fn_stub_sinfrob16:
.setnoreorder
.cpload $25
.setreorder
.reloc  0,R_MIPS_NONE,sinfrob16
la  $25,__fn_local_sinfrob16
mfc1$5,$f12
mfc1$4,$f13
jr  $25
.end__fn_stub_sinfrob16
__fn_local_sinfrob16 = sinfrob16
.text
.setmips16
.entsinfrob16
.type   sinfrob16, @function
sinfrob16:
.frame  $17,8,$31   # vars= 0, regs= 2/0, args= 0, gp= 0
.mask   0x8002,-4
.fmask  0x,0
li  $2,%hi(_gp_disp)
addiu   $3,$pc,%lo(_gp_disp)
sll $2,16
addu$2,$3
save8,$17,$31
$LCFI0 = .
.cfi_def_cfa_offset 8
.cfi_offset 31, -4
.cfi_offset 17, -8
move$17,$sp
$LCFI1 = .
.cfi_def_cfa_register 17
move$28,$2
.loc 1 5 0
move$2,$28
move$24,$2
.loc 1 4 0
sw  $5,12($17)
sw  $4,8($17)
.loc 1 5 0
move$3,$24
lw  $2,%got(i)($3)
lw  $2,0($2)
addiu   $2,1
move$3,$24
lw  $3,%got(i)($3)
move$24,$3
move$3,$24
sw  $2,0($3)
.loc 1 6 0
lw  $3,12($17)
lw  $2,8($17)
move$25,$3
move$24,$2
move$3,$25
move$2,$24
move$25,$3
move$24,$2
.loc 1 7 0
move$3,$25
move$2,$24
move$6,$28
lw  $6,%got(__mips16_ret_df)($6)
jalr$6
lw  $6,0($17)
move$28,$6
move$sp,$17
$LCFI2 = .
.cfi_def_cfa_register 29
restore 8,$17,$31
$LCFI3 = .
.cfi_restore 17
.cfi_restore 31
.cfi_def_cfa_offset 0
j   $31
.endsinfrob16
.cfi_endproc
$LFE0:
.size   sinfrob16, .-sinfrob16
$Letext0:
.section.debug_info,"",@progbits
$Ldebug_info0:
.4byte  0x6f
.2byte  0x2
.4byte  $Ldebug_abbrev0
.byte   0x4
.uleb128 0x1
.4byte  $LASF1
.byte   0x1
.4byte  $LASF2
.4byte  $LASF3
.4byte  $Ldebug_ranges0+0
.4byte  0
.4byte  0
.4byte  $Ldebug_line0
.uleb128 0x2
.byte   0x1
.4byte  $LASF4
.byte   0x1
.byte   0x3
.byte   0x1
.4byte  0x54
.4byte  $LFB0
.4byte  $LFE0
.4byte  $LLST0
.byte   0x1
.4byte  0x54
.uleb128 0x3
.ascii  "d\000"
.byte   0x1
.byte   0x3
.4byte  0x54
.byte   0x2
.byte   0x91
.sleb128 0
.byte   0
.uleb128 0x4
.byte   0x8
.byte   0x4
.4byte  $LASF0
.uleb128 0x5
.ascii  "i\000"
.byte   0x1
.byte   0x1
.4byte  0x6b
.byte   0x1
.byte   0x5
.byte   0x3
.4byte  i
.uleb128 0x6
.byte   0x4
.byte   0x5
.ascii  "int\000"
.byte   0
.section.debug_abbrev,"",@progbits
$Ldebug_abbrev0:
.uleb128 0x1
.uleb128 0x11
.byte   0x1
.uleb128 0x25
.uleb128 0xe
.uleb128 0x13
.uleb128 0xb
.uleb128 0x3
.uleb128 0xe
.uleb128 0x1b
.uleb128 0xe
.uleb128 0x55
.uleb128 0x6
.uleb128 0x11
.uleb128 0x1
.uleb128 0x52
.uleb128 0x1
.uleb128 0x10
.uleb128 0x6
.byte   0
.byte   0
.uleb128 0x2
.uleb128 0x2e
.byte   0x1
.uleb128 0x3f
.uleb128 0xc
.uleb128 0x3
.uleb128 0xe
.uleb128 0x3a
.uleb128 0xb
.uleb128 0x3b
.uleb128 0xb
.uleb128 0x27
.uleb128 0xc
.uleb128 0x4

Go patch committed: Move reading export data to gcc interface

2011-12-13 Thread Ian Lance Taylor
This patch moves reading export data from an object file out of the Go
frontend proper into the gcc side of the interface.  The purpose of this
move is to let the gcc side entirely control the name of the Mach-O
segment and section where the export data is stored.  Bootstrapped and
ran Go testsuite on x86_64-unknown-linux-gnu.  Committed to mainline.

Ian


2011-12-13  Ian Lance Taylor  

* go-backend.c: #include "simple-object.h" and "intl.h".
(GO_EXPORT_SEGMENT_NAME): Define if not defined.
(GO_EXPORT_SECTION_NAME): Likewise.
(go_write_export_data): Use GO_EXPORT_SECTION_NAME.
(go_read_export_data): New function.
* go-c.h (go_read_export_data): Declare.


Index: gcc/go/gofrontend/import.h
===
--- gcc/go/gofrontend/import.h	(revision 181813)
+++ gcc/go/gofrontend/import.h	(working copy)
@@ -287,7 +287,7 @@ class Stream_from_string : public Import
   size_t pos_;
 };
 
-// Read import data from an allocated buffer.
+// Read import data from a buffer allocated using malloc.
 
 class Stream_from_buffer : public Import::Stream
 {
@@ -297,7 +297,7 @@ class Stream_from_buffer : public Import
   { }
 
   ~Stream_from_buffer()
-  { delete[] this->buf_; }
+  { free(this->buf_); }
 
  protected:
   bool
Index: gcc/go/gofrontend/import.cc
===
--- gcc/go/gofrontend/import.cc	(revision 181889)
+++ gcc/go/gofrontend/import.cc	(working copy)
@@ -215,46 +215,24 @@ Import::find_object_export_data(const st
 off_t offset,
 Location location)
 {
-  const char* errmsg;
+  char *buf;
+  size_t len;
   int err;
-  simple_object_read* sobj = simple_object_start_read(fd, offset,
-		  "__GNU_GO",
-		  &errmsg, &err);
-  if (sobj == NULL)
-return NULL;
-
-  off_t sec_offset;
-  off_t sec_length;
-  int found = simple_object_find_section(sobj, ".go_export", &sec_offset,
-	 &sec_length, &errmsg, &err);
-
-  simple_object_release_read(sobj);
-
-  if (!found)
-return NULL;
-
-  if (lseek(fd, offset + sec_offset, SEEK_SET) < 0)
+  const char *errmsg = go_read_export_data(fd, offset, &buf, &len, &err);
+  if (errmsg != NULL)
 {
-  error_at(location, "lseek %s failed: %m", filename.c_str());
+  if (err == 0)
+	error_at(location, "%s: %s", filename.c_str(), errmsg);
+  else
+	error_at(location, "%s: %s: %s", filename.c_str(), errmsg,
+		 xstrerror(err));
   return NULL;
 }
 
-  char* buf = new char[sec_length];
-  ssize_t c = read(fd, buf, sec_length);
-  if (c < 0)
-{
-  error_at(location, "read %s failed: %m", filename.c_str());
-  delete[] buf;
-  return NULL;
-}
-  if (c < sec_length)
-{
-  error_at(location, "%s: short read", filename.c_str());
-  delete[] buf;
-  return NULL;
-}
+  if (buf == NULL)
+return NULL;
 
-  return new Stream_from_buffer(buf, sec_length);
+  return new Stream_from_buffer(buf, len);
 }
 
 // Class Import.
Index: gcc/go/go-c.h
===
--- gcc/go/go-c.h	(revision 181628)
+++ gcc/go/go-c.h	(working copy)
@@ -69,6 +69,8 @@ extern void go_imported_unsafe (void);
 
 extern void go_write_export_data (const char *, unsigned int);
 
+extern const char *go_read_export_data (int, off_t, char **, size_t *, int *);
+
 #if defined(__cplusplus) && !defined(ENABLE_BUILD_WITH_CXX)
 } /* End extern "C".  */
 #endif
Index: gcc/go/go-backend.c
===
--- gcc/go/go-backend.c	(revision 181628)
+++ gcc/go/go-backend.c	(working copy)
@@ -20,16 +20,31 @@ along with GCC; see the file COPYING3.  
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
+#include "simple-object.h"
 #include "tm.h"
 #include "rtl.h"
 #include "tree.h"
 #include "tm_p.h"
+#include "intl.h"
 #include "output.h"
 #include "target.h"
 #include "common/common-target.h"
 
 #include "go-c.h"
 
+/* The segment name we pass to simple_object_start_read to find Go
+   export data.  */
+
+#ifndef GO_EXPORT_SEGMENT_NAME
+#define GO_EXPORT_SEGMENT_NAME "__GNU_GO"
+#endif
+
+/* The section name we use when reading and writing export data.  */
+
+#ifndef GO_EXPORT_SECTION_NAME
+#define GO_EXPORT_SECTION_NAME ".go_export"
+#endif
+
 /* This file holds all the cases where the Go frontend needs
information from gcc's backend.  */
 
@@ -95,7 +110,7 @@ go_imported_unsafe (void)
 }
 
 /* This is called by the Go frontend proper to add data to the
-   .go_export section.  */
+   section containing Go export data.  */
 
 void
 go_write_export_data (const char *bytes, unsigned int size)
@@ -105,9 +120,87 @@ go_write_export_data (const char *bytes,
   if (sec == NULL)
 {
   gcc_assert (targetm_common.have_named_sections);
-  sec = get_section (".go_export", SECTION_DEBUG, NULL);
+  sec = get_section (GO_EXPORT_SECTION_NAME, SECTION_DEBUG, NULL);
 }
 
   

Re: [google] dump inline decisions to stderr under -fopt-info

2011-12-13 Thread Dehao Chen
I've updated the patch to fix a bug in dump_inline_decision.

Thanks,
Dehao

On Thu, Dec 1, 2011 at 9:59 AM, Dehao Chen  wrote:
>
> This patch is for google-{main|gcc_4.6} only.
>
> Tested with bootstrap and regression tests.
>
> Dump inline decisions, also output the inline chain.
>
> Dehao
>
> 2011-12-01  Dehao Chen  
>
>        * ipa-inline.c (dump_inline_decision): New function.
>        (inline_small_functions): Use it to dump the inline decisions to 
> stderr.
>
> Index: gcc/ipa-inline.c
> ===
> --- gcc/ipa-inline.c    (revision 181835)
> +++ gcc/ipa-inline.c    (working copy)
> @@ -1377,6 +1377,45 @@
>  }
>
>
> +/* Dump the inline decision of EDGE to stderr.  */
> +
> +static void
> +dump_inline_decision (struct cgraph_edge *edge)
> +{
> +  location_t locus;
> +  size_t buf_size = 4096;
> +  size_t current_string_len = 0;
> +  char *buf = (char *) xmalloc (buf_size);
> +  struct cgraph_node *inlined_to;
> +  gcov_type callee_count = edge->callee->count;
> +  buf[0] = 0;
> +  if (edge->inline_failed == CIF_OK && edge->callee->clone_of)
> +    callee_count += edge->callee->clone_of->count;
> +  for (inlined_to = edge->caller->global.inlined_to;
> +       inlined_to; inlined_to = inlined_to->global.inlined_to)
> +    {
> +      const char *name = cgraph_node_name (inlined_to);
> +      if (!name)
> +       name = "unknown";
> +      current_string_len += (strlen (name) + 4);
> +      while (current_string_len >= buf_size)
> +       {
> +         buf_size *= 2;
> +         buf = (char *) xrealloc (buf, buf_size);
> +       }
> +      strcat (buf, "-->");
> +      strcat (buf, name);
> +    }
> +  locus = gimple_location (edge->call_stmt);
> +  inform (locus, "%s ("HOST_WIDEST_INT_PRINT_DEC") --"
> +         HOST_WIDEST_INT_PRINT_DEC"--> %s ("
> +         HOST_WIDEST_INT_PRINT_DEC") %s : %s",
> +         cgraph_node_name (edge->callee), callee_count, edge->count,
> +         cgraph_node_name (edge->caller), edge->caller->count, buf,
> +         edge->inline_failed == CIF_OK ? "INLINED": "IGNORED");
> +}
> +
> +
>  /* We use greedy algorithm for inlining of small functions:
>    All inline candidates are put into prioritized heap ordered in
>    increasing badness.
> @@ -1428,6 +1467,7 @@
>   overall_size = initial_size;
>   max_size = compute_max_insns (overall_size);
>   min_size = overall_size;
> +  edge = NULL;
>
>   /* Populate the heeap with all edges we might inline.  */
>
> @@ -1462,6 +1502,9 @@
>       int current_badness;
>       int growth;
>
> +      if (edge && flag_opt_info >= OPT_INFO_MIN)
> +       dump_inline_decision (edge);
> +
>       edge = (struct cgraph_edge *) fibheap_extract_min (heap);
>       gcc_assert (edge->aux);
>       edge->aux = NULL;
> @@ -1482,6 +1525,7 @@
>       if (current_badness != badness)
>        {
>          edge->aux = fibheap_insert (heap, current_badness, edge);
> +         edge = NULL;
>          continue;
>        }
>
> @@ -1636,6 +1680,8 @@
>            fprintf (dump_file, "New minimal size reached: %i\n", min_size);
>        }
>     }
> +  if (edge && flag_opt_info >= OPT_INFO_MIN)
> +    dump_inline_decision (edge);
>
>   free_growth_caches ();
>   if (new_indirect_edges)


Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue 5488054)

2011-12-13 Thread tmsriram

On 2011/12/14 00:04:10, davidxl wrote:

http://codereview.appspot.com/5488054/diff/4002/tree-vect-stmts.c
File tree-vect-stmts.c (right):



http://codereview.appspot.com/5488054/diff/4002/tree-vect-stmts.c#newcode3712

tree-vect-stmts.c:3712: }
The check can be put into a helper function.


Fixed.

http://codereview.appspot.com/5488054/


Re: PR middle-end/51411: handle transaction_safe virtual inlined methods

2011-12-13 Thread Patrick Marlier

On 12/13/2011 04:04 PM, Aldy Hernandez wrote:

On 12/13/11 15:02, Richard Henderson wrote:

On 12/13/2011 12:48 PM, Aldy Hernandez wrote:

PR middle-end/51411
* trans-mem.c (ipa_tm_create_version): Do not zap DECL_EXTERNAL.

...

/* ??? Is it worth trying to use make_decl_one_only? */
if (DECL_DECLARED_INLINE_P (new_decl)&& DECL_EXTERNAL (new_decl))
- {
- DECL_EXTERNAL (new_decl) = 0;
- TREE_PUBLIC (new_decl) = 0;
- }
+ DECL_EXTERNAL (new_decl) = 0;


Yes, that's what we had in mind. Though of course the changelog
doesn't match.


r~


Ok, my English typing privileges have been revoked until further notice.
I'm obviously typing cross-eyed right now.

Here's what I have, but keep in mind that I'm going to watch Sesame
Street so I'll be away from my keyboard for a few hours...

PR middle-end/51411
* trans-mem.c (ipa_tm_create_version): Do not zap TREE_PUBLIC.



This causes multiple definitions.

Extract of output with velox/glob2 benchmark:

src/AINicowar.o: In function `_ZGTtNSsD2Ev':
./include/c++/4.7.0/bits/basic_string.h:535: multiple definition of 
`_ZGTtNSsD2Ev'
src/AICastor.o:./include/c++/4.7.0/bits/basic_string.h:535: first 
defined here

src/AINicowar.o: In function `_ZGTtNKSs13get_allocatorEv':
./include/c++/4.7.0/bits/basic_string.h:1814: multiple definition of 
`_ZGTtNKSs13get_allocatorEv'
src/AICastor.o:./include/c++/4.7.0/bits/basic_string.h:1814: first 
defined here

src/AINicowar.o: In function `_ZGTtNKSs6_M_repEv':
./include/c++/4.7.0/bits/basic_string.h:297: multiple definition of 
`_ZGTtNKSs6_M_repEv'
src/AICastor.o:./include/c++/4.7.0/bits/basic_string.h:297: first 
defined here

src/AINicowar.o: In function `_ZGTtNSs4_Rep10_M_disposeERKSaIcE':
./include/c++/4.7.0/bits/basic_string.h:234: multiple definition of 
`_ZGTtNSs4_Rep10_M_disposeERKSaIcE'
src/AICastor.o:./include/c++/4.7.0/bits/basic_string.h:234: first 
defined here

src/AINicowar.o: In function `_ZGTtNSaIcED2Ev':
./include/c++/4.7.0/bits/allocator.h:112: multiple definition of 
`_ZGTtNSaIcED1Ev'

src/AICastor.o:./include/c++/4.7.0/bits/allocator.h:112: first defined here
src/AINicowar.o: In function `_ZGTtNSaIcEC2ERKS_':
./include/c++/4.7.0/bits/allocator.h:106: multiple definition of 
`_ZGTtNSaIcEC1ERKS_'

...

Without the patch, it is ok.
If I remove completely this part:
if (DECL_DECLARED_INLINE_P (new_decl) && DECL_EXTERNAL (new_decl))
  {
DECL_EXTERNAL (new_decl) = 0;
TREE_PUBLIC (new_decl) = 0;
  }
It ends with *only* one undefined symbol:
src/UnitsSkins.o: In function 
`_ZGTtStltIcSt11char_traitsIcESaIcEEbRKSbIT_T0_T1_ES8_':
./include/c++/4.7.0/bits/basic_string.h:2568: undefined reference to 
`_ZGTtNKSs7compareERKSs'


$ c++filt _ZNKSs7compareERKSs
std::basic_string, std::allocator 
>::compare(std::basic_string, 
std::allocator > const&) const


I am not sure the way to give you useful information.

Patrick.


Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue 5488054)

2011-12-13 Thread davidxl


http://codereview.appspot.com/5488054/diff/4002/tree-vect-stmts.c
File tree-vect-stmts.c (right):

http://codereview.appspot.com/5488054/diff/4002/tree-vect-stmts.c#newcode3712
tree-vect-stmts.c:3712: }
The check can be put into a helper function.

http://codereview.appspot.com/5488054/


Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)

2011-12-13 Thread Sriraman Tallam
I updated the patch to add the checks in vectorizable_load and
vectorizable_store itself.

Thanks,
-Sri.

On Tue, Dec 13, 2011 at 12:16 PM, Xinliang David Li  wrote:
> See instruction tables here:
> http://www.agner.org/optimize/instruction_tables.pdf
>
> My brief reading of the table for core2 and corei7 suggest the following:
>
> 1. On core2
>
> movdqu -- both load and store forms take up to 8 cycles to complete,
> and store form produces 8 uops while load produces 4 uops
>
> movsd load:  1 uop, 2 cycle latency
> movsd store: 1 uop, 3 cycle latency
>
> movhpd, movlpd load: 2 uops, 3 cycle latency
> movhpd store: 2 uops, 5 cycle latency
> movlpd store: 1uop, 3 cycle latency
>
>
> 2. Core i7
>
> movdqu load: 1 uop, 2 cycle latency
> movdqu store: 1 uop, 3 cycle latency
>
> movsd load: 1 uop, 2 cycle latency
> movsd store: 1uop, 3 cycle latency
>
> movhpd, movlpd load: 2 uop, 3 cycle latency
> movhpd, movlpd sotre: 2 uop, 5 cycle latency
>
>
> From the above, looks like a Sri's original simple heuristic should work fine
>
> 1) for corei7, if the load and stores can not be proved to be 128 bit
> aligned, always use movdqu
>
> 2) for core2,  experiment can be done to determine whether to look at
> unaligned stores or both unaligned loads to disable vectorization.
>
> Yes, for longer term, a more precise cost model is probably needed --
> but require lots of work which may not work a lot better in practice.
>
> What is more important is to beef up gcc infrastructure to allow more
> aggressive alignment (info) propagation.
>
> In 4.4, gcc does alignment (output array) based versioning -- Sri's
> patch has the effect of doing the samething but only for selected
> targets.
>
> thanks,
>
> David
>
> On Tue, Dec 13, 2011 at 10:56 AM, Richard Henderson  wrote:
>> On 12/13/2011 10:26 AM, Sriraman Tallam wrote:
>>> Cool, this works for stores!  It generates the movlps + movhps. I have
>>> to also make a similar change to another call to gen_sse2_movdqu for
>>> loads. Would it be ok to not do this when tune=core2?
>>
>> We can work something out.
>>
>> I'd like you to do the benchmarking to know if unaligned loads are really as 
>> expensive as unaligned stores, and whether there are reformatting penalties 
>> that make the movlps+movhps option for either load or store less attractive.
>>
>>
>> r~


Re: Go patch committed: Multiplex goroutines onto OS threads

2011-12-13 Thread Ian Lance Taylor
Uros Bizjak  writes:

>> This patch changes the Go library to multiplex goroutines onto operating
>> system threads.  Previously, each new goroutine ran in a separate
>> thread.  That is inefficient for programs with lots of goroutines.  This
>> patch changes the library such that it runs a certain numbers of
>> threads, and lets each thread switch between goroutines.  This is how
>> the master Go library works, and this patch brings in code from the
>> master Go library, adjusted for use by gccgo.
>
> For some reason I get this failure on alphaev68-pc-linux-gnu:
>
> --- FAIL: runtime_test.TestGcSys (4.64 seconds)
> using 64 MB
> using too much memory: 64 MB
>
> Raising the value in runtime/gc_test.go to 10e8 runs the test OK.

Thanks for reporting this.  I just committed the appended patch to both
the master Go library and to libgo.  I hope this will fix this problem.
Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu.

Ian

diff -r 832d9ebdb3c6 libgo/go/runtime/gc_test.go
--- a/libgo/go/runtime/gc_test.go	Tue Dec 13 14:24:59 2011 -0800
+++ b/libgo/go/runtime/gc_test.go	Tue Dec 13 15:13:43 2011 -0800
@@ -6,16 +6,24 @@
 )
 
 func TestGcSys(t *testing.T) {
+	runtime.GC()
+	runtime.UpdateMemStats()
+	sys := runtime.MemStats.Sys
+
 	for i := 0; i < 100; i++ {
 		workthegc()
 	}
 
 	// Should only be using a few MB.
 	runtime.UpdateMemStats()
-	sys := runtime.MemStats.Sys
-	t.Logf("using %d MB", sys>>20)
-	if sys > 10e6 {
-		t.Fatalf("using too much memory: %d MB", sys>>20)
+	if sys > runtime.MemStats.Sys {
+		sys = 0
+	} else {
+		sys = runtime.MemStats.Sys - sys
+	}
+	t.Logf("used %d extra bytes", sys)
+	if sys > 2<<20 {
+		t.Fatalf("using too much memory: %d bytes", sys)
 	}
 }
 


Re: [google] Add support for delete operator that takes the size of the object as a parameter

2011-12-13 Thread Easwaran Raman
On Mon, Dec 12, 2011 at 1:52 PM, Easwaran Raman  wrote:
> On Mon, Dec 12, 2011 at 12:41 PM, Paolo Carlini
>  wrote:
>> On 12/12/2011 09:37 PM, Easwaran Raman wrote:
>>>
>>> Thanks for the comments Paolo. I have attached the new patch. I have
>>> bumped the version to 3.4.18
>>
>> You shouldn't: 4.7 is not out yet, thus no reason to increase the minor
>> version beyond the current 17.
> Ok, I then don't understand your comment
>  "Note that backporting the patch as-is to 4_6-branch would be very
> wrong in terms of ABI (in mainline we already have a 3.4.17)".
> My original patch added the new symbol in version 3.4.17. Since we
> don't want to add the symbol to 3.4.16 (if we have a situation where
> the new runtime is not available when running a program compiled with
> -fsized-delete) and you   said I shouldn't be using 3.4.17, I assumed
> I had to bump up the version.
>
>>
>>>  and used _ZdlPv[jmy] in gnu.ver.  I have
>>> also added the symbol to baseline_symbols.txt of other targets.
>>
>> You should not, just read again what I wrote. And you don't have to believe
>> me: just browse the libstdc++ ChangeLogs and see if somebody ever does that
>> when the linker map is touched.
>
> Sorry, I again misunderstood this as well (and still don't have a good
> picture). Is the part which adds _ZdlPv[jmy] in gnu.ver ok? I added
> that by mimicking the symbol  _Znw[jmy] found in the same file. From
> the log, it looks like the baseline_symbols.txt seems to be generated,
> but I am not sure how that is to be done. For example, r145437 says a
> bunch of these baseline_symbols.txt are regenerated, but I don't see
> any other change from which this might be generated.
>
> Thanks,
> Easwaran

It looks like running 'make new-abi-baseline' under
TARGET/libstdc++-v3/testsuite generates the baseline file. Should I
check in that? What about other targets?

Thanks,
Easwaran

>
>> Paolo.


Re: [patch] add __is_final trait to fix libstdc++/51365

2011-12-13 Thread Gabriel Dos Reis
On Tue, Dec 13, 2011 at 12:02 PM, Jonathan Wakely  wrote:
> On 13 December 2011 17:01, Paolo Carlini  wrote:
>> Hi,
>>>
>>> This patch seems pretty simple and safe.  Are you (Gaby and Paolo) arguing 
>>> that even so, it shouldn't go in?
>>
>> As far as I'm concerned, definetely not! I also think that it would be great 
>> if, for 4.7, Jon could handle the library issues with EBO by exploiting it.
>
> Yes, if this goes in for 4.7 I will definitely follow it with the
> library changes to make use of it.
>
>> I only meant to say that something seems to me more fundamentally wrong at 
>> the design level about 'final' vs EBO, my hope is that for 4.8 we'll have a 
>> longer term stable solution based on a ISO Committee position.
>
> In one of the earlier bug report comments I proposed a
> __gnu_cxx::is_final library trait to expose the __is_final(T)
> intrinsic for users, but decided against it precisely because I don't
> know how the committee will want to deal with the issue longer term.
>
> So the proposed patch just adds the __is_final intrinsic for use
> internally by the library, to allow library changes so the test cases
> in the bug report will pass.  If preferred I won't even add __is_final
> to the extend.texi docs, to leave it as an undocumented extension that
> we could more easily remove (or deprecate) later if necessary.

Leaving __is_final undocumented is probably a good compromise.


Re: [patch] add __is_final trait to fix libstdc++/51365

2011-12-13 Thread Gabriel Dos Reis
On Tue, Dec 13, 2011 at 10:43 AM, Jason Merrill  wrote:
> On 12/12/2011 09:14 AM, Gabriel Dos Reis wrote:
>>
>> On Mon, Dec 12, 2011 at 5:25 AM, Paolo Carlini
>>  wrote:
>>
 I think being able to detect a final class is good enough for now,
 until we find out if there are real problems being encountered as
 people make more use of C++11.
>>>
>>>
>>> Maybe. But in my opinion we should not rush. Something is wrong here at a
>>> more fundamental level.
>>
>>
>> I agree that we should wait a little bit for the dust to settle down.
>>  Users should
>> avoid it, and implementors shouldn't go through hoops non commensurable
>> with
>> the benefits of "final".  Maybe the "right" primitive is slightly
>> different.
>
>
> This patch seems pretty simple and safe.  Are you (Gaby and Paolo) arguing
> that even so, it shouldn't go in?

My comment isn't about the technical aspect of the patch itself -- it
is a simple patch.
But we don't seem to understand yet the implications of "final".  As
was observed
on the standard reflectors, the appropriate trait might actually need
to be binary
instead of unary as this patch implements -- for the purpose of empty base
optimization which is where the problems pop up.


Re: [Tentative patch] -finit-real=snan - would it really be so simple for automatic arrays.

2011-12-13 Thread Toon Moene

On 12/13/2011 08:41 PM, Thomas Koenig wrote:


Hi Toon,

(For gcc-patches: Patch at
http://gcc.gnu.org/ml/fortran/2011-12/msg00080.html )

I would appreciate a review and a regression test by someone who can.


Regression-test passed on trunk.

This one really looks obvious. Unless somebody objects who knows this
field better than I do, OK for trunk.


Thanks - I'll wait two days for further comment and then apply.

--
Toon Moene - e-mail: t...@moene.org - phone: +31 346 214290
Saturnushof 14, 3738 XG  Maartensdijk, The Netherlands
At home: http://moene.org/~toon/; weather: http://moene.org/~hirlam/
Progress of GNU Fortran: http://gcc.gnu.org/wiki/GFortran#news


Re: [gofrontend-dev] *-rtems Compilation Failures Advice Wanted

2011-12-13 Thread Ian Lance Taylor
Joel Sherrill  writes:

> RTEMS does not currently have IPv6.
>
> /users/joel/test-gcc/gcc-svn/libgo/go/syscall/socket.go:299:51: error:
> use of undefined type 'IPv6Mreq'

This patch is intended to fix this problem.  Bootstrapped on
x86_64-unknown-linux-gnu, which proves little since the code is not
exercised.  Committed to mainline.

Ian

diff -r f332ea69a151 libgo/mksysinfo.sh
--- a/libgo/mksysinfo.sh	Tue Dec 13 14:07:07 2011 -0800
+++ b/libgo/mksysinfo.sh	Tue Dec 13 14:23:23 2011 -0800
@@ -183,10 +183,12 @@
 grep '^const _SHUT_' gen-sysinfo.go |
   sed -e 's/^\(const \)_\(SHUT[^= ]*\)\(.*\)$/\1\2 = _\2/' >> ${OUT}
 
-# The net package requires a definition for IPV6ONLY.
-if ! grep '^const IPV6_V6ONLY ' ${OUT} >/dev/null 2>&1; then
-  echo "const IPV6_V6ONLY = 0" >> ${OUT}
-fi
+# The net package requires some const definitions.
+for m in IPV6_V6ONLY IPPROTO_IPV6 IPV6_JOIN_GROUP IPV6_LEAVE_GROUP; do
+  if ! grep "^const $m " ${OUT} >/dev/null 2>&1; then
+echo "const $m = 0" >> ${OUT}
+  fi
+done
 
 # pathconf constants.
 grep '^const __PC' gen-sysinfo.go |
@@ -474,10 +476,13 @@
   -e 's/_in_addr/[4]byte/g' \
 >> ${OUT}
 
+# We need IPMreq to compile the net package.
+if ! grep 'type IPMreq ' ${OUT} >/dev/null 2>&1; then
+  echo 'type IPMreq struct { Multiaddr [4]byte; Interface [4]byte; }' >> ${OUT}
+fi
+
 # The size of the ip_mreq struct.
-if grep 'type IPMreq ' ${OUT} > /dev/null 2>&1; then
-  echo 'var SizeofIPMreq = int(unsafe.Sizeof(IPMreq{}))' >> ${OUT}
-fi
+echo 'var SizeofIPMreq = int(unsafe.Sizeof(IPMreq{}))' >> ${OUT}
 
 # The ipv6_mreq struct.
 grep '^type _ipv6_mreq ' gen-sysinfo.go | \
@@ -487,6 +492,11 @@
   -e 's/_in6_addr/[16]byte/' \
 >> ${OUT}
 
+# We need IPv6Mreq to compile the net package.
+if ! grep 'type IPv6Mreq ' ${OUT} >/dev/null 2>&1; then
+  echo 'type IPv6Mreq struct { Multiaddr [16]byte; Interface uint32; }' >> ${OUT}
+fi
+
 # Try to guess the type to use for fd_set.
 fd_set=`grep '^type _fd_set ' gen-sysinfo.go || true`
 fds_bits_type="_C_long"


Re: RTEMS Go Patch

2011-12-13 Thread Ian Lance Taylor
Joel Sherrill  writes:

> * libgo/go/syscall/wait.c:  Conditionalize on WIFxxx macros
> and SIGxxx being defined.

I think I have fixed the wait issues as follows.  Bootstrapped on
x86_64-unknown-linux-gnu, which means little as the files are not
compiled there.  Committed to mainline.

Ian

diff -r e62855c629d3 libgo/Makefile.am
--- a/libgo/Makefile.am	Tue Dec 13 13:58:40 2011 -0800
+++ b/libgo/Makefile.am	Tue Dec 13 14:06:40 2011 -0800
@@ -1450,11 +1450,22 @@
 endif
 
 # Define Wait4.
+if LIBGO_IS_RTEMS
+syscall_wait_file =
+else
 if HAVE_WAIT4
 syscall_wait_file = go/syscall/libcall_wait4.go
 else
 syscall_wait_file = go/syscall/libcall_waitpid.go
 endif
+endif
+
+# Support for pulling apart wait status.
+if LIBGO_IS_RTEMS
+syscall_wait_c_file =
+else
+syscall_wait_c_file = go/syscall/wait.c
+endif
 
 # Define Sleep.
 if LIBGO_IS_RTEMS
@@ -1564,7 +1575,7 @@
 	syscall_arch.go
 go_syscall_c_files = \
 	go/syscall/errno.c \
-	go/syscall/wait.c
+	$(syscall_wait_c_file)
 
 libcalls.go: s-libcalls; @true
 s-libcalls: Makefile go/syscall/mksyscall.awk $(go_base_syscall_files)
diff -r e62855c629d3 libgo/go/syscall/exec_stubs.go
--- a/libgo/go/syscall/exec_stubs.go	Tue Dec 13 13:58:40 2011 -0800
+++ b/libgo/go/syscall/exec_stubs.go	Tue Dec 13 14:06:40 2011 -0800
@@ -7,17 +7,27 @@
 package syscall
 
 func ForkExec(argv0 string, argv []string, envv []string, dir string, fd []int) (pid int, err int) {
-	return -1, ENOSYS;
+	return -1, ENOSYS
 }
 
 func Exec(argv0 string, argv []string, envv []string) (err int) {
-	return ENOSYS;
+	return ENOSYS
 }
 
 func Wait4(pid int, wstatus *WaitStatus, options int, rusage *Rusage) (wpid int, err error) {
-	return -1, ENOSYS;
+	return -1, ENOSYS
 }
 
+func (w WaitStatus) Exited() bool{ return false }
+func (w WaitStatus) Signaled() bool  { return false }
+func (w WaitStatus) Stopped() bool   { return false }
+func (w WaitStatus) Continued() bool { return false }
+func (w WaitStatus) CoreDump() bool  { return false }
+func (w WaitStatus) ExitStatus() int { return 0 }
+func (w WaitStatus) Signal() int { return 0 }
+func (w WaitStatus) StopSignal() int { return 0 }
+func (w WaitStatus) TrapCause() int  { return 0 }
+
 func raw_ptrace(request int, pid int, addr *byte, data *byte) Errno {
 	return ENOSYS
 }


Re: RTEMS Go Patch

2011-12-13 Thread Ian Lance Taylor
Joel Sherrill  writes:

> 2011-12-02  Joel Sherrill 
>
> * runtime/go-signal.c: Add conditional on SIGPROF.
> * runtime/mem_posix_memalign.c: Add USED directives.
> * libgo/go/syscall/wait.c:  Conditionalize on WIFxxx macros
> and SIGxxx being defined.

I committed the changes to runtime/go-signal.c and
runtime/mem_posix_memalign.c.

As I mentioned in an earlier message, we probably don't want to compile
wait.c on RTEMS at all.  I will work on that now.

Ian


[PATCH] PR c++/51477 - ICE with invalid NSDMI

2011-12-13 Thread Dodji Seketeli
Hello,

In the example of this patch, during the implicit declaration of the
destructor, walk_field_subobs calls locate_fn_flags on the field
invalid field 'x', to locate its destructor.  That function pokes the
BINFO of that field, which is NULL, and passes it along to
lookup_fnfields.  And we ICE because of that NULL base type.

Just preventing lookup_member from crashing because of a NULL (or
otherwise invalid) base type fixes the issue.

Bootstrapped and tested on x86_64-unknown-linux-gnu against trunk.

gcc/cp/

PR c++/51477
* search.c (lookup_member): Get out early on invalid base type.

gcc/testsuite/

PR c++/51477
* g++.dg/cpp0x/nsdmi6.C: New test.
---
 gcc/cp/search.c |4 +++-
 gcc/testsuite/g++.dg/cpp0x/nsdmi6.C |8 
 2 files changed, 11 insertions(+), 1 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/nsdmi6.C

diff --git a/gcc/cp/search.c b/gcc/cp/search.c
index 3894c68..0ceb5bc 100644
--- a/gcc/cp/search.c
+++ b/gcc/cp/search.c
@@ -1171,7 +1171,9 @@ lookup_member (tree xbasetype, tree name, int protect, 
bool want_type,
 
   const char *errstr = 0;
 
-  if (name == error_mark_node)
+  if (name == error_mark_node
+  || xbasetype == NULL_TREE
+  || xbasetype == error_mark_node)
 return NULL_TREE;
 
   gcc_assert (TREE_CODE (name) == IDENTIFIER_NODE);
diff --git a/gcc/testsuite/g++.dg/cpp0x/nsdmi6.C 
b/gcc/testsuite/g++.dg/cpp0x/nsdmi6.C
new file mode 100644
index 000..bb455e7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/nsdmi6.C
@@ -0,0 +1,8 @@
+// Origin PR c++/51477
+// { dg-options "-std=c++11" }
+
+struct A
+{
+typedef int int T; // { dg-error "two or more data types in declaration" }
+struct T x[1] = { 0 }; // { dg-error "invalid|forward" }
+};
-- 
1.7.6.4


-- 
Dodji


Re: [PATCH] PR c++/51476 - ICE on PTRMEM_CST as template argument in c++11

2011-12-13 Thread Dodji Seketeli
Jason Merrill  writes:

> But yes, the patch is OK after fixing the comment.

Thanks.  Here is what I am bootstrapping.

gcc/cp/

PR c++/51476
* pt.c (convert_nontype_argument): Don't call maybe_constant_value
for PTRMEM_CST nodes.

gcc/testsuite/

PR c++/51476
* cpp0x/ptrmem-cst-arg1.C: New test.
---
 gcc/cp/pt.c  |   10 +++---
 gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C |9 +
 2 files changed, 16 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index bb5aa0c..3e958bc 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -5720,11 +5720,15 @@ convert_nontype_argument (tree type, tree expr, 
tsubst_flags_t complain)
  to a null value, but otherwise still need to be of a specific form.  */
   if (cxx_dialect >= cxx0x)
 {
-  if (INTEGRAL_OR_ENUMERATION_TYPE_P (type))
+  if (TREE_CODE (expr) == PTRMEM_CST)
+   /* A PTRMEM_CST is already constant, and a valid template
+  argument for a parameter of pointer to member type, we just want
+  to leave it in that form rather than lower it to a
+  CONSTRUCTOR.  */;
+  else if (INTEGRAL_OR_ENUMERATION_TYPE_P (type))
expr = maybe_constant_value (expr);
   else if (TYPE_PTR_P (type)
-  || (TYPE_PTR_TO_MEMBER_P (type)
-  && TREE_CODE (expr) != PTRMEM_CST))
+  || TYPE_PTR_TO_MEMBER_P (type))
{
  tree folded = maybe_constant_value (expr);
  if (TYPE_PTR_P (type) ? integer_zerop (folded)
diff --git a/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C 
b/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C
new file mode 100644
index 000..b6c81d5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C
@@ -0,0 +1,9 @@
+// Origin PR c++/51476
+// { dg-options "-std=c++11" }
+
+template struct A {}; 
  
+struct B
+{
+int i;
+A<&B::i> a; // { dg-error "could not convert template argument" }
+};
-- 
1.7.6.4

-- 
Dodji


Re: PR middle-end/51411: handle transaction_safe virtual inlined methods

2011-12-13 Thread Aldy Hernandez

On 12/13/11 15:02, Richard Henderson wrote:

On 12/13/2011 12:48 PM, Aldy Hernandez wrote:

PR middle-end/51411
* trans-mem.c (ipa_tm_create_version): Do not zap DECL_EXTERNAL.

...

/* ??? Is it worth trying to use make_decl_one_only?  */
if (DECL_DECLARED_INLINE_P (new_decl)&&  DECL_EXTERNAL (new_decl))
-   {
- DECL_EXTERNAL (new_decl) = 0;
- TREE_PUBLIC (new_decl) = 0;
-   }
+   DECL_EXTERNAL (new_decl) = 0;


Yes, that's what we had in mind.  Though of course the changelog doesn't match.


r~


Ok, my English typing privileges have been revoked until further notice. 
 I'm obviously typing cross-eyed right now.


Here's what I have, but keep in mind that I'm going to watch Sesame 
Street so I'll be away from my keyboard for a few hours...


PR middle-end/51411
* trans-mem.c (ipa_tm_create_version): Do not zap TREE_PUBLIC.



Re: PR middle-end/51411: handle transaction_safe virtual inlined methods

2011-12-13 Thread Richard Henderson
On 12/13/2011 12:48 PM, Aldy Hernandez wrote:
>   PR middle-end/51411
>   * trans-mem.c (ipa_tm_create_version): Do not zap DECL_EXTERNAL.
...
>/* ??? Is it worth trying to use make_decl_one_only?  */
>if (DECL_DECLARED_INLINE_P (new_decl) && DECL_EXTERNAL (new_decl))
> - {
> -   DECL_EXTERNAL (new_decl) = 0;
> -   TREE_PUBLIC (new_decl) = 0;
> - }
> + DECL_EXTERNAL (new_decl) = 0;

Yes, that's what we had in mind.  Though of course the changelog doesn't match.


r~


Re: PR middle-end/51411: handle transaction_safe virtual inlined methods

2011-12-13 Thread Jason Merrill

On 12/13/2011 03:48 PM, Aldy Hernandez wrote:

Be that as it may, I gather this is what you want?


Yep, that's what I had in mind.

Jason



Re: PR middle-end/51411: handle transaction_safe virtual inlined methods

2011-12-13 Thread Aldy Hernandez

On 12/13/11 14:42, Richard Henderson wrote:

On 12/13/2011 12:41 PM, Aldy Hernandez wrote:

The TM clone is marked weak as the original decl was weak, but it is no longer 
external or public.  I suggested to just clear the DECL_WEAK bit on the clone, 
but Jason mentioned that unsetting DECL_EXTERNAL is more optimal-- so that the 
clone can still be public and comdat.

No regressions.  PR fixed.

OK?

curr


PR middle-end/51411
* trans-mem.c (ipa_tm_create_version): Do not zap DECL_EXTERNAL.


The actual patch is more or less the exact opposite of what Jason recommended.


r~


Bah, you guys are speaking Bostoneese or something, cause reading back 
on the IRC log I still think I implemented what Jason said.


Be that as it may, I gather this is what you want?

OK pending another round of tests?
PR middle-end/51411
* trans-mem.c (ipa_tm_create_version): Do not zap DECL_EXTERNAL.

Index: testsuite/g++.dg/tm/pr51411.C
===
--- testsuite/g++.dg/tm/pr51411.C   (revision 0)
+++ testsuite/g++.dg/tm/pr51411.C   (revision 0)
@@ -0,0 +1,7 @@
+// { dg-do compile }
+// { dg-options "-fgnu-tm -O" }
+
+struct A
+{
+  __attribute__ ((transaction_safe)) virtual void virtfoo () { }
+};
Index: trans-mem.c
===
--- trans-mem.c (revision 182290)
+++ trans-mem.c (working copy)
@@ -4256,10 +4256,7 @@ ipa_tm_create_version (struct cgraph_nod
   /* Remap extern inline to static inline.  */
   /* ??? Is it worth trying to use make_decl_one_only?  */
   if (DECL_DECLARED_INLINE_P (new_decl) && DECL_EXTERNAL (new_decl))
-   {
- DECL_EXTERNAL (new_decl) = 0;
- TREE_PUBLIC (new_decl) = 0;
-   }
+   DECL_EXTERNAL (new_decl) = 0;
 
   tree_function_versioning (old_decl, new_decl, NULL, false, NULL,
NULL, NULL);


Re: PR middle-end/51411: handle transaction_safe virtual inlined methods

2011-12-13 Thread Richard Henderson
On 12/13/2011 12:41 PM, Aldy Hernandez wrote:
> The TM clone is marked weak as the original decl was weak, but it is no 
> longer external or public.  I suggested to just clear the DECL_WEAK bit on 
> the clone, but Jason mentioned that unsetting DECL_EXTERNAL is more optimal-- 
> so that the clone can still be public and comdat.
> 
> No regressions.  PR fixed.
> 
> OK?
> 
> curr
> 
> 
>   PR middle-end/51411
>   * trans-mem.c (ipa_tm_create_version): Do not zap DECL_EXTERNAL.

The actual patch is more or less the exact opposite of what Jason recommended.


r~


PR middle-end/51411: handle transaction_safe virtual inlined methods

2011-12-13 Thread Aldy Hernandez
IPA's function_and_variable_visibility is dying, because for a C++ 
virtual inlined TM clone, we have zapped the DECL_EXTERNAL and 
TREE_PUBLIC bits while building the clone in ipa_tm_create_version.


The assert fails here in function_and_variable_visibility():

  gcc_assert ((!DECL_WEAK (node->decl) && !DECL_COMDAT (node->decl))
  	  || TREE_PUBLIC (node->decl) || DECL_EXTERNAL 
(node->decl));


The TM clone is marked weak as the original decl was weak, but it is no 
longer external or public.  I suggested to just clear the DECL_WEAK bit 
on the clone, but Jason mentioned that unsetting DECL_EXTERNAL is more 
optimal-- so that the clone can still be public and comdat.


No regressions.  PR fixed.

OK?
PR middle-end/51411
* trans-mem.c (ipa_tm_create_version): Do not zap DECL_EXTERNAL.

Index: testsuite/g++.dg/tm/pr51411.C
===
--- testsuite/g++.dg/tm/pr51411.C   (revision 0)
+++ testsuite/g++.dg/tm/pr51411.C   (revision 0)
@@ -0,0 +1,7 @@
+// { dg-do compile }
+// { dg-options "-fgnu-tm -O" }
+
+struct A
+{
+  __attribute__ ((transaction_safe)) virtual void virtfoo () { }
+};
Index: trans-mem.c
===
--- trans-mem.c (revision 182290)
+++ trans-mem.c (working copy)
@@ -4256,10 +4256,7 @@ ipa_tm_create_version (struct cgraph_nod
   /* Remap extern inline to static inline.  */
   /* ??? Is it worth trying to use make_decl_one_only?  */
   if (DECL_DECLARED_INLINE_P (new_decl) && DECL_EXTERNAL (new_decl))
-   {
- DECL_EXTERNAL (new_decl) = 0;
- TREE_PUBLIC (new_decl) = 0;
-   }
+   TREE_PUBLIC (new_decl) = 0;
 
   tree_function_versioning (old_decl, new_decl, NULL, false, NULL,
NULL, NULL);


Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)

2011-12-13 Thread Sriraman Tallam
On Tue, Dec 13, 2011 at 10:56 AM, Richard Henderson  wrote:
> On 12/13/2011 10:26 AM, Sriraman Tallam wrote:
>> Cool, this works for stores!  It generates the movlps + movhps. I have
>> to also make a similar change to another call to gen_sse2_movdqu for
>> loads. Would it be ok to not do this when tune=core2?
>
> We can work something out.
>
> I'd like you to do the benchmarking to know if unaligned loads are really as 
> expensive as unaligned stores, and whether there are reformatting penalties 
> that make the movlps+movhps option for either load or store less attractive.

I can confirm that movhps+movlps is *not at all* a good substitute for
movdqu on core2. It makes it much worse. MOVHPS/MOVLPS has a very high
penalty (~10x) for unaligned load/stores.

>
>
> r~


Re: [RFC][libitm] Convert to c++11 atomics

2011-12-13 Thread Richard Henderson
On 12/13/2011 11:43 AM, Dominique Dhumieres wrote:
>> I've committed the patch.
> 
> It caused:
> 
> ../../../work/libitm/config/posix/rwlock.cc: In member function 'bool 
> GTM::gtm_rwlock::write_lock_generic(GTM::gtm_thread*)':
> ../../../work/libitm/config/posix/rwlock.cc:196:56: error: comparison between 
> signed and unsigned integer expressions [-Werror=sign-compare]
> cc1plus: all warnings being treated as errors

Fixed.  Tested on linux with --disable-linux-futex.


r~
Index: libitm/ChangeLog
===
--- libitm/ChangeLog(revision 182301)
+++ libitm/ChangeLog(working copy)
@@ -1,5 +1,8 @@
 2011-12-13  Richard Henderson  
 
+   * config/posix/rwlock.cc (gtm_rwlock::write_lock_generic): Fix
+   signed/unsigned comparison werror.
+
* local_atomic: New file.
* libitm_i.h: Include it.
(gtm_thread::shared_state): Use atomic template.
Index: libitm/config/posix/rwlock.cc
===
--- libitm/config/posix/rwlock.cc   (revision 182301)
+++ libitm/config/posix/rwlock.cc   (working copy)
@@ -193,7 +193,7 @@
  it = it->next_thread)
{
  // Don't count ourself if this is an upgrade.
- if (it->shared_state.load(memory_order_relaxed) != -1)
+ if (it->shared_state.load(memory_order_relaxed) != (gtm_word)-1)
readers++;
}
 


Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)

2011-12-13 Thread Xinliang David Li
See instruction tables here:
http://www.agner.org/optimize/instruction_tables.pdf

My brief reading of the table for core2 and corei7 suggest the following:

1. On core2

movdqu -- both load and store forms take up to 8 cycles to complete,
and store form produces 8 uops while load produces 4 uops

movsd load:  1 uop, 2 cycle latency
movsd store: 1 uop, 3 cycle latency

movhpd, movlpd load: 2 uops, 3 cycle latency
movhpd store: 2 uops, 5 cycle latency
movlpd store: 1uop, 3 cycle latency


2. Core i7

movdqu load: 1 uop, 2 cycle latency
movdqu store: 1 uop, 3 cycle latency

movsd load: 1 uop, 2 cycle latency
movsd store: 1uop, 3 cycle latency

movhpd, movlpd load: 2 uop, 3 cycle latency
movhpd, movlpd sotre: 2 uop, 5 cycle latency


>From the above, looks like a Sri's original simple heuristic should work fine

1) for corei7, if the load and stores can not be proved to be 128 bit
aligned, always use movdqu

2) for core2,  experiment can be done to determine whether to look at
unaligned stores or both unaligned loads to disable vectorization.

Yes, for longer term, a more precise cost model is probably needed --
but require lots of work which may not work a lot better in practice.

What is more important is to beef up gcc infrastructure to allow more
aggressive alignment (info) propagation.

In 4.4, gcc does alignment (output array) based versioning -- Sri's
patch has the effect of doing the samething but only for selected
targets.

thanks,

David

On Tue, Dec 13, 2011 at 10:56 AM, Richard Henderson  wrote:
> On 12/13/2011 10:26 AM, Sriraman Tallam wrote:
>> Cool, this works for stores!  It generates the movlps + movhps. I have
>> to also make a similar change to another call to gen_sse2_movdqu for
>> loads. Would it be ok to not do this when tune=core2?
>
> We can work something out.
>
> I'd like you to do the benchmarking to know if unaligned loads are really as 
> expensive as unaligned stores, and whether there are reformatting penalties 
> that make the movlps+movhps option for either load or store less attractive.
>
>
> r~


Re: [patch] add __is_final trait to fix libstdc++/51365

2011-12-13 Thread Jason Merrill

On 12/13/2011 12:01 PM, Paolo Carlini wrote:

As far as I'm concerned, definetely not! I also think that it would be great 
if, for 4.7, Jon could handle the library issues with EBO by exploiting it.


The patch is OK, then.

Jason


Re: [PATCH] [MIPS] Add Octeon2 cpu support to GCC

2011-12-13 Thread Andrew Pinski
On Tue, Dec 13, 2011 at 11:19 AM, Richard Sandiford
 wrote:
> Andrew Pinski  writes:
>> +(define_insn_reservation "octeon_imul3_o2" 6
>> +  (and (eq_attr "cpu" "octeon2")
>> +       (eq_attr "type" "imul3,pop,clz"))
>> +  " octeon_pipe1 + octeon_mult")
>
> Excess space before ".
>
>> Index: config/mips/mips.h
>> ===
>> --- config/mips/mips.h        (revision 182183)
>> +++ config/mips/mips.h        (working copy)
>> @@ -250,7 +252,9 @@ struct mips_cpu_info {
>>  #define TUNE_MIPS6000               (mips_tune == PROCESSOR_R6000)
>>  #define TUNE_MIPS7000               (mips_tune == PROCESSOR_R7000)
>>  #define TUNE_MIPS9000               (mips_tune == PROCESSOR_R9000)
>> -#define TUNE_OCTEON              (mips_tune == PROCESSOR_OCTEON)
>> +#define TUNE_OCTEON              (mips_tune == PROCESSOR_OCTEON      \
>> +                                  || mips_tune == PROCESSOR_OCTEON2)
>> +#define TUNE_OCTEON2             (mips_tune == PROCESSOR_OCTEON2)
>>  #define TUNE_SB1                    (mips_tune == PROCESSOR_SB1             
>>  \
>>                                    || mips_tune == PROCESSOR_SB1A)
>
> Do any follow-on patches need TUNE_OCTEON2?  If not, let's leave it out
> for now.  (I realise they'll use TARGET_OCTEON2.)
>
> OK for 4.7 with those (trivial) changes.

Here is the patch which I committed, I had to add support to mips.exp
for -fdump-* options (I also fixed the -g* options, the regexp was
incorrect, it needed a . in front of the *).

Thanks,
Andrew Pinski

gcc/ChangeLog:
* config/mips/mips-cpus.def: Add Octeon2.
* config/mips/mips-tables.opt: Regenerate.
* config/mips/mips.md (define_attr "cpu"): Add Octeon2.
* config/mips/driver-native.c (host_detect_local_cpu): Support Octeon2 also.
* config/mips/octeon.md (octeon_arith): Add Octeon2.
(octeon_condmove): Likewise.
(octeon_load): Rename to ..
(octeon_load_o1): this.
(octeon_load_o2): New reserve.
(octeon_cop_o2): New reserve.
(octeon_store):  Match Octeon2 also.
(octeon_brj): Rename to ..
(octeon_brj_o1): this.
(octeon_brj_o2): New reserve.
(octeon_imul3): Rename to ...
(octeon_imul3_o1): this.
(octeon_imul3_o2): New reserve.
(octeon_imul): Rename to ...
(octeon_imul_o1): this.
(octeon_imul_o2): New reserve.
(octeon_mfhilo): Rename to ...
(octeon_mfhilo_o1): This.
(octeon_mfhilo_o2): New reserve.
(octeon_imadd): Rename to ...
(octeon_imadd_o1): this.
(octeon_imadd_o2): New reserve.
(octeon_idiv): Rename to ..
(octeon_idiv_o1): This.
(octeon_idiv_o2_si): New reserve.
(octeon_idiv_o2_di): Likewise.
(octeon_unknown): Match Octeon2 also.
* config/mips/mips.c (mips_rtx_cost_data): Add Octeon2 cost data.
(mips_issue_rate): Octeon2 can issue 2 at a time.
* config/mips/mips.h (TARGET_OCTEON): Match Octeon2 also.
(TARGET_OCTEON2): New define.
(TUNE_OCTEON): Match Octeon2 also.

testsuite/ChangeLog:
* gcc.target/mips/mips.exp (mips_option_groups): Fix debug.  Add
-fdump-* options.
* gcc.target/mips/octeon2-pipe-1.c: New testcase.
* gcc.target/mips/octeon-pipe-1.c: New testcase.
Index: testsuite/gcc.target/mips/mips.exp
===
--- testsuite/gcc.target/mips/mips.exp  (revision 182183)
+++ testsuite/gcc.target/mips/mips.exp  (working copy)
@@ -226,7 +226,7 @@ set mips_option_groups {
 abi "-mabi=.*"
 addressing "addressing=.*"
 arch "-mips([1-5]|32.*|64.*)|-march=.*|isa(|_rev)(=|<=|>=).*"
-debug "-g*"
+debug "-g.*"
 dump_pattern "-dp"
 endianness "-E(L|B)|-me(l|b)"
 float "-m(hard|soft)-float"
@@ -241,6 +241,7 @@ set mips_option_groups {
 profiling "-pg"
 small-data "-G[0-9]+"
 warnings "-w"
+dump "-fdump-.*"
 }
 
 # Add -mfoo/-mno-foo options to mips_option_groups.
Index: testsuite/gcc.target/mips/octeon2-pipe-1.c
===
--- testsuite/gcc.target/mips/octeon2-pipe-1.c  (revision 0)
+++ testsuite/gcc.target/mips/octeon2-pipe-1.c  (revision 0)
@@ -0,0 +1,11 @@
+/* Check that we use the octeon2 pipeline description.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-sched2 -march=octeon2" } */
+
+NOMIPS16 int f (int a, int b)
+{
+  return a / b;
+}
+
+/* { dg-final { scan-rtl-dump "octeon_mult\\*17" "sched2" } }  */
+/* { dg-final { cleanup-tree-dump "sched2" } }  */
Index: testsuite/gcc.target/mips/octeon-pipe-1.c
===
--- testsuite/gcc.target/mips/octeon-pipe-1.c   (revision 0)
+++ testsuite/gcc.target/mips/octeon-pipe-1.c   (revision 0)
@@ -0,0 +1,11 @@
+/* Check that we use the octeon pipeline description.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=octeon -fdump-rtl-sched2" } */
+
+NOMIPS16 int f (int a, int b)
+{
+  return a / b;
+}
+
+/* { dg-final { scan-rtl-dump "octeon_mult\\*71" "sched2" } }  */
+/* { dg-final { cleanup-tree-dump "sched2" } }  */
Index: config/mips/mips-tables.opt
==

Re: [RFC][libitm] Convert to c++11 atomics

2011-12-13 Thread Dominique Dhumieres
> I've committed the patch.

It caused:

../../../work/libitm/config/posix/rwlock.cc: In member function 'bool 
GTM::gtm_rwlock::write_lock_generic(GTM::gtm_thread*)':
../../../work/libitm/config/posix/rwlock.cc:196:56: error: comparison between 
signed and unsigned integer expressions [-Werror=sign-compare]
cc1plus: all warnings being treated as errors

TIA

Dominique


Re: [Tentative patch] -finit-real=snan - would it really be so simple for automatic arrays.

2011-12-13 Thread Thomas Koenig

Hi Toon,

(For gcc-patches: Patch at 
http://gcc.gnu.org/ml/fortran/2011-12/msg00080.html )

I would appreciate a review and a regression test by someone who can.


Regression-test passed on trunk.

This one really looks obvious.  Unless somebody objects who knows this
field better than I do, OK for trunk.

Thanks a lot!

Thomas


[Patch,AVR] ad PR50931: Implement mulpsi3

2011-12-13 Thread Georg-Johann Lay
This adds missing 24-Bit multiplication.

Besides implementing __mulpsi3 in libgcc the patch adds some minor tweaks to
multiply with small numbers and to represent the reduced register footprint of
asm implementations.

With this patch PR50931 is complete from the target side.
(But there is still a segfault in the frontends, see PR51527)

Ok for trunk?

Johann

libgcc/
PR target/50931
* config/avr/t-avr (LIB1ASMSRC): Add _mulpsi3, _mulsqipsi3.
* config/avr/lib1funcs.S (__mulpsi3, __mulsqipsi3): New functions.
gcc/
PR target/50931
* config/avr/avr.md (mulpsi3): New expander.
(*umulqihipsi3, *umulhiqipsi3): New insns.
(*mulsqipsi3.libgcc, *mulpsi3.libgcc): New insns.
(mulsqipsi3, *mulpsi3): New insn-and-splits.
(ashlpsi3): Turn to expander.  Move insn code to...
(*ashlpsi3): ...this new insn.
testsuite/
PR target/50931
* gcc.target/avr/torture/int24-mul.c: New testcase.

Index: gcc/config/avr/avr.md
===
--- gcc/config/avr/avr.md	(revision 182277)
+++ gcc/config/avr/avr.md	(working copy)
@@ -2113,7 +2113,7 @@ (define_insn "*mulhi3_call"
   [(set_attr "type" "xcall")
(set_attr "cc" "clobber")])
 
-;; To support widening multiplicatioon with constant we postpone
+;; To support widening multiplication with constant we postpone
 ;; expanding to the implicit library call until post combine and
 ;; prior to register allocation.  Clobber all hard registers that
 ;; might be used by the (widening) multiply until it is split and
@@ -2575,6 +2575,132 @@ (define_insn "*udivmodhi4_call"
(set_attr "cc" "clobber")])
 
 ;;
+;; 24-bit multiply
+
+;; To support widening multiplication with constant we postpone
+;; expanding to the implicit library call until post combine and
+;; prior to register allocation.  Clobber all hard registers that
+;; might be used by the (widening) multiply until it is split and
+;; it's final register footprint is worked out.
+
+(define_expand "mulpsi3"
+  [(parallel [(set (match_operand:PSI 0 "register_operand" "")
+   (mult:PSI (match_operand:PSI 1 "register_operand" "")
+ (match_operand:PSI 2 "nonmemory_operand" "")))
+  (clobber (reg:HI 26))
+  (clobber (reg:DI 18))])]
+  "AVR_HAVE_MUL"
+  {
+if (s8_operand (operands[2], PSImode))
+  {
+rtx reg = force_reg (QImode, gen_int_mode (INTVAL (operands[2]), QImode));
+emit_insn (gen_mulsqipsi3 (operands[0], reg, operands[1]));
+DONE;
+  }
+  })
+
+(define_insn "*umulqihipsi3"
+  [(set (match_operand:PSI 0 "register_operand" "=&r")
+(mult:PSI (zero_extend:PSI (match_operand:QI 1 "register_operand" "r"))
+  (zero_extend:PSI (match_operand:HI 2 "register_operand" "r"]
+  "AVR_HAVE_MUL"
+  "mul %1,%A2
+	movw %A0,r0
+	mul %1,%B2
+	clr %C0
+	add %B0,r0
+	adc %C0,r1
+	clr __zero_reg__"
+  [(set_attr "length" "7")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*umulhiqipsi3"
+  [(set (match_operand:PSI 0 "register_operand" "=&r")
+(mult:PSI (zero_extend:PSI (match_operand:HI 2 "register_operand" "r"))
+  (zero_extend:PSI (match_operand:QI 1 "register_operand" "r"]
+  "AVR_HAVE_MUL"
+  "mul %1,%A2
+	movw %A0,r0
+	mul %1,%B2
+	add %B0,r0
+	mov %C0,r1
+	clr __zero_reg__
+	adc %C0,__zero_reg__"
+  [(set_attr "length" "7")
+   (set_attr "cc" "clobber")])
+
+(define_insn_and_split "mulsqipsi3"
+  [(set (match_operand:PSI 0 "pseudo_register_operand"  "=r")
+(mult:PSI (sign_extend:PSI (match_operand:QI 1 "pseudo_register_operand" "r"))
+  (match_operand:PSI 2 "pseudo_register_or_const_int_operand""rn")))
+   (clobber (reg:HI 26))
+   (clobber (reg:DI 18))]
+  "AVR_HAVE_MUL && !reload_completed"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (reg:QI 25)
+(match_dup 1))
+   (set (reg:PSI 22)
+(match_dup 2))
+   (set (reg:PSI 18)
+(mult:PSI (sign_extend:PSI (reg:QI 25))
+  (reg:PSI 22)))
+   (set (match_dup 0)
+(reg:PSI 18))])
+
+(define_insn_and_split "*mulpsi3"
+  [(set (match_operand:PSI 0 "pseudo_register_operand"   "=r")
+(mult:PSI (match_operand:PSI 1 "pseudo_register_operand"  "r")
+  (match_operand:PSI 2 "pseudo_register_or_const_int_operand" "rn")))
+   (clobber (reg:HI 26))
+   (clobber (reg:DI 18))]
+  "AVR_HAVE_MUL && !reload_completed"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (reg:PSI 18)
+(match_dup 1))
+   (set (reg:PSI 22) 
+(match_dup 2))
+   (parallel [(set (reg:PSI 22)
+   (mult:PSI (reg:PSI 22)
+ (reg:PSI 18)))
+  (clobber (reg:QI 21))
+  (clobber (reg:QI 25))
+ 

Re: [PATCH] regmove: Fix segfault when accessing call_used_regs

2011-12-13 Thread Richard Sandiford
"Andreas Krebbel"  writes:
> Ok for mainline, 4.6, 4.5, and 4.4?

OK everywhere, but (sorry to be picky)...

> Index: gcc/regmove.c
> ===
> *** gcc/regmove.c.orig
> --- gcc/regmove.c
> *** fixup_match_2 (rtx insn, rtx dst, rtx sr
> *** 859,865 
> if (REG_N_CALLS_CROSSED (REGNO (src)) == 0)
>   break;
>   
> !   if (call_used_regs [REGNO (dst)]
> || find_reg_fusage (p, CLOBBER, dst))
>   break;
>   }
> --- 859,865 
> if (REG_N_CALLS_CROSSED (REGNO (src)) == 0)
>   break;
>   
> !   if ((REGNO (dst) < FIRST_PSEUDO_REGISTER && call_used_regs [REGNO 
> (dst)])
> || find_reg_fusage (p, CLOBBER, dst))
>   break;
>   }

...please use HARD_REGISTER_P (dst) instead.  For one thing, it'll keep
the line within the 80 column limit.

Thanks,
Richard


Re: [PATCH] [MIPS] Add Octeon2 cpu support to GCC

2011-12-13 Thread Richard Sandiford
Andrew Pinski  writes:
> +(define_insn_reservation "octeon_imul3_o2" 6
> +  (and (eq_attr "cpu" "octeon2")
> +   (eq_attr "type" "imul3,pop,clz"))
> +  " octeon_pipe1 + octeon_mult")

Excess space before ".

> Index: config/mips/mips.h
> ===
> --- config/mips/mips.h(revision 182183)
> +++ config/mips/mips.h(working copy)
> @@ -250,7 +252,9 @@ struct mips_cpu_info {
>  #define TUNE_MIPS6000   (mips_tune == PROCESSOR_R6000)
>  #define TUNE_MIPS7000   (mips_tune == PROCESSOR_R7000)
>  #define TUNE_MIPS9000   (mips_tune == PROCESSOR_R9000)
> -#define TUNE_OCTEON  (mips_tune == PROCESSOR_OCTEON)
> +#define TUNE_OCTEON  (mips_tune == PROCESSOR_OCTEON  \
> +  || mips_tune == PROCESSOR_OCTEON2)
> +#define TUNE_OCTEON2 (mips_tune == PROCESSOR_OCTEON2)
>  #define TUNE_SB1(mips_tune == PROCESSOR_SB1  
> \
>|| mips_tune == PROCESSOR_SB1A)

Do any follow-on patches need TUNE_OCTEON2?  If not, let's leave it out
for now.  (I realise they'll use TARGET_OCTEON2.)

OK for 4.7 with those (trivial) changes.

Thanks,
Richard


Re: Go patch committed: Multiplex goroutines onto OS threads

2011-12-13 Thread Ian Lance Taylor
Rainer Orth  writes:

> Only a few minor compilation problems, fixed with the following patch.

Thanks.  Committed.

Ian


Re: [RFC][libitm] Convert to c++11 atomics

2011-12-13 Thread Richard Henderson
On 11/30/2011 05:13 PM, Richard Henderson wrote:
> The library is written in C++, so in theory we can use the real atomic<> 
> templates, etc.  Except that we have the same horrid problem finding the C++ 
> headers as did for , so again we have a local copy of .  
> Blah.  But given that it is a copy, the rest of the code is using the real 
> interfaces.
> 
> This passes the testsuite on power7 (gcc110), which *is* picky about memory 
> barriers, but I seem to recall that the larger external tests that Velox used 
> were much better at picking out problems.  And I've misplaced those...
> 
> Torvald, if you'd be so kind as to cast another set of eyes across this, I'd 
> be grateful.

I've committed the patch.


r~



libgo patch committed: Update to weekly.2011-12-02

2011-12-13 Thread Ian Lance Taylor
I have committed a patch to update libgo to the weekly.2011-12-02
release of the master library.  In this message I have only included the
diffs to files that are specific to the gccgo copy of the library.
Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu.
Committed to mainline.

Ian

diff -r 53056adb4c1e libgo/MERGE
--- a/libgo/MERGE	Tue Dec 13 10:51:45 2011 -0800
+++ b/libgo/MERGE	Tue Dec 13 11:10:08 2011 -0800
@@ -1,4 +1,4 @@
-b4a91b693374
+0beb796b4ef8
 
 The first line of this file holds the Mercurial revision number of the
 last merge done from the master library sources.
diff -r 53056adb4c1e libgo/Makefile.am
--- a/libgo/Makefile.am	Tue Dec 13 10:51:45 2011 -0800
+++ b/libgo/Makefile.am	Tue Dec 13 11:10:08 2011 -0800
@@ -233,7 +233,6 @@
 
 toolexeclibgoexp_DATA = \
 	exp/ebnf.gox \
-	exp/gui.gox \
 	$(exp_inotify_gox) \
 	exp/norm.gox \
 	exp/spdy.gox \
@@ -242,11 +241,6 @@
 	exp/terminal.gox \
 	exp/types.gox
 
-toolexeclibgoexpguidir = $(toolexeclibgoexpdir)/gui
-
-toolexeclibgoexpgui_DATA = \
-	exp/gui/x11.gox
-
 toolexeclibgoexpsqldir = $(toolexeclibgoexpdir)/sql
 
 toolexeclibgoexpsql_DATA = \
@@ -447,6 +441,7 @@
 	runtime/go-map-len.c \
 	runtime/go-map-range.c \
 	runtime/go-nanotime.c \
+	runtime/go-now.c \
 	runtime/go-new-map.c \
 	runtime/go-new.c \
 	runtime/go-panic.c \
@@ -576,6 +571,7 @@
 go_html_files = \
 	go/html/const.go \
 	go/html/doc.go \
+	go/html/doctype.go \
 	go/html/entity.go \
 	go/html/escape.go \
 	go/html/node.go \
@@ -888,7 +884,7 @@
 	go/time/sys_unix.go \
 	go/time/tick.go \
 	go/time/time.go \
-	go/time/zoneinfo_posix.go \
+	go/time/zoneinfo.go \
 	go/time/zoneinfo_unix.go
 
 go_unicode_files = \
@@ -1038,6 +1034,7 @@
 go_crypto_x509_files = \
 	go/crypto/x509/cert_pool.go \
 	go/crypto/x509/pkcs1.go \
+	go/crypto/x509/pkcs8.go \
 	go/crypto/x509/verify.go \
 	go/crypto/x509/x509.go
 go_crypto_xtea_files = \
@@ -1135,8 +1132,6 @@
 go_exp_ebnf_files = \
 	go/exp/ebnf/ebnf.go \
 	go/exp/ebnf/parser.go
-go_exp_gui_files = \
-	go/exp/gui/gui.go
 go_exp_inotify_files = \
 	go/exp/inotify/inotify_linux.go
 go_exp_norm_files = \
@@ -1178,10 +1173,6 @@
 	go/exp/types/types.go \
 	go/exp/types/universe.go
 
-go_exp_gui_x11_files = \
-	go/exp/gui/x11/auth.go \
-	go/exp/gui/x11/conn.go
-
 go_exp_sql_driver_files = \
 	go/exp/sql/driver/driver.go \
 	go/exp/sql/driver/types.go
@@ -1415,13 +1406,11 @@
 	go/text/template/exec.go \
 	go/text/template/funcs.go \
 	go/text/template/helper.go \
-	go/text/template/parse.go \
-	go/text/template/set.go
+	go/text/template/template.go
 go_text_template_parse_files = \
 	go/text/template/parse/lex.go \
 	go/text/template/parse/node.go \
-	go/text/template/parse/parse.go \
-	go/text/template/parse/set.go
+	go/text/template/parse/parse.go
 
 go_sync_atomic_files = \
 	go/sync/atomic/doc.go
@@ -1725,14 +1714,12 @@
 	encoding/pem.lo \
 	encoding/xml.lo \
 	exp/ebnf.lo \
-	exp/gui.lo \
 	exp/norm.lo \
 	exp/spdy.lo \
 	exp/sql.lo \
 	exp/ssh.lo \
 	exp/terminal.lo \
 	exp/types.lo \
-	exp/gui/x11.lo \
 	exp/sql/driver.lo \
 	html/template.lo \
 	go/ast.lo \
@@ -2784,16 +2771,6 @@
 	@$(CHECK)
 .PHONY: exp/ebnf/check
 
-@go_include@ exp/gui.lo.dep
-exp/gui.lo.dep: $(go_exp_gui_files)
-	$(BUILDDEPS)
-exp/gui.lo: $(go_exp_gui_files)
-	$(BUILDPACKAGE)
-exp/gui/check: $(CHECK_DEPS)
-	@$(MKDIR_P) exp/gui
-	@$(CHECK)
-.PHONY: exp/gui/check
-
 @go_include@ exp/norm.lo.dep
 exp/norm.lo.dep: $(go_exp_norm_files)
 	$(BUILDDEPS)
@@ -2854,16 +2831,6 @@
 	@$(CHECK)
 .PHONY: exp/types/check
 
-@go_include@ exp/gui/x11.lo.dep
-exp/gui/x11.lo.dep: $(go_exp_gui_x11_files)
-	$(BUILDDEPS)
-exp/gui/x11.lo: $(go_exp_gui_x11_files)
-	$(BUILDPACKAGE)
-exp/gui/x11/check: $(CHECK_DEPS)
-	@$(MKDIR_P) exp/gui/x11
-	@$(CHECK)
-.PHONY: exp/gui/x11/check
-
 @go_include@ exp/inotify.lo.dep
 exp/inotify.lo.dep: $(go_exp_inotify_files)
 	$(BUILDDEPS)
@@ -3686,8 +3653,6 @@
 
 exp/ebnf.gox: exp/ebnf.lo
 	$(BUILDGOX)
-exp/gui.gox: exp/gui.lo
-	$(BUILDGOX)
 exp/inotify.gox: exp/inotify.lo
 	$(BUILDGOX)
 exp/norm.gox: exp/norm.lo
@@ -3703,9 +3668,6 @@
 exp/types.gox: exp/types.lo
 	$(BUILDGOX)
 
-exp/gui/x11.gox: exp/gui/x11.lo
-	$(BUILDGOX)
-
 exp/sql/driver.gox: exp/sql/driver.lo
 	$(BUILDGOX)
 
@@ -3950,6 +3912,7 @@
 	html/template/check \
 	go/ast/check \
 	$(go_build_check_omitted_since_it_calls_6g) \
+	go/doc/check \
 	go/parser/check \
 	go/printer/check \
 	go/scanner/check \
diff -r 53056adb4c1e libgo/runtime/go-nanotime.c
--- a/libgo/runtime/go-nanotime.c	Tue Dec 13 10:51:45 2011 -0800
+++ b/libgo/runtime/go-nanotime.c	Tue Dec 13 11:10:08 2011 -0800
@@ -6,17 +6,16 @@
 
 #include 
 
-#include "go-assert.h"
 #include "runtime.h"
 
+int64 runtime_nanotime (void)
+  __attribute__ ((no_split_stack));
+
 int64
 runtime_nanotime (void)
 {
-  int i;
   struct timeval tv;
 
-  i = gettimeofday (&tv, NULL);
-  __go_assert (i == 0);
-
+  gettimeofday (&tv, NULL);
   return (int64) tv.tv_sec * 10 + (int64) tv.tv_usec * 1000;
 }
diff -r 53056adb4c1e libgo/runtime/go

Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)

2011-12-13 Thread Sriraman Tallam
On Tue, Dec 13, 2011 at 10:56 AM, Richard Henderson  wrote:
> On 12/13/2011 10:26 AM, Sriraman Tallam wrote:
>> Cool, this works for stores!  It generates the movlps + movhps. I have
>> to also make a similar change to another call to gen_sse2_movdqu for
>> loads. Would it be ok to not do this when tune=core2?
>
> We can work something out.
>
> I'd like you to do the benchmarking to know if unaligned loads are really as 
> expensive as unaligned stores, and whether there are reformatting penalties 
> that make the movlps+movhps option for either load or store less attractive.

Ok thanks, I will get back with the data.



>
>
> r~


Re: fix TM-clone calls in C++ dumps

2011-12-13 Thread Richard Henderson
On 12/10/2011 11:32 AM, Aldy Hernandez wrote:
>   * trans-mem.c (ipa_tm_create_version_alias): Set DECL_CONTEXT and
>   DECL_LANG_SPECIFIC.

Ok.


r~


Re: [PATCH] PR c++/51476 - ICE on PTRMEM_CST as template argument in c++11

2011-12-13 Thread Jason Merrill

On 12/13/2011 02:03 PM, Jason Merrill wrote:

On 12/13/2011 12:43 PM, Dodji Seketeli wrote:

+ if (TREE_CODE (expr) == PTRMEM_CST)
+ /* We don't need to test if a PTRMEM_CST is a constant value
+ because maybe_constant_value might crash and because
+ [temp.arg.nontype]/1 says it's not allowed as a template
+ argument anyway. */;


A PTRMEM_CST is already constant, and a valid template argument for a
parameter of pointer to member type, we just want to leave it in that
form rather than lower it to a CONSTRUCTOR.


But yes, the patch is OK after fixing the comment.

Jason



Re: [PATCH] PR c++/51476 - ICE on PTRMEM_CST as template argument in c++11

2011-12-13 Thread Jason Merrill

On 12/13/2011 12:43 PM, Dodji Seketeli wrote:

+  if (TREE_CODE (expr) == PTRMEM_CST)
+   /* We don't need to test if a PTRMEM_CST is a constant value
+  because maybe_constant_value might crash and because
+  [temp.arg.nontype]/1 says it's not allowed as a template
+  argument anyway.  */;


A PTRMEM_CST is already constant, and a valid template argument for a 
parameter of pointer to member type, we just want to leave it in that 
form rather than lower it to a CONSTRUCTOR.


Jason


Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)

2011-12-13 Thread Richard Henderson
On 12/13/2011 10:26 AM, Sriraman Tallam wrote:
> Cool, this works for stores!  It generates the movlps + movhps. I have
> to also make a similar change to another call to gen_sse2_movdqu for
> loads. Would it be ok to not do this when tune=core2?

We can work something out.  

I'd like you to do the benchmarking to know if unaligned loads are really as 
expensive as unaligned stores, and whether there are reformatting penalties 
that make the movlps+movhps option for either load or store less attractive.


r~


Go patch committed: Fix last conversion patch

2011-12-13 Thread Ian Lance Taylor
I came across a test case which showed that the conversion patch I just
committed did not always use the correct types--it got a gimplification
failure.  The problem was that the builtin call was being recorded as
returning the type for which it was first called, and that might be
different from some later type.  Since all types it will be used with
will always look exactly the same, I just avoided the error by avoiding
the caching.  This code will be changed in the future to use the new
Runtime::make_call interface anyhow.  Bootstrapped and ran Go testsuite
on x86_64-unknown-linux-gnu.  Committed to mainline.

Ian

diff -r 61ab46b28b40 go/expressions.cc
--- a/go/expressions.cc	Tue Dec 13 10:08:46 2011 -0800
+++ b/go/expressions.cc	Tue Dec 13 10:38:21 2011 -0800
@@ -3669,7 +3669,7 @@
   if (e->integer_type()->is_unsigned()
 	  && e->integer_type()->bits() == 8)
 	{
-	  static tree string_to_byte_array_fndecl;
+	  tree string_to_byte_array_fndecl = NULL_TREE;
 	  ret = Gogo::call_builtin(&string_to_byte_array_fndecl,
    this->location(),
    "__go_string_to_byte_array",
@@ -3681,7 +3681,7 @@
   else
 	{
 	  go_assert(e == Type::lookup_integer_type("int"));
-	  static tree string_to_int_array_fndecl;
+	  tree string_to_int_array_fndecl = NULL_TREE;
 	  ret = Gogo::call_builtin(&string_to_int_array_fndecl,
    this->location(),
    "__go_string_to_int_array",


Re: Go patch committed: Multiplex goroutines onto OS threads

2011-12-13 Thread Rainer Orth
Ian Lance Taylor  writes:

> Rainer Orth  writes:
>
>> Ian Lance Taylor  writes:
>>
>>> Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu.  Tested
>>> both with and without -fsplit-stack support.  Committed to mainline.
>>
>> Once Go bootstrap works again on Solaris, I notice that there are many
>> 64-bit testsuite failures, which have been introduced between 2025
>> (r181724) and 2030 (r181837), so this patch is the obvious culprit.
>
> I just committed another libgo update, which will no doubt lead to
> further problems.

Only a few minor compilation problems, fixed with the following patch.

Rainer


diff --git a/libgo/go/net/fd_select.go b/libgo/go/net/fd_select.go
--- a/libgo/go/net/fd_select.go
+++ b/libgo/go/net/fd_select.go
@@ -85,7 +85,8 @@ func (p *pollster) WaitFD(s *pollServer,
 			timeout = &tv
 		}
 
-		var n, e int
+		var n int
+		var e error
 		var tmpReadFds, tmpWriteFds syscall.FdSet
 		for {
 			// Temporary syscall.FdSet's into which the values are copied
@@ -101,7 +102,7 @@ func (p *pollster) WaitFD(s *pollServer,
 break
 			}
 		}
-		if e != 0 {
+		if e != nil {
 			return -1, 0, os.NewSyscallError("select", e)
 		}
 		if n == 0 {
diff --git a/libgo/go/os/sys_uname.go b/libgo/go/os/sys_uname.go
--- a/libgo/go/os/sys_uname.go
+++ b/libgo/go/os/sys_uname.go
@@ -10,7 +10,7 @@ import "syscall"
 
 func Hostname() (name string, err error) {
 	var u syscall.Utsname
-	if errno := syscall.Uname(&u); errno != 0 {
+	if errno := syscall.Uname(&u); errno != nil {
 		return "", NewSyscallError("uname", errno)
 	}
 	b := make([]byte, len(u.Nodename))


-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [patch] add __is_final trait to fix libstdc++/51365

2011-12-13 Thread Paolo Carlini
Hi,

> So the proposed patch just adds the __is_final intrinsic for use
> internally by the library, to allow library changes so the test cases
> in the bug report will pass.  If preferred I won't even add __is_final
> to the extend.texi docs, to leave it as an undocumented extension that
> we could more easily remove (or deprecate) later if necessary.

I think this makes absolutely sense. And documenting it seems fine: after all, 
for better and worse, 'final' itself is in C++11.

For the internal library uses, on the other hand, I don't think we should 
overly document a trait like __is_empty_non_final (essentially an __and_) 
which, afaics, would usefully replace std::is_empty in various places  for 4.7.

Paolo




Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)

2011-12-13 Thread Sriraman Tallam
On Mon, Dec 12, 2011 at 9:54 PM, Ira Rosen  wrote:
>
>
> gcc-patches-ow...@gcc.gnu.org wrote on 13/12/2011 04:05:57 AM:
>
>> On core2, unaligned vector load/store using movdqu is a very slow
> operation.
>> Experiments show it is six times slower than movdqa (aligned) and this is
>> irrespective of whether the resulting data happens to be aligned or not.
>> For Corei7, there is no performance difference between the two and on
> AMDs,
>> movdqu is only about 10% slower.
>>
>> This patch does not vectorize loops that need to generate the slow
> unaligned
>> memory load/stores on core2.
>>
>>
>>    Do not vectorize loops on Core2 that need to use unaligned
>>    vector load/stores.
>>    * tree-vect-stmts.c (is_slow_vect_unaligned_load_store): New function.
>>    (vect_analyze_stmt): Check if the vectorizable load/store is slow.
>>    * target.def (TARGET_SLOW_UNALIGNED_VECTOR_MEMOP): New target hook.
>>    * doc/m.texi.in: Document new target hook:
>>    TARGET_SLOW_UNALIGNED_VECTOR_MEMOP
>>    * doc/m.texi: Regenerate.
>>    * config/i386/i386.c (ix86_slow_unaligned_vector_memop): New function.
>>    (TARGET_SLOW_UNALIGNED_VECTOR_MEMOP): New macro.
>>
>
>
>> @@ -5065,27 +5112,43 @@ vect_analyze_stmt (gimple stmt, bool
> *need_to_vect
>>     if (!bb_vinfo
>>         && (STMT_VINFO_RELEVANT_P (stmt_info)
>>             || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
>> +    {
>>        ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
>>              || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
>>              || vectorizable_conversion (stmt, NULL, NULL, NULL)
>>              || vectorizable_shift (stmt, NULL, NULL, NULL)
>>              || vectorizable_operation (stmt, NULL, NULL, NULL)
>>              || vectorizable_assignment (stmt, NULL, NULL, NULL)
>> -            || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
>>              || vectorizable_call (stmt, NULL, NULL)
>> -            || vectorizable_store (stmt, NULL, NULL, NULL)
>> -            || vectorizable_reduction (stmt, NULL, NULL, NULL)
>> +       || vectorizable_reduction (stmt, NULL, NULL, NULL)
>>              || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
>> +
>> +      if (!ok)
>> +   {
>> +          ok = (vectorizable_load (stmt, NULL, NULL, NULL, NULL)
>> +           || vectorizable_store (stmt, NULL, NULL, NULL));
>> +
>> +     if (ok && is_slow_vect_unaligned_load_store (stmt))
>> +       ok = false;
>
> Why not call is_slow_vect_unaligned_load_store from
> vectorizable_load/store?

Yes, I should have done that. Somehow, I missed that!

>
> Ira
>
>
>> +   }
>> +    }
>>      else
>>        {
>>          if (bb_vinfo)
>> -          ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
>> -                || vectorizable_type_demotion (stmt, NULL, NULL, node)
>> -               || vectorizable_shift (stmt, NULL, NULL, node)
>> -                || vectorizable_operation (stmt, NULL, NULL, node)
>> -                || vectorizable_assignment (stmt, NULL, NULL, node)
>> -                || vectorizable_load (stmt, NULL, NULL, node, NULL)
>> -                || vectorizable_store (stmt, NULL, NULL, node));
>> +     {
>> +       ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
>> +        || vectorizable_type_demotion (stmt, NULL, NULL, node)
>> +        || vectorizable_shift (stmt, NULL, NULL, node)
>> +                  || vectorizable_operation (stmt, NULL, NULL, node)
>> +                  || vectorizable_assignment (stmt, NULL, NULL, node));
>> +            if (!ok)
>> +         {
>> +                ok = (vectorizable_load (stmt, NULL, NULL, node, NULL)
>> +            || vectorizable_store (stmt, NULL, NULL, node));
>> +           if (ok && is_slow_vect_unaligned_load_store (stmt))
>> +        ok = false;
>> +         }
>> +     }
>>        }
>


Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)

2011-12-13 Thread Sriraman Tallam
On Mon, Dec 12, 2011 at 11:49 PM, Jakub Jelinek  wrote:
> On Mon, Dec 12, 2011 at 06:05:57PM -0800, Sriraman Tallam wrote:
>>       Do not vectorize loops on Core2 that need to use unaligned
>>       vector load/stores.
>>       * tree-vect-stmts.c (is_slow_vect_unaligned_load_store): New function.
>>       (vect_analyze_stmt): Check if the vectorizable load/store is slow.
>>       * target.def (TARGET_SLOW_UNALIGNED_VECTOR_MEMOP): New target hook.
>>       * doc/m.texi.in: Document new target hook:
>>       TARGET_SLOW_UNALIGNED_VECTOR_MEMOP
>>       * doc/m.texi: Regenerate.
>>       * config/i386/i386.c (ix86_slow_unaligned_vector_memop): New function.
>>       (TARGET_SLOW_UNALIGNED_VECTOR_MEMOP): New macro.
>
> IMHO it would be better if it didn't prevent vectorization of the loops
> altogether, but lead to using aligned stores with an alignment check
> before the vectorized loop if possible.
>
> Also, are unaligned loads equally expensive to unaligned stores?

Unaligned stores are always expensive, irrespective of whether the
data turns out to be aligned or not at run-time. Unaligned loads are
only as expensive when the data item is unaligned. For unaligned loads
of aligned data, the movdqu is still slow but by ~2x rather than 6x.

>
> See http://gcc.gnu.org/PR49442 for further info, this really should be done
> using some cost model rather than a boolean hook.
>
>        Jakub


Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)

2011-12-13 Thread Sriraman Tallam
On Tue, Dec 13, 2011 at 9:58 AM, Richard Henderson  wrote:
> On 12/12/2011 06:05 PM, Sriraman Tallam wrote:
>> On core2, unaligned vector load/store using movdqu is a very slow operation.
>> Experiments show it is six times slower than movdqa (aligned) and this is
>> irrespective of whether the resulting data happens to be aligned or not.
>> For Corei7, there is no performance difference between the two and on AMDs,
>> movdqu is only about 10% slower.
>>
>> This patch does not vectorize loops that need to generate the slow unaligned
>> memory load/stores on core2.
>
> What happens if you temporarily disable
>
>      /* ??? Similar to above, only less clear because of quote
>         typeless stores unquote.  */
>      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
>          && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
>        {
>          op0 = gen_lowpart (V16QImode, op0);
>          op1 = gen_lowpart (V16QImode, op1);
>          emit_insn (gen_sse2_movdqu (op0, op1));
>          return;
>        }
>
> so that the unaligned store happens via movlps + movhps?

Cool, this works for stores!  It generates the movlps + movhps. I have
to also make a similar change to another call to gen_sse2_movdqu for
loads. Would it be ok to not do this when tune=core2?

Thanks,
-Sri.


>
>
> r~


Re: [libjava] Support 64-bit multilib for i?86-linux

2011-12-13 Thread Andrew Haley
On 12/13/2011 05:51 PM, Rainer Orth wrote:
> 2011-12-13  Rainer Orth  
> 
>   * configure.ac (i?86-*-linux*): Set SIGNAL_HANDLER_AUX.
>   * configure: Regenerate.
>   * include/i386-signal.h: Wrap in __i386__, include
>   java-signal-aux.h otherwise.

OK, thanks.

Andrew.



Go patch committed: Permit conversions between string and named slice

2011-12-13 Thread Ian Lance Taylor
The Go language permits converting between string and []byte or []rune.
It was recently changed to permit converting between string a named type
whose base type is []byte or []rune.  This patch implements that in
gccgo.  Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu.
Committed to mainline.

Ian

Index: gcc/go/gofrontend/types.cc
===
--- gcc/go/gofrontend/types.cc	(revision 182143)
+++ gcc/go/gofrontend/types.cc	(working copy)
@@ -662,7 +662,7 @@ Type::are_convertible(const Type* lhs, c
 {
   if (rhs->integer_type() != NULL)
 	return true;
-  if (rhs->is_slice_type() && rhs->named_type() == NULL)
+  if (rhs->is_slice_type())
 	{
 	  const Type* e = rhs->array_type()->element_type()->forwarded();
 	  if (e->integer_type() != NULL
@@ -673,9 +673,7 @@ Type::are_convertible(const Type* lhs, c
 }
 
   // A string may be converted to []byte or []int.
-  if (rhs->is_string_type()
-  && lhs->is_slice_type()
-  && lhs->named_type() == NULL)
+  if (rhs->is_string_type() && lhs->is_slice_type())
 {
   const Type* e = lhs->array_type()->element_type()->forwarded();
   if (e->integer_type() != NULL
Index: gcc/go/gofrontend/expressions.cc
===
--- gcc/go/gofrontend/expressions.cc	(revision 182154)
+++ gcc/go/gofrontend/expressions.cc	(working copy)
@@ -3322,7 +3322,7 @@ Type_conversion_expression::do_lower(Gog
   mpfr_clear(imag);
 }
 
-  if (type->is_slice_type() && type->named_type() == NULL)
+  if (type->is_slice_type())
 {
   Type* element_type = type->array_type()->element_type()->forwarded();
   bool is_byte = element_type == Type::lookup_integer_type("uint8");
@@ -3621,20 +3621,11 @@ Type_conversion_expression::do_get_tree(
 			   integer_type_node,
 			   fold_convert(integer_type_node, expr_tree));
 }
-  else if (type->is_string_type()
-	   && (expr_type->array_type() != NULL
-	   || (expr_type->points_to() != NULL
-		   && expr_type->points_to()->array_type() != NULL)))
+  else if (type->is_string_type() && expr_type->is_slice_type())
 {
-  Type* t = expr_type;
-  if (t->points_to() != NULL)
-	{
-	  t = t->points_to();
-	  expr_tree = build_fold_indirect_ref(expr_tree);
-	}
   if (!DECL_P(expr_tree))
 	expr_tree = save_expr(expr_tree);
-  Array_type* a = t->array_type();
+  Array_type* a = expr_type->array_type();
   Type* e = a->element_type()->forwarded();
   go_assert(e->integer_type() != NULL);
   tree valptr = fold_convert(const_ptr_type_node,
Index: gcc/testsuite/go.test/test/convlit.go
===
--- gcc/testsuite/go.test/test/convlit.go	(revision 181963)
+++ gcc/testsuite/go.test/test/convlit.go	(working copy)
@@ -36,7 +36,7 @@ var good3 int = 1e9
 var good4 float64 = 1e20
 
 // explicit conversion of string is okay
-var _ = []int("abc")
+var _ = []rune("abc")
 var _ = []byte("abc")
 
 // implicit is not
@@ -47,20 +47,20 @@ var _ []byte = "abc" // ERROR "cannot us
 type Tstring string
 
 var ss Tstring = "abc"
-var _ = []int(ss)
+var _ = []rune(ss)
 var _ = []byte(ss)
 
 // implicit is still not
-var _ []int = ss  // ERROR "cannot use|incompatible|invalid"
+var _ []rune = ss // ERROR "cannot use|incompatible|invalid"
 var _ []byte = ss // ERROR "cannot use|incompatible|invalid"
 
-// named slice is not
-type Tint []int
+// named slice is now ok
+type Trune []rune
 type Tbyte []byte
 
-var _ = Tint("abc")  // ERROR "convert|incompatible|invalid"
-var _ = Tbyte("abc") // ERROR "convert|incompatible|invalid"
+var _ = Trune("abc") // ok
+var _ = Tbyte("abc") // ok
 
 // implicit is still not
-var _ Tint = "abc"  // ERROR "cannot use|incompatible|invalid"
+var _ Trune = "abc" // ERROR "cannot use|incompatible|invalid"
 var _ Tbyte = "abc" // ERROR "cannot use|incompatible|invalid"
Index: gcc/testsuite/go.test/test/named1.go
===
--- gcc/testsuite/go.test/test/named1.go	(revision 181963)
+++ gcc/testsuite/go.test/test/named1.go	(working copy)
@@ -41,7 +41,6 @@ func main() {
 	asBool(i < j)  // ERROR "cannot use.*type bool.*as type Bool"
 
 	_, b = m[2] // ERROR "cannot .* bool.*type Bool"
-	m[2] = 1, b // ERROR "cannot use.*type Bool.*as type bool"
 
 	var inter interface{}
 	_, b = inter.(Map) // ERROR "cannot .* bool.*type Bool"
@@ -55,8 +54,8 @@ func main() {
 
 	_, bb := <-c
 	asBool(bb) // ERROR "cannot use.*type bool.*as type Bool"
-	_, b = <-c // ERROR "cannot .* bool.*type Bool"
+	_, b = <-c // ERROR "cannot .* bool.*type Bool"
 	_ = b
 
-	asString(String(slice)) // ERROR "cannot .*type Slice.*type String"
+	asString(String(slice)) // ok
 }


Re: [patch] add __is_final trait to fix libstdc++/51365

2011-12-13 Thread Jonathan Wakely
On 13 December 2011 17:01, Paolo Carlini  wrote:
> Hi,
>>
>> This patch seems pretty simple and safe.  Are you (Gaby and Paolo) arguing 
>> that even so, it shouldn't go in?
>
> As far as I'm concerned, definetely not! I also think that it would be great 
> if, for 4.7, Jon could handle the library issues with EBO by exploiting it.

Yes, if this goes in for 4.7 I will definitely follow it with the
library changes to make use of it.

> I only meant to say that something seems to me more fundamentally wrong at 
> the design level about 'final' vs EBO, my hope is that for 4.8 we'll have a 
> longer term stable solution based on a ISO Committee position.

In one of the earlier bug report comments I proposed a
__gnu_cxx::is_final library trait to expose the __is_final(T)
intrinsic for users, but decided against it precisely because I don't
know how the committee will want to deal with the issue longer term.

So the proposed patch just adds the __is_final intrinsic for use
internally by the library, to allow library changes so the test cases
in the bug report will pass.  If preferred I won't even add __is_final
to the extend.texi docs, to leave it as an undocumented extension that
we could more easily remove (or deprecate) later if necessary.


Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)

2011-12-13 Thread Richard Henderson
On 12/12/2011 06:05 PM, Sriraman Tallam wrote:
> On core2, unaligned vector load/store using movdqu is a very slow operation.
> Experiments show it is six times slower than movdqa (aligned) and this is
> irrespective of whether the resulting data happens to be aligned or not. 
> For Corei7, there is no performance difference between the two and on AMDs,
> movdqu is only about 10% slower.  
> 
> This patch does not vectorize loops that need to generate the slow unaligned
> memory load/stores on core2.

What happens if you temporarily disable

  /* ??? Similar to above, only less clear because of quote
 typeless stores unquote.  */
  if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
{
  op0 = gen_lowpart (V16QImode, op0);
  op1 = gen_lowpart (V16QImode, op1);
  emit_insn (gen_sse2_movdqu (op0, op1));
  return;
}

so that the unaligned store happens via movlps + movhps?


r~


Re: PR/51443: fix inline assembly in transaction_callable functions

2011-12-13 Thread Aldy Hernandez

On 12/13/11 11:49, Eric Botcazou wrote:

This fixes the PR.


You need to put the Component before the / in the ChangeLog, otherwise the
commit won't be cross-referenced in Bugzilla.



Funny, I just noticed.  I will do so from now on.

Thanks.


[libjava] Support 64-bit multilib for i?86-linux

2011-12-13 Thread Rainer Orth
This is the last patch necessary to have a full-blown
i686-unknown-linux-gnu --enable-targets=all configuration to work.  It
requires

http://gcc.gnu.org/ml/gcc-patches/2011-12/msg01011.html

and fixes a compilation failure compiling the 64-bit libgcj multilib:

$ /var/gcc/regression/trunk/2.6.18-gcc-gas-gld-32/build/./gcc/xgcc 
-shared-libgcc -B/var/gcc/regression/trunk/2.6.18-gcc-gas-gld-32/build/./gcc 
-nostdinc++ 
-L/var/gcc/regression/trunk/2.6.18-gcc-gas-gld-32/build/i686-unknown-linux-gnu/64/libstdc++-v3/src
 
-L/var/gcc/regression/trunk/2.6.18-gcc-gas-gld-32/build/i686-unknown-linux-gnu/64/libstdc++-v3/src/.libs
 -B/vol/gcc/i686-unknown-linux-gnu/bin/ -B/vol/gcc/i686-unknown-linux-gnu/lib/ 
-isystem /vol/gcc/i686-unknown-linux-gnu/include -isystem 
/vol/gcc/i686-unknown-linux-gnu/sys-include -m64 -DHAVE_CONFIG_H -I. 
-I/vol/gcc/src/hg/trunk/local/libjava -I./include -I./gcj 
-I/vol/gcc/src/hg/trunk/local/libjava -Iinclude 
-I/vol/gcc/src/hg/trunk/local/libjava/include 
-I/vol/gcc/src/hg/trunk/local/libjava/classpath/include -Iclasspath/include 
-I/vol/gcc/src/hg/trunk/local/libjava/classpath/native/fdlibm 
-I/vol/gcc/src/hg/trunk/local/libjava/../boehm-gc/include -I../boehm-gc/include 
-I/vol/gcc/src/hg/trunk/local/libjava/libltdl 
-I/vol/gcc/src/hg/trunk/local/libjava/libltdl 
-I/vol/gcc/src/hg/trunk/local/libjava/.././libjava/../libgcc 
-I/vol/gcc/src/hg/trunk/local/libjava/../zlib 
-I/vol/gcc/src/hg/trunk/local/libjava/../libffi/include -I../libffi/include 
-fno-rtti -fnon-call-exceptions -fdollars-in-identifiers -Wswitch-enum 
-D_FILE_OFFSET_BITS=64 -ffloat-store -fomit-frame-pointer -Usun -Wextra -Wall 
-D_GNU_SOURCE -DPREFIX=\"/vol/gcc\" -DTOOLEXECLIBDIR=\"/vol/gcc/lib/../lib64\" 
-DJAVA_HOME=\"/vol/gcc\" 
-DBOOT_CLASS_PATH=\"/vol/gcc/share/java/libgcj-4.7.0.jar\" 
-DJAVA_EXT_DIRS=\"/vol/gcc/share/java/ext\" 
-DGCJ_ENDORSED_DIRS=\"/vol/gcc/share/java/gcj-endorsed\" 
-DGCJ_VERSIONED_LIBDIR=\"/vol/gcc/lib/../lib64/gcj-4.7.0-13\" 
-DPATH_SEPARATOR=\":\" -DECJ_JAR_FILE=\"\" 
-DLIBGCJ_DEFAULT_DATABASE=\"/vol/gcc/lib/../lib64/gcj-4.7.0-13/classmap.db\" 
-DLIBGCJ_DEFAULT_DATABASE_PATH_TAIL=\"gcj-4.7.0-13/classmap.db\" 
-fno-omit-frame-pointer -g -O2 -D_GNU_SOURCE -m64 -MT prims.lo -MD -MP -MF 
.deps/prims.Tpo -c /vol/gcc/src/hg/trunk/local/libjava/prims.cc  -fPIC -DPIC -o 
.libs/prims.o
/vol/gcc/src/hg/trunk/local/libjava/prims.cc: In function 'void 
_Jv_catch_fpe(int, siginfo_t*, void*)':
/vol/gcc/src/hg/trunk/local/libjava/prims.cc:192:3: error: 'REG_EIP' was not 
declared in this scope
/vol/gcc/src/hg/trunk/local/libjava/prims.cc:192:3: error: 'REG_EAX' was not 
declared in this scope
/vol/gcc/src/hg/trunk/local/libjava/prims.cc:192:3: error: 'REG_EDX' was not 
declared in this scope

It applies the same technique already used for the 32-bit multilib in
the x86_64-*-linux* configuration.

It allowed the i686-unknown-linux-gnu bootstrap to finish successfully
and bootstrapped without regressions on x86_64-unknown-linux-gnu.

Ok for mainline?

Rainer


2011-12-13  Rainer Orth  

* configure.ac (i?86-*-linux*): Set SIGNAL_HANDLER_AUX.
* configure: Regenerate.
* include/i386-signal.h: Wrap in __i386__, include
java-signal-aux.h otherwise.

diff --git a/libjava/configure.ac b/libjava/configure.ac
--- a/libjava/configure.ac
+++ b/libjava/configure.ac
@@ -1737,6 +1737,7 @@ case "${host}" in
 ;;
  i?86-*-linux*)
 SIGNAL_HANDLER=include/i386-signal.h
+SIGNAL_HANDLER_AUX=include/x86_64-signal.h
 ;;
 # ia64-*)
 #SYSDEP_SOURCES=sysdep/ia64.c
diff --git a/libjava/include/i386-signal.h b/libjava/include/i386-signal.h
--- a/libjava/include/i386-signal.h
+++ b/libjava/include/i386-signal.h
@@ -1,7 +1,8 @@
 // i386-signal.h - Catch runtime signals and turn them into exceptions
 // on an i386 based Linux system.
 
-/* Copyright (C) 1998, 1999, 2001, 2002, 2006, 2007  Free Software Foundation
+/* Copyright (C) 1998, 1999, 2001, 2002, 2006, 2007, 2011
+   Free Software Foundation
 
This file is part of libgcj.
 
@@ -10,6 +11,8 @@ Libgcj License.  Please consult the file
 details.  */
 
 
+#ifdef __i386__
+
 #ifndef JAVA_SIGNAL_H
 #define JAVA_SIGNAL_H 1
 
@@ -165,3 +168,11 @@ while (0)
 
 #endif /* JAVA_SIGNAL_H */
   
+#else /* __i386__ */
+
+/* This is for the 64-bit subsystem on i386.  */
+
+#define sigcontext_struct sigcontext
+#include 
+
+#endif /* __i386__ */

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: PR/51443: fix inline assembly in transaction_callable functions

2011-12-13 Thread Eric Botcazou
> This fixes the PR.

You need to put the Component before the / in the ChangeLog, otherwise the 
commit won't be cross-referenced in Bugzilla.

-- 
Eric Botcazou


Re: [PATCH] PR c++/51476 - ICE on PTRMEM_CST as template argument in c++11

2011-12-13 Thread Dodji Seketeli
Jason Merrill  writes:

> Let's check for PTRMEM_CST separately, before any of the other tests.

Like this?  I still need to run it through bootstrap ...

gcc/cp/

PR c++/51476
* pt.c (convert_nontype_argument): Don't call maybe_constant_value
for PTRMEM_CST nodes.

gcc/testsuite/

PR c++/51476
* cpp0x/ptrmem-cst-arg1.C: New test.
---
 gcc/cp/pt.c  |   10 +++---
 gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C |9 +
 2 files changed, 16 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index bb5aa0c..d127a7b 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -5720,11 +5720,15 @@ convert_nontype_argument (tree type, tree expr, 
tsubst_flags_t complain)
  to a null value, but otherwise still need to be of a specific form.  */
   if (cxx_dialect >= cxx0x)
 {
-  if (INTEGRAL_OR_ENUMERATION_TYPE_P (type))
+  if (TREE_CODE (expr) == PTRMEM_CST)
+   /* We don't need to test if a PTRMEM_CST is a constant value
+  because maybe_constant_value might crash and because
+  [temp.arg.nontype]/1 says it's not allowed as a template
+  argument anyway.  */;
+  else if (INTEGRAL_OR_ENUMERATION_TYPE_P (type))
expr = maybe_constant_value (expr);
   else if (TYPE_PTR_P (type)
-  || (TYPE_PTR_TO_MEMBER_P (type)
-  && TREE_CODE (expr) != PTRMEM_CST))
+  || TYPE_PTR_TO_MEMBER_P (type))
{
  tree folded = maybe_constant_value (expr);
  if (TYPE_PTR_P (type) ? integer_zerop (folded)
diff --git a/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C 
b/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C
new file mode 100644
index 000..b6c81d5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C
@@ -0,0 +1,9 @@
+// Origin PR c++/51476
+// { dg-options "-std=c++11" }
+
+template struct A {}; 
  
+struct B
+{
+int i;
+A<&B::i> a; // { dg-error "could not convert template argument" }
+};
-- 
1.7.6.4


-- 
Dodji


[PATCH] regmove: Fix segfault when accessing call_used_regs

2011-12-13 Thread Andreas Krebbel
Hi,

a very high pseudo register number is needed here to actually cause a
segfault.

I observed it on s390 with a high value in max-completely-peeled-insns
when compiling the href benchmark on s390.


Ok for mainline, 4.6, 4.5, and 4.4?

Bye,

-Andreas-


2011-12-13  Andreas Krebbel  

* regmove.c (fixup_match_2): Only access call_used_regs with hard
regs.

Index: gcc/regmove.c
===
*** gcc/regmove.c.orig
--- gcc/regmove.c
*** fixup_match_2 (rtx insn, rtx dst, rtx sr
*** 859,865 
  if (REG_N_CALLS_CROSSED (REGNO (src)) == 0)
break;
  
! if (call_used_regs [REGNO (dst)]
  || find_reg_fusage (p, CLOBBER, dst))
break;
}
--- 859,865 
  if (REG_N_CALLS_CROSSED (REGNO (src)) == 0)
break;
  
! if ((REGNO (dst) < FIRST_PSEUDO_REGISTER && call_used_regs [REGNO 
(dst)])
  || find_reg_fusage (p, CLOBBER, dst))
break;
}



[libffi] Build 64-bit multilib for i?86-linux

2011-12-13 Thread Rainer Orth
I've recently tried a i686-unknown-linux-gnu --enable-targets=all
bootstrap, which failed to link the 64-bit jv-convert etc.:

./.libs/libgcj.so: undefined reference to `ffi_raw_call'
./.libs/libgcj.so: undefined reference to `ffi_prep_raw_closure_loc'
./.libs/libgcj.so: undefined reference to `ffi_prep_cif_machdep'
./.libs/libgcj.so: undefined reference to `ffi_prep_closure_loc'
./.libs/libgcj.so: undefined reference to `ffi_call'
collect2: error: ld returned 1 exit status
make[5]: *** [jv-convert] Error 1

This happens because libffi doesn't build the x86_64 objects in this
configuration.  The following patch fixes this and allowed (together
with a libjava patch to be submitted shortly) the bootstrap to finish.

I've also bootstrapped x86_64-unknown-linux-gnu without regressions.

The same issue still exists in upstream libffi, and should be harmless
without --enable-targets=all since the added files are empty then.

Ok for mainline gcc?  

Rainer


2011-12-13  Rainer Orth  

* configure.ac (i?86-*-*): Set TARGET to X86_64.
* configure: Regenerate.

diff --git a/libffi/configure.ac b/libffi/configure.ac
--- a/libffi/configure.ac
+++ b/libffi/configure.ac
@@ -99,7 +99,7 @@ case "$host" in
 	TARGET=X86_64; TARGETDIR=x86
 	;;
   i?86-*-*)
-	TARGET=X86; TARGETDIR=x86
+	TARGET=X86_64; TARGETDIR=x86
 	;;
 
   ia64*-*-*)

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


[Patch, Fortran] Some fixes for polymorphic coarrays

2011-12-13 Thread Tobias Burnus

Two small fixes:

a) There was an ICE when simplifying "THIS_IMAGE(caf)" (for 
-fcoarray=single); solution: Simply use internally lcobound(), which is 
identically (for a single image).


b) There was an segfault of the compiled program when running 
"this_image(caf)" where "caf" is a corank-1 coarray. Calculating the 
extend of an assumed size array should be avoided ...


The patch has been build and regtested on x86-64-linux.
OK for the trunk?

Tobias

PS: There are still some other issues with polymorphic coarrays, see 
"Next steps" in 
http://users.physik.fu-berlin.de/~tburnus/coarray/README.txt for a list. 
For instance, there is an ICE if one tries to explicitly deallocate 
scalar polymorphic coarrays.
2011-12-13  Tobias Burnus  

	* simplify.c (gfc_simplify_image_index): Directly call
	simplify_cobound.
	* trans-intrinsic.c (trans_this_image): Fix handling of
	corank = 1 arrays.

2011-12-13  Tobias Burnus  

	* gfortran.dg/coarray/poly_run_3.f90: New.

diff --git a/gcc/fortran/simplify.c b/gcc/fortran/simplify.c
index e82753a..282d88d 100644
--- a/gcc/fortran/simplify.c
+++ b/gcc/fortran/simplify.c
@@ -6227,10 +6227,6 @@ gfc_simplify_image_index (gfc_expr *coarray, gfc_expr *sub)
 gfc_expr *
 gfc_simplify_this_image (gfc_expr *coarray, gfc_expr *dim)
 {
-  gfc_ref *ref;
-  gfc_array_spec *as;
-  int d;
-
   if (gfc_option.coarray != GFC_FCOARRAY_SINGLE)
 return NULL;
 
@@ -6244,74 +6240,8 @@ gfc_simplify_this_image (gfc_expr *coarray, gfc_expr *dim)
   return result;
 }
 
-  gcc_assert (coarray->expr_type == EXPR_VARIABLE);
-
-  /* Follow any component references.  */
-  as = coarray->symtree->n.sym->as;
-  for (ref = coarray->ref; ref; ref = ref->next)
-if (ref->type == REF_COMPONENT)
-  as = ref->u.ar.as;
-
-  if (as->type == AS_DEFERRED)
-return NULL;
-
-  if (dim == NULL)
-{
-  /* Multi-dimensional bounds.  */
-  gfc_expr *bounds[GFC_MAX_DIMENSIONS];
-  gfc_expr *e;
-
-  /* Simplify the bounds for each dimension.  */
-  for (d = 0; d < as->corank; d++)
-	{
-	  bounds[d] = simplify_bound_dim (coarray, NULL, d + as->rank + 1, 0,
-	  as, NULL, true);
-	  if (bounds[d] == NULL || bounds[d] == &gfc_bad_expr)
-	{
-	  int j;
-
-	  for (j = 0; j < d; j++)
-		gfc_free_expr (bounds[j]);
-
-	  return bounds[d];
-	}
-	}
-
-  /* Allocate the result expression.  */
-  e = gfc_get_expr ();
-  e->where = coarray->where;
-  e->expr_type = EXPR_ARRAY;
-  e->ts.type = BT_INTEGER;
-  e->ts.kind = gfc_default_integer_kind;
-
-  e->rank = 1;
-  e->shape = gfc_get_shape (1);
-  mpz_init_set_ui (e->shape[0], as->corank);
-
-  /* Create the constructor for this array.  */
-  for (d = 0; d < as->corank; d++)
-gfc_constructor_append_expr (&e->value.constructor,
- bounds[d], &e->where);
-
-  return e;
-}
-  else
-{
-  /* A DIM argument is specified.  */
-  if (dim->expr_type != EXPR_CONSTANT)
-	return NULL;
-
-  d = mpz_get_si (dim->value.integer);
-
-  if (d < 1 || d > as->corank)
-	{
-	  gfc_error ("DIM argument at %L is out of bounds", &dim->where);
-	  return &gfc_bad_expr;
-	}
-
-  return simplify_bound_dim (coarray, NULL, d + as->rank, 0, as, NULL,
- true);
-   }
+  /* For -fcoarray=single, this_image(A) is the same as lcobound(A).  */
+  return simplify_cobound (coarray, dim, NULL, 0);
 }
 
 
diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c
index 58112e3..5c964c1 100644
--- a/gcc/fortran/trans-intrinsic.c
+++ b/gcc/fortran/trans-intrinsic.c
@@ -1054,6 +1054,11 @@ trans_this_image (gfc_se * se, gfc_expr *expr)
  one always has a dim_arg argument.
 
  m = this_images() - 1
+ if (corank == 1)
+   {
+	 sub(1) = m + lcobound(corank)
+	 return;
+   }
  i = rank
  min_var = min (rank + corank - 2, rank + dim_arg - 1)
  for (;;)
@@ -1070,15 +1075,29 @@ trans_this_image (gfc_se * se, gfc_expr *expr)
    : m + lcobound(corank)
   */
 
+  /* this_image () - 1.  */
+  tmp = fold_convert (type, gfort_gvar_caf_this_image);
+  tmp = fold_build2_loc (input_location, MINUS_EXPR, type, tmp,
+		   build_int_cst (type, 1));
+  if (corank == 1)
+{
+  /* sub(1) = m + lcobound(corank).  */
+  lbound = gfc_conv_descriptor_lbound_get (desc,
+			build_int_cst (TREE_TYPE (gfc_array_index_type),
+   corank+rank-1));
+  lbound = fold_convert (type, lbound);
+  tmp = fold_build2_loc (input_location, PLUS_EXPR, type, tmp, lbound);
+
+  se->expr = tmp;
+  return;
+}
+
   m = gfc_create_var (type, NULL); 
   ml = gfc_create_var (type, NULL); 
   loop_var = gfc_create_var (integer_type_node, NULL); 
   min_var = gfc_create_var (integer_type_node, NULL); 
 
   /* m = this_image () - 1.  */
-  tmp = fold_convert (type, gfort_gvar_caf_this_image);
-  tmp = fold_build2_loc (input_location, MINUS_EXPR, type, tmp,
-		   build_int_cst (type, 1));
   

Re: [RFC] Port libitm to powerpc

2011-12-13 Thread Richard Henderson
On 12/12/2011 07:08 PM, David Edelsohn wrote:
> If you don't want to grab the L2 cache line size directly, could you
> default to 32 bytes on PPC32 and 128 bytes on PPC64 (__powerpc64__) ?

Done.


r~


[ada] Support 64-bit libgnat multilib on i?86-linux

2011-12-13 Thread Rainer Orth
I recently set up a bi-arch environment on i686-unknown-linux-gnu with
--enable-targets=all to check which of a couple of problems I'm seeing
on i386-pc-solaris2.1? are Solaris specific and which are generic.  When
trying to bootstrap such a compiler with all languages, Ada bootstrap
failed compiling the 64-bit libada:

$ /var/gcc/regression/trunk/2.6.18-gcc-gas-gld-32/build/./gcc/xgcc 
-B/var/gcc/regression/trunk/2.6.18-gcc-gas-gld-32/build/./gcc/ 
-B/vol/gcc/i686-unknown-linux-gnu/bin/ -B/vol/gcc/i686-unknown-linux-gnu/lib/ 
-isystem /vol/gcc/i686-unknown-linux-gnu/include -isystem 
/vol/gcc/i686-unknown-linux-gnu/sys-include-c -g -O2 -m64 -fpic  -W -Wall 
-gnatpg -nostdinc -m64  a-finali.adb -o a-finali.o
a-finali.ads:64:09: alignment for "Controlledb64s" must be at least 8
a-finali.ads:64:09: alignment for "Controlledr62s" must be at least 8
a-finali.ads:64:09: alignment for "Controlledt59s" must be at least 8
a-finali.ads:70:09: alignment for "Limited_Controlledb98s" must be at least 8
a-finali.ads:70:09: alignment for "Limited_Controlledr96s" must be at least 8
a-finali.ads:70:09: alignment for "Limited_Controlledt93s" must be at least 8

It turned out that this configuration incorrectly used
system-linux-x86.ads for both multilibs.  The following patch fixes this
and allowed (together with a libffi and libjava patch to be submitted
shortly) the bootstrap to complete.

Ok for mainline?

Rainer


2011-12-13  Rainer Orth  

* gcc-interface/Makefile.in (%86 linux%):
(LIBGNAT_TARGET_PAIRS_32): Split off from LIBGNAT_TARGET_PAIRS.
(LIBGNAT_TARGET_PAIRS_64): New.
(LIBGNAT_TARGET_PAIRS): Add either depending on multilib.

diff --git a/gcc/ada/gcc-interface/Makefile.in b/gcc/ada/gcc-interface/Makefile.in
--- a/gcc/ada/gcc-interface/Makefile.in
+++ b/gcc/ada/gcc-interface/Makefile.in
@@ -1102,9 +1102,21 @@ ifeq ($(strip $(filter-out %86 linux%,$(
   a-exetim.ads
-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [4.7][google] Adding a new option -fstack-protector-strong. (issue5461043)

2011-12-13 Thread 沈涵
Hi, further comments? Or ok for submit?

And as suggested by Diego, I'd like to make it upstream and google branch.

Thanks,
-Han


On Thu, Dec 8, 2011 at 4:55 PM, Han Shen(沈涵)  wrote:
> Hi, Jakub, thanks! Fixed!
>
> Hi, Andrew, it's good suggestion. Done. Also modified foo10.
>
> A small c++ test case was added also.
>
> Patches (also on http://codereview.appspot.com/5461043)
> 
> diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
> index 8684721..0a7a9f7 100644
> --- a/gcc/cfgexpand.c
> +++ b/gcc/cfgexpand.c
> @@ -1507,15 +1507,38 @@ estimated_stack_frame_size (struct cgraph_node *node)
>   return size;
>  }
>
> +/* Helper routine to check if a record or union contains an array field. */
> +
> +static int
> +record_or_union_type_has_array (const_tree tree_type)
> +{
> +  tree fields = TYPE_FIELDS (tree_type);
> +  tree f;
> +  for (f = fields; f; f = DECL_CHAIN (f))
> +    {
> +      if (TREE_CODE (f) == FIELD_DECL)
> +       {
> +         tree field_type = TREE_TYPE (f);
> +         if (RECORD_OR_UNION_TYPE_P (field_type))
> +           return record_or_union_type_has_array (field_type);
> +         if (TREE_CODE (field_type) == ARRAY_TYPE)
> +           return 1;
> +       }
> +    }
> +  return 0;
> +}
> +
>  /* Expand all variables used in the function.  */
>
>  static void
>  expand_used_vars (void)
>  {
>   tree var, outer_block = DECL_INITIAL (current_function_decl);
> +  referenced_var_iterator rvi;
>   VEC(tree,heap) *maybe_local_decls = NULL;
>   unsigned i;
>   unsigned len;
> +  int gen_stack_protect_signal = 0;
>
>   /* Compute the phase of the stack frame for this function.  */
>   {
> @@ -1548,6 +1571,28 @@ expand_used_vars (void)
>        }
>     }
>
> +  FOR_EACH_REFERENCED_VAR (cfun, var, rvi)
> +    if (!is_global_var (var))
> +      {
> +       tree var_type = TREE_TYPE (var);
> +       /* Examine local variables that have been address taken. */
> +       if (TREE_ADDRESSABLE (var))
> +         {
> +           ++gen_stack_protect_signal;
> +           break;
> +         }
> +       /* Examine local referenced variables that contain an array or are
> +          arrays. */
> +       if (TREE_CODE (var) == VAR_DECL
> +           && (TREE_CODE (var_type) == ARRAY_TYPE
> +               || (RECORD_OR_UNION_TYPE_P (var_type)
> +                   && record_or_union_type_has_array (var_type
> +         {
> +           ++gen_stack_protect_signal;
> +           break;
> +         }
> +      }
> +
>   /* At this point all variables on the local_decls with TREE_USED
>      set are not associated with any block scope.  Lay them out.  */
>
> @@ -1638,12 +1683,17 @@ expand_used_vars (void)
>        dump_stack_var_partition ();
>     }
>
> -  /* There are several conditions under which we should create a
> -     stack guard: protect-all, alloca used, protected decls present.  */
> +  /* There are several conditions under which we should create a stack guard:
> +     protect-all, alloca used, protected decls present or a positive
> +     gen_stack_protect_signal. */
>   if (flag_stack_protect == 2
> -      || (flag_stack_protect
> +      || (flag_stack_protect == 1
>          && (cfun->calls_alloca || has_protected_decls)))
>     create_stack_guard ();
> +  else if (flag_stack_protect == 3
> +          && (gen_stack_protect_signal
> +              || cfun->calls_alloca || has_protected_decls))
> +    create_stack_guard ();
>
>   /* Assign rtl to each variable based on these partitions.  */
>   if (stack_vars_num > 0)
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 55d3f2d..1ad9717 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -1848,6 +1848,10 @@ fstack-protector-all
>  Common Report RejectNegative Var(flag_stack_protect, 2)
>  Use a stack protection method for every function
>
> +fstack-protector-strong
> +Common Report RejectNegative Var(flag_stack_protect, 3)
> +Use a smart stack protection method for certain functions
> +
>  fstack-usage
>  Common RejectNegative Var(flag_stack_usage)
>  Output stack usage information on a per-function basis
> diff --git a/gcc/testsuite/g++.dg/fstack-protector-strong.C
> b/gcc/testsuite/g++.dg/fstack-protector-strong.C
> new file mode 100644
> index 000..a4f0f81
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/fstack-protector-strong.C
> @@ -0,0 +1,35 @@
> +/* Test that stack protection is done on chosen functions. */
> +
> +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> +/* { dg-options "-O2 -fstack-protector-strong" } */
> +
> +class A
> +{
> +public:
> +  A() {}
> +  ~A() {}
> +  void method();
> +  int state;
> +};
> +
> +/* Frame address exposed to A::method via "this". */
> +int
> +foo1 ()
> +{
> +  A a;
> +  a.method ();
> +  return a.state;
> +}
> +
> +/* Possible destroying foo2's stack via &a. */
> +int
> +global_func (A& a);
> +
> +/* Frame address exposed to global_func. */
> +int foo2 ()
> +{
> +  A a;
> +  return global_func (a);
> +}
> +
> +/* { dg-final { scan-assembler-times "

Re: [RFC] Port libitm to powerpc

2011-12-13 Thread Richard Henderson
On 12/13/2011 01:51 AM, Iain Sandoe wrote:
> works for me - modulo a couple of typos - probably already fixed in...
> 
>>  git://repo.or.cz/gcc/rth.git rth/tm-next
> 
> ... but I haven't got a chance to check that out and rebuild this morning..

Thanks, no I didn't have all of these fixed.
Applied to that branch and starting another run on linux.


r~


Re: [Patch] Adjust diag-scans in vect-tests to fix fails on AVX/AVX2

2011-12-13 Thread Richard Henderson
On 12/12/2011 09:44 PM, Michael Zolotukhin wrote:
> Should we introduce checks for each possible vector datatype (e.g.
> vect_8byte_int_available, vect_16byte_int_available,
> vect_32byte_int_available, vect_16byte_float_available,
> vect_32byte_float_available etc.) along with a check for
> prefer_something (e.g. vect_prefer_16byte)? Having such info, we'll be
> able to distinguish all the cases, right?

Meh.  This seems excruciatingly error prone.

I think we should simply re-write the test cases so that it doesn't
matter how many times we see a particular string.  Don't combine so
many (potentially vectorizable) loops within a test case that it matters.


r~


Re: PR/51443: fix inline assembly in transaction_callable functions

2011-12-13 Thread Richard Henderson
On 12/13/2011 07:37 AM, Aldy Hernandez wrote:
>   PR/51443
>   * trans-mem.c (struct diagnose_tm): Remove saw_unsafe.
>   (diagnose_tm_1): Same.
>   (ipa_tm_execute): Do not test tm_may_enter_irr before we set it.
>   (ipa_tm_scan_irr_function): Return gracefully when no
>   DECL_STRUCT_FUNCTION.
>   (ipa_tm_scan_irr_block): Believe the user on TM attributes.

Ok.


r~


Re: [patch] add __is_final trait to fix libstdc++/51365

2011-12-13 Thread Paolo Carlini
Hi,
> 
> This patch seems pretty simple and safe.  Are you (Gaby and Paolo) arguing 
> that even so, it shouldn't go in?

As far as I'm concerned, definetely not! I also think that it would be great 
if, for 4.7, Jon could handle the library issues with EBO by exploiting it. I 
only meant to say that something seems to me more fundamentally wrong at the 
design level about 'final' vs EBO, my hope is that for 4.8 we'll have a longer 
term stable solution based on a ISO Committee position.

Paolo


Re: [patch] add __is_final trait to fix libstdc++/51365

2011-12-13 Thread Jason Merrill

On 12/12/2011 09:14 AM, Gabriel Dos Reis wrote:

On Mon, Dec 12, 2011 at 5:25 AM, Paolo Carlini  wrote:


I think being able to detect a final class is good enough for now,
until we find out if there are real problems being encountered as
people make more use of C++11.


Maybe. But in my opinion we should not rush. Something is wrong here at a
more fundamental level.


I agree that we should wait a little bit for the dust to settle down.  Users 
should
avoid it, and implementors shouldn't go through hoops non commensurable with
the benefits of "final".  Maybe the "right" primitive is slightly different.


This patch seems pretty simple and safe.  Are you (Gaby and Paolo) 
arguing that even so, it shouldn't go in?


Jason


Re: [PATCH] PR c++/51476 - ICE on PTRMEM_CST as template argument in c++11

2011-12-13 Thread Jason Merrill

On 12/13/2011 10:50 AM, Dodji Seketeli wrote:

-  if (INTEGRAL_OR_ENUMERATION_TYPE_P (type))
+  if (INTEGRAL_OR_ENUMERATION_TYPE_P (type)
+   &&  TREE_CODE (expr) != PTRMEM_CST)
expr = maybe_constant_value (expr);
else if (TYPE_PTR_P (type)
   || (TYPE_PTR_TO_MEMBER_P (type)


This could still break for a parameter of object pointer type.  Let's 
check for PTRMEM_CST separately, before any of the other tests.


Jason


[COMMITTED, PR 50628] More careful SRA sub-access propagation accross assignments

2011-12-13 Thread Martin Jambor
Hi,

Richi approved the following patch to fix 50628 on IRC.  Details about
the problem and this particular patch are also in bugzilla.  Of
course, I bootstrapped and tested the patch today on x86_64-linux.

Thanks,

Martin


2011-12-13  Martin Jambor  

PR middle-end/50628
* tree-sra.c (propagate_subaccesses_across_link): Do not propagate
sub-accesses of scalar accesses.

Index: src/gcc/tree-sra.c
===
--- src.orig/gcc/tree-sra.c
+++ src/gcc/tree-sra.c
@@ -2234,21 +2234,23 @@ propagate_subaccesses_across_link (struc
   || racc->grp_unscalarizable_region)
 return false;
 
-  if (!lacc->first_child && !racc->first_child
-  && is_gimple_reg_type (racc->type))
+  if (is_gimple_reg_type (racc->type))
 {
-  tree t = lacc->base;
-
-  lacc->type = racc->type;
-  if (build_user_friendly_ref_for_offset (&t, TREE_TYPE (t), lacc->offset,
- racc->type))
-   lacc->expr = t;
-  else
+  if (!lacc->first_child && !racc->first_child)
{
- lacc->expr = build_ref_for_model (EXPR_LOCATION (lacc->base),
-   lacc->base, lacc->offset,
-   racc, NULL, false);
- lacc->grp_no_warning = true;
+ tree t = lacc->base;
+
+ lacc->type = racc->type;
+ if (build_user_friendly_ref_for_offset (&t, TREE_TYPE (t),
+ lacc->offset, racc->type))
+   lacc->expr = t;
+ else
+   {
+ lacc->expr = build_ref_for_model (EXPR_LOCATION (lacc->base),
+   lacc->base, lacc->offset,
+   racc, NULL, false);
+ lacc->grp_no_warning = true;
+   }
}
   return false;
 }


Re: [PATCH, PR 51362] Make IPA-CP not ICE if estimate_ipcp_clone_size_and_time returns zero size

2011-12-13 Thread Jan Hubicka
> On Tue, Dec 13, 2011 at 2:38 PM, Martin Jambor  wrote:
> > Hi,
> >
> > IPA-CP currently assumes that cloning estimates always have some
> > positive size cost.  However, there are apparently situations in which
> > estimate_ipcp_clone_size_and_time does return zero size and which then
> > mostly lead to divisions by zero or failed asserts.  This patch avoids
> > that by simply bumping the sizes to 1 in those cases.
> >
> > Bootstrapped and tested on x86_64-linux. OK for trunk?
> 
> Ok.

It sounds like some bug earlier, because offline functions are accounted to 
have size of 1,
see estimate_function_body_sizes:
  /* Estimate static overhead for function prologue/epilogue and alignment. */
  int size = 2;
(later the value is divied by two)

I will try to debug why this value is no longer accounted.

Honza


[PATCH] Tidy replace_uses_by

2011-12-13 Thread Richard Guenther

After fixing PR51481 I noticed replace_uses_by does quite some
unnecessary work despite being (one of) the core propagator routine(s).
The following patch cleans it up to avoid doing useless work.

LTO bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

I'll apply this tomorrow.

Thanks,
Richard.

2011-12-13  Richard Guenther  

* tree-cfg.c (replace_uses_by): Only mark blocks altered
that will make a difference.  Only recompute ADDR_EXPR
invariantness if it could possibly have changed.  Do so
before folding the statement.

Index: gcc/tree-cfg.c
===
--- gcc/tree-cfg.c  (revision 182285)
+++ gcc/tree-cfg.c  (working copy)
@@ -1592,7 +1596,7 @@ replace_uses_by (tree name, tree val)
  /* This can only occur for virtual operands, since
 for the real ones SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
 would prevent replacement.  */
- gcc_assert (!is_gimple_reg (name));
+ gcc_checking_assert (!is_gimple_reg (name));
  SSA_NAME_OCCURS_IN_ABNORMAL_PHI (val) = 1;
}
}
@@ -1604,28 +1608,37 @@ replace_uses_by (tree name, tree val)
  gimple orig_stmt = stmt;
  size_t i;
 
- fold_stmt (&gsi);
- stmt = gsi_stmt (gsi);
- if (cfgcleanup_altered_bbs && !is_gimple_debug (stmt))
+ /* Mark the block if we changed the last stmt in it.  */
+ if (cfgcleanup_altered_bbs
+ && stmt_ends_bb_p (stmt))
bitmap_set_bit (cfgcleanup_altered_bbs, gimple_bb (stmt)->index);
 
- /* FIXME.  This should go in update_stmt.  */
- for (i = 0; i < gimple_num_ops (stmt); i++)
+ /* FIXME.  It shouldn't be required to keep TREE_CONSTANT
+on ADDR_EXPRs up-to-date on GIMPLE.  Propagation will
+only change sth from non-invariant to invariant, and only
+when propagating integer constants.  */
+ if (TREE_CODE (val) == INTEGER_CST)
+   for (i = 0; i < gimple_num_ops (stmt); i++)
+ {
+   tree op = gimple_op (stmt, i);
+   /* Operands may be empty here.  For example, the labels
+  of a GIMPLE_COND are nulled out following the creation
+  of the corresponding CFG edges.  */
+   if (op && TREE_CODE (op) == ADDR_EXPR)
+ recompute_tree_invariant_for_addr_expr (op);
+ }
+
+ if (fold_stmt (&gsi))
{
- tree op = gimple_op (stmt, i);
-  /* Operands may be empty here.  For example, the labels
- of a GIMPLE_COND are nulled out following the creation
- of the corresponding CFG edges.  */
- if (op && TREE_CODE (op) == ADDR_EXPR)
-   recompute_tree_invariant_for_addr_expr (op);
+ stmt = gsi_stmt (gsi);
+ maybe_clean_or_replace_eh_stmt (orig_stmt, stmt);
}
 
- maybe_clean_or_replace_eh_stmt (orig_stmt, stmt);
  update_stmt (stmt);
}
 }
 
-  gcc_assert (has_zero_uses (name));
+  gcc_checking_assert (has_zero_uses (name));
 
   /* Also update the trees stored in loop structures.  */
   if (current_loops)



Re: [C++ Patch] PR 51464

2011-12-13 Thread Jason Merrill

OK.

Jason



[PATCH] PR c++/51476 - ICE on PTRMEM_CST as template argument in c++11

2011-12-13 Thread Dodji Seketeli
Hello,

In the example of the patch, we crash because convert_nontype_argument
calls maybe_constant_value with the PTRMEM_CST for &B::i (in cxx0x
mode) which ends up trying to poke at the offset of i, but struct B is
not yet laid out.

My understanding is that using a pointer to non-static member as an
argument of the template A (in this context) is invalid anyway, so we
could avoid trying to see if it's a constant value altogether, similar
to what's done in the 'else if' right after the offending 'if'.

Thus the patch below, bootstrapped and tested on
x86_64-unknown-linux-gnu against trunk.

gcc/cp/

PR c++/51476
* pt.c (convert_nontype_argument): Don't call maybe_constant_value
for PTRMEM_CST nodes.

gcc/testsuite/

PR c++/51476
* cpp0x/ptrmem-cst-arg1.C: New test.
---
 gcc/cp/pt.c  |3 ++-
 gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C |9 +
 2 files changed, 11 insertions(+), 1 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index bb5aa0c..8c9f97b 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -5720,7 +5720,8 @@ convert_nontype_argument (tree type, tree expr, 
tsubst_flags_t complain)
  to a null value, but otherwise still need to be of a specific form.  */
   if (cxx_dialect >= cxx0x)
 {
-  if (INTEGRAL_OR_ENUMERATION_TYPE_P (type))
+  if (INTEGRAL_OR_ENUMERATION_TYPE_P (type)
+ && TREE_CODE (expr) != PTRMEM_CST)
expr = maybe_constant_value (expr);
   else if (TYPE_PTR_P (type)
   || (TYPE_PTR_TO_MEMBER_P (type)
diff --git a/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C 
b/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C
new file mode 100644
index 000..b6c81d5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst-arg1.C
@@ -0,0 +1,9 @@
+// Origin PR c++/51476
+// { dg-options "-std=c++11" }
+
+template struct A {}; 
  
+struct B
+{
+int i;
+A<&B::i> a; // { dg-error "could not convert template argument" }
+};
-- 
1.7.6.4


-- 
Dodji


PR/51443: fix inline assembly in transaction_callable functions

2011-12-13 Thread Aldy Hernandez
A while back we redesigned how the may go irrevocable bit was set.  In 
particular, we redesigned it so that the analysis was done after 
IPA-inline was done, so we didn't make incorrect assumptions about 
irrevocability.


This means that any set or use of the tm_may_enter_irr bit before we 
calculate it in the IPA-tm pass is incorrect.  The following patch 
removes such references.


As is customary, this unearthed a few other buglets and cleanups, which 
I have also addressed.  First, saw_unsafe is no longer needed.  I 
removed it.  Second, ipa_tm_scan_irr_function is now called with builtin 
operators such as new, so we need to exit gracefully when no cfun or CFG 
exists.  Lastly, since we are being slightly more aggressive in 
determining irrevocability, we need to make sure that user annotated 
safe functions are respected.


This fixes the PR.

OK?
PR/51443
* trans-mem.c (struct diagnose_tm): Remove saw_unsafe.
(diagnose_tm_1): Same.
(ipa_tm_execute): Do not test tm_may_enter_irr before we set it.
(ipa_tm_scan_irr_function): Return gracefully when no
DECL_STRUCT_FUNCTION.
(ipa_tm_scan_irr_block): Believe the user on TM attributes.

Index: testsuite/gcc.dg/tm/asm-1.c
===
--- testsuite/gcc.dg/tm/asm-1.c (revision 0)
+++ testsuite/gcc.dg/tm/asm-1.c (revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-fgnu-tm -O1" } */
+
+static inline void asmfunc()
+{
+__asm__("");
+}
+
+__attribute__((transaction_callable))
+void push()
+{
+asmfunc();
+}
Index: testsuite/g++.dg/tm/asm-1.c
===
--- testsuite/g++.dg/tm/asm-1.c (revision 0)
+++ testsuite/g++.dg/tm/asm-1.c (revision 0)
@@ -0,0 +1,22 @@
+// { dg-do compile }
+// { dg-options "-fgnu-tm -O1" }
+
+template class shared_ptr {
+public:
+shared_ptr()  {
+  __asm__ ("");
+}
+};
+template class deque {
+public:
+void push_back() {
+  ::new _Tp();
+}
+};
+class Bar {
+  __attribute__((transaction_callable)) void push();
+  deque > events;
+};
+void Bar::push() {
+  events.push_back();
+}
Index: trans-mem.c
===
--- trans-mem.c (revision 182244)
+++ trans-mem.c (working copy)
@@ -544,7 +544,6 @@ struct diagnose_tm
   unsigned int summary_flags : 8;
   unsigned int block_flags : 8;
   unsigned int func_flags : 8;
-  unsigned int saw_unsafe : 1;
   unsigned int saw_volatile : 1;
   gimple stmt;
 };
@@ -695,8 +694,6 @@ diagnose_tm_1 (gimple_stmt_iterator *gsi
   else if (d->func_flags & DIAG_TM_SAFE)
error_at (gimple_location (stmt),
  "asm not allowed in % function");
-  else
-   d->saw_unsafe = true;
   break;
 
 case GIMPLE_TRANSACTION:
@@ -711,8 +708,6 @@ diagnose_tm_1 (gimple_stmt_iterator *gsi
else if (d->func_flags & DIAG_TM_SAFE)
  error_at (gimple_location (stmt),
"relaxed transaction in % function");
-   else
- d->saw_unsafe = true;
inner_flags = DIAG_TM_RELAXED;
  }
else if (gimple_transaction_subcode (stmt) & GTMA_IS_OUTER)
@@ -727,8 +722,6 @@ diagnose_tm_1 (gimple_stmt_iterator *gsi
else if (d->func_flags & DIAG_TM_SAFE)
  error_at (gimple_location (stmt),
"outer transaction in % function");
-   else
- d->saw_unsafe = true;
inner_flags |= DIAG_TM_OUTER;
  }
 
@@ -748,8 +741,6 @@ diagnose_tm_1 (gimple_stmt_iterator *gsi
 
walk_gimple_seq (gimple_transaction_body (stmt),
 diagnose_tm_1, diagnose_tm_1_op, &wi_inner);
-
-   d->saw_unsafe |= d_inner.saw_unsafe;
  }
   }
   break;
@@ -780,11 +771,6 @@ diagnose_tm_blocks (void)
   walk_gimple_seq (gimple_body (current_function_decl),
   diagnose_tm_1, diagnose_tm_1_op, &wi);
 
-  /* If we saw something other than a call that makes this function
- unsafe, remember it so that the IPA pass only needs to scan calls.  */
-  if (d.saw_unsafe && !is_tm_safe_or_pure (current_function_decl))
-cgraph_local_info (current_function_decl)->tm_may_enter_irr = 1;
-
   return 0;
 }
 
@@ -3696,7 +3682,11 @@ ipa_tm_scan_irr_block (basic_block bb)
break;
 
  d = get_cg_data (cgraph_get_node (fn));
- if (d->is_irrevocable)
+
+ /* Return true if irrevocable, but above all, believe
+the user.  */
+ if (d->is_irrevocable
+ && !is_tm_safe_or_pure (fn))
return true;
}
  break;
@@ -3880,6 +3870,11 @@ ipa_tm_scan_irr_function (struct cgraph_
   VEC (basic_block, heap) *queue;
   bool ret = false;
 
+  /* Builtin operators (operator new, and such).  */
+  if (DECL_STRUCT_FUNCTION (node->decl) == NULL
+  

[PATCH, i386]: Fix PR testsuite/51524

2011-12-13 Thread Uros Bizjak
Hello!

Recent RA change disturbs delicate balance of register allocation, so
a couple of scan tests begins to fail.

Attached patch fixes these scan failures by forcing arguments of test
function in registers.

2011-12-13  Uros Bizjak  

PR testsuite/51524
* gcc.target/i386/bmi2-mulx32-1.c (gen_mulx): Add attribute regparm(2).
* gcc.target/i386/bmi2-mulx32-2.c (calc_mulx_u32): Ditto.

Tested on x86_64-pc-linux-gnu {,-m32}, committed to mainline SVN.

Uros.

Index: gcc.target/i386/bmi2-mulx32-1.c
===
--- gcc.target/i386/bmi2-mulx32-1.c (revision 182192)
+++ gcc.target/i386/bmi2-mulx32-1.c (working copy)
@@ -15,7 +15,7 @@
   return res;
 }

-__attribute__((noinline))
+__attribute__((noinline, regparm (2)))
 unsigned long long
 gen_mulx (unsigned a, unsigned b)
 {
Index: gcc.target/i386/bmi2-mulx32-2.c
===
--- gcc.target/i386/bmi2-mulx32-2.c (revision 182192)
+++ gcc.target/i386/bmi2-mulx32-2.c (working copy)
@@ -17,7 +17,7 @@
   return res;
 }

-__attribute__((noinline))
+__attribute__((noinline, regparm (2)))
 unsigned calc_mulx_u32 (unsigned x, unsigned y, unsigned *res_h)
 {
   return (unsigned) _mulx_u32 (x, y, res_h);


[PATCH] Fix PR48354

2011-12-13 Thread Richard Guenther

This fixes PR48354 by streaming DECL_ORIGINAL_TYPE.

LTO bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2011-12-13  Richard Guenther  

PR lto/48354
* tree.c (find_decls_types_r): Also walk DECL_ORIGINAL_TYPE.
* tree-streamer-in.c (lto_input_ts_decl_non_common_tree_pointers):
Stream DECL_ORIGINAL_TYPE.
* tree-streamer-out.c (write_ts_decl_non_common_tree_pointers):
Likewise.

lto/
* lto.c (lto_ft_decl_non_common): When we merged DECL_ORIGINAL_TYPE
with the type of the TYPE_DECL clear DECL_ORIGINAL_TYPE.

* g++.dg/lto/pr48354-1_0.C: New testcase.

Index: gcc/tree.c
===
*** gcc/tree.c.orig 2011-12-13 13:36:34.0 +0100
--- gcc/tree.c  2011-12-13 13:41:24.0 +0100
*** find_decls_types_r (tree *tp, int *ws, v
*** 4796,4801 
--- 4796,4802 
{
  fld_worklist_push (DECL_ARGUMENT_FLD (t), fld);
  fld_worklist_push (DECL_VINDEX (t), fld);
+ fld_worklist_push (DECL_ORIGINAL_TYPE (t), fld);
}
else if (TREE_CODE (t) == FIELD_DECL)
{
Index: gcc/testsuite/g++.dg/lto/pr48354-1_0.C
===
*** /dev/null   1970-01-01 00:00:00.0 +
--- gcc/testsuite/g++.dg/lto/pr48354-1_0.C  2011-12-13 13:41:24.0 
+0100
***
*** 0 
--- 1,16 
+ // { dg-lto-do link }
+ // { dg-lto-options { { -g -flto } } }
+ // { dg-extra-ld-options "-r -nostdlib" }
+ 
+ template struct Identity { typedef T type; };
+ struct S {
+ typedef void (S::*FP)();
+ FP fp;
+ };
+ void g();
+ void f() {
+ typedef Identity::type Dummy;
+ S s;
+ g();
+ }
+ 
Index: gcc/tree-streamer-in.c
===
*** gcc/tree-streamer-in.c.orig 2011-12-13 13:36:34.0 +0100
--- gcc/tree-streamer-in.c  2011-12-13 13:41:24.0 +0100
*** lto_input_ts_decl_non_common_tree_pointe
*** 602,607 
--- 602,609 
DECL_ARGUMENTS (expr) = stream_read_tree (ib, data_in);
DECL_RESULT (expr) = stream_read_tree (ib, data_in);
  }
+   else if (TREE_CODE (expr) == TYPE_DECL)
+ DECL_ORIGINAL_TYPE (expr) = stream_read_tree (ib, data_in);
DECL_VINDEX (expr) = stream_read_tree (ib, data_in);
  }
  
Index: gcc/tree-streamer-out.c
===
*** gcc/tree-streamer-out.c.orig2011-12-13 13:36:34.0 +0100
--- gcc/tree-streamer-out.c 2011-12-13 13:41:24.0 +0100
*** write_ts_decl_non_common_tree_pointers (
*** 508,513 
--- 508,515 
stream_write_tree (ob, DECL_ARGUMENTS (expr), ref_p);
stream_write_tree (ob, DECL_RESULT (expr), ref_p);
  }
+   else if (TREE_CODE (expr) == TYPE_DECL)
+ stream_write_tree (ob, DECL_ORIGINAL_TYPE (expr), ref_p);
stream_write_tree (ob, DECL_VINDEX (expr), ref_p);
  }
  
Index: gcc/lto/lto.c
===
*** gcc/lto/lto.c.orig  2011-12-12 16:55:19.0 +0100
--- gcc/lto/lto.c   2011-12-13 13:48:19.0 +0100
*** lto_ft_decl_non_common (tree t)
*** 381,386 
--- 381,393 
LTO_FIXUP_TREE (DECL_ARGUMENT_FLD (t));
LTO_FIXUP_TREE (DECL_RESULT_FLD (t));
LTO_FIXUP_TREE (DECL_VINDEX (t));
+   /* The C frontends may create exact duplicates for DECL_ORIGINAL_TYPE
+  like for 'typedef enum foo foo'.  We have no way of avoiding to
+  merge them and dwarf2out.c cannot deal with this,
+  so fix this up by clearing DECL_ORIGINAL_TYPE in this case.  */
+   if (TREE_CODE (t) == TYPE_DECL
+   && DECL_ORIGINAL_TYPE (t) == TREE_TYPE (t))
+ DECL_ORIGINAL_TYPE (t) = NULL_TREE;
  }
  
  /* Fix up fields of a decl_non_common T.  */


Re: Fix flags for edges from/to entry/exit basic blocks (issue5486043)

2011-12-13 Thread Diego Novillo

On 11-12-13 09:22 , Diego Novillo wrote:


When testing the patch, you should just run 'make -jN -k check' from
the toplevel build directory, do not just test a subset of the
testsuite.


Additionally, you will need to configure and build with all the default 
languages.  Your change affects everything.



Diego.


Re: Fix flags for edges from/to entry/exit basic blocks (issue5486043)

2011-12-13 Thread Diego Novillo
On Tue, Dec 13, 2011 at 05:43, Dmitry Vyukov  wrote:

> Sorry for the trouble. What should I do now?
> Just in case I've prepared a patch that rolls it back:
> http://codereview.appspot.com/5485054

Dmitry, please roll back the patch for now.  You should be able to see
the failures if you run full checking (I missed the fact that you had
not run full checking in your initial patch, sorry).

When testing the patch, you should just run 'make -jN -k check' from
the toplevel build directory, do not just test a subset of the
testsuite.


Diego.


Re: Go patch committed: Multiplex goroutines onto OS threads

2011-12-13 Thread Ian Lance Taylor
Rainer Orth  writes:

> Ian Lance Taylor  writes:
>
>> of any differences between Solaris and GNU/Linux when it comes to the
>> getcontext, setcontext, and makecontext functions?
>
> I've just found something in Solaris 11 makecontext(3C) (NOTES section),
> but that doesn't explain why 32-bit Solaris/x86 works, while 64-bit
> doesn't:
>
>   http://docs.oracle.com/cd/E23824_01/html/821-1465/makecontext-3c.html

Wow, that's annoying.  You're right, though, it seems that that ought to
affect both x86 and x86_64.

Ian


Re: [PATCH, PR 51362] Make IPA-CP not ICE if estimate_ipcp_clone_size_and_time returns zero size

2011-12-13 Thread Richard Guenther
On Tue, Dec 13, 2011 at 2:38 PM, Martin Jambor  wrote:
> Hi,
>
> IPA-CP currently assumes that cloning estimates always have some
> positive size cost.  However, there are apparently situations in which
> estimate_ipcp_clone_size_and_time does return zero size and which then
> mostly lead to divisions by zero or failed asserts.  This patch avoids
> that by simply bumping the sizes to 1 in those cases.
>
> Bootstrapped and tested on x86_64-linux. OK for trunk?

Ok.

Thanks,
Richard.

> Thanks,
>
> Martin
>
>
> 2011-12-12  Martin Jambor  
>
>        PR tree-optimization/51362
>        * ipa-cp.c (estimate_local_effects): When estimated size of a
>        specialized clone is zero, bump it to one.
>
>        * testsuite/gcc.dg/ipa/pr51362.c: New test.
>
> Index: src/gcc/ipa-cp.c
> ===
> --- src.orig/gcc/ipa-cp.c
> +++ src/gcc/ipa-cp.c
> @@ -1409,6 +1409,14 @@ estimate_local_effects (struct cgraph_no
>            + devirtualization_time_bonus (node, known_csts, known_binfos)
>            + removable_params_cost + emc;
>
> +         gcc_checking_assert (size >=0);
> +         /* The inliner-heuristics based estimates may think that in certain
> +            contexts some functions do not have any size at all but we want
> +            all specializations to have at least a tiny cost, not least not 
> to
> +            divide by zero.  */
> +         if (size == 0)
> +           size = 1;
> +
>          if (dump_file && (dump_flags & TDF_DETAILS))
>            {
>              fprintf (dump_file, " - estimates for value ");
> Index: src/gcc/testsuite/gcc.dg/ipa/pr51362.c
> ===
> --- /dev/null
> +++ src/gcc/testsuite/gcc.dg/ipa/pr51362.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fipa-cp -fipa-cp-clone" } */
> +
> +int
> +baz (void)
> +{
> +  return 0;
> +}
> +
> +int make_mess;
> +
> +__attribute__ ((noinline))
> +int bar (int x, int (*f) (void))
> +{
> +  return f ();
> +}
> +
> +int
> +foo (void)
> +{
> +  return bar (1, baz);
> +}


[Patch]: Guard the call to begin_epilogue

2011-12-13 Thread Tristan Gingold
Hi,

currently, the call to begin_epilogue debug hook in final_scan_insn is 
unconditional and therefore occurs even when DECL_IGNORED_P is true.  This is 
not what is done for other debug hooks, and leads to ICE on VMS (the only 
platform that defines the hook) as last_filename is NULL.

Tested on alpha64-dec-openvms,
committed to trunk as obvious.

Tristan.

2011-12-13  Tristan Gingold  

* final.c (final_scan_insn): Guard the call to begin_epilogue
debug hook.

Index: final.c
===
--- final.c (revision 182280)
+++ final.c (working copy)
@@ -1973,7 +1973,8 @@
  break;
 
case NOTE_INSN_EPILOGUE_BEG:
- (*debug_hooks->begin_epilogue) (last_linenum, last_filename);
+  if (!DECL_IGNORED_P (current_function_decl))
+(*debug_hooks->begin_epilogue) (last_linenum, last_filename);
  targetm.asm_out.function_begin_epilogue (file);
  break;
 



[PATCH, PR 51362] Make IPA-CP not ICE if estimate_ipcp_clone_size_and_time returns zero size

2011-12-13 Thread Martin Jambor
Hi,

IPA-CP currently assumes that cloning estimates always have some
positive size cost.  However, there are apparently situations in which
estimate_ipcp_clone_size_and_time does return zero size and which then
mostly lead to divisions by zero or failed asserts.  This patch avoids
that by simply bumping the sizes to 1 in those cases.

Bootstrapped and tested on x86_64-linux. OK for trunk?

Thanks,

Martin


2011-12-12  Martin Jambor  

PR tree-optimization/51362
* ipa-cp.c (estimate_local_effects): When estimated size of a
specialized clone is zero, bump it to one.

* testsuite/gcc.dg/ipa/pr51362.c: New test.

Index: src/gcc/ipa-cp.c
===
--- src.orig/gcc/ipa-cp.c
+++ src/gcc/ipa-cp.c
@@ -1409,6 +1409,14 @@ estimate_local_effects (struct cgraph_no
+ devirtualization_time_bonus (node, known_csts, known_binfos)
+ removable_params_cost + emc;
 
+ gcc_checking_assert (size >=0);
+ /* The inliner-heuristics based estimates may think that in certain
+contexts some functions do not have any size at all but we want
+all specializations to have at least a tiny cost, not least not to
+divide by zero.  */
+ if (size == 0)
+   size = 1;
+
  if (dump_file && (dump_flags & TDF_DETAILS))
{
  fprintf (dump_file, " - estimates for value ");
Index: src/gcc/testsuite/gcc.dg/ipa/pr51362.c
===
--- /dev/null
+++ src/gcc/testsuite/gcc.dg/ipa/pr51362.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fipa-cp -fipa-cp-clone" } */
+
+int
+baz (void)
+{
+  return 0;
+}
+
+int make_mess;
+
+__attribute__ ((noinline))
+int bar (int x, int (*f) (void))
+{
+  return f ();
+}
+
+int
+foo (void)
+{
+  return bar (1, baz);
+}


[vms/committed]: fix crash in vmsdbgout.c

2011-12-13 Thread Tristan Gingold
Hi,

there were calls in vmsdbgout.c to dwarf2out_source_line outside the normal 
flow: in end_prologue, begin_epilogue, end_epilogue.  This is useless and not 
not expected by dwarf2out.  This patch removes these extra calls, and fixes a 
crash.

Tested by building c+ada for alpha64-dec-openvms.

Committed to trunk.

Tristan.

2011-12-13  Tristan Gingold  

* vmsdbgout.c (vmsdbgout_write_source_line): New function.
(vmsdbgout_end_prologue): Call vmsdbgout_write_source_line.
(vmsdbgout_begin_epilogue): Likewise.
(vmsdbgout_end_epilogue): Likewise.
(vmsdbgout_source_line): Move code to vmsdbgout_write_source_line.

Index: vmsdbgout.c
===
--- vmsdbgout.c (revision 182280)
+++ vmsdbgout.c (working copy)
@@ -158,6 +158,7 @@
 static void vmsdbgout_end_block (unsigned int, unsigned int);
 static bool vmsdbgout_ignore_block (const_tree);
 static void vmsdbgout_source_line (unsigned int, const char *, int, bool);
+static void vmsdbgout_write_source_line (unsigned, const char *, int , bool);
 static void vmsdbgout_begin_prologue (unsigned int, const char *);
 static void vmsdbgout_end_prologue (unsigned int, const char *);
 static void vmsdbgout_end_function (unsigned int);
@@ -1162,7 +1163,7 @@
   ASM_OUTPUT_LABEL (asm_out_file, label);
 
   /* VMS PCA expects every PC range to correlate to some line and file.  */
-  vmsdbgout_source_line (line, file, 0, true);
+  vmsdbgout_write_source_line (line, file, 0, true);
 }
 }
 
@@ -1202,7 +1203,7 @@
 
 }
 
@@ -1202,7 +1203,7 @@
 
  /* VMS PCA expects every PC range to correlate to some line and
 file.  */
- vmsdbgout_source_line (line, file, 0, true);
+ vmsdbgout_write_source_line (line, file, 0, true);
}
 }
 }
@@ -1228,7 +1229,7 @@
   ASM_OUTPUT_LABEL (asm_out_file, label);
 
   /* VMS PCA expects every PC range to correlate to some line and file.  */
-  vmsdbgout_source_line (line, file, 0, true);
+  vmsdbgout_write_source_line (line, file, 0, true);
 }
 }
 
@@ -1388,6 +1389,31 @@
'line_info_table' for later output of the .debug_line section.  */
 
 static void
+vmsdbgout_write_source_line (unsigned line, const char *filename,
+ int discriminator, bool is_stmt)
+{
+  dst_line_info_ref line_info;
+
+  targetm.asm_out.internal_label (asm_out_file, LINE_CODE_LABEL,
+  line_info_table_in_use);
+
+  /* Expand the line info table if necessary.  */
+  if (line_info_table_in_use == line_info_table_allocated)
+{
+  line_info_table_allocated += LINE_INFO_TABLE_INCREMENT;
+  line_info_table = XRESIZEVEC (dst_line_info_entry, line_info_table,
+line_info_table_allocated);
+}
+
+  /* Add the new entry at the end of the line_info_table.  */
+  line_info = &line_info_table[line_info_table_in_use++];
+  line_info->dst_file_num = lookup_filename (filename);
+  line_info->dst_line_num = line;
+  if (line > file_info_table[line_info->dst_file_num].max_line)
+file_info_table[line_info->dst_file_num].max_line = line;
+}
+
+static void
-
-  targetm.asm_out.internal_label (asm_out_file, LINE_CODE_LABEL,
- line_info_table_in_use);
-
-  /* Expand the line info table if necessary.  */
-  if (line_info_table_in_use == line_info_table_allocated)
-   {
- line_info_table_allocated += LINE_INFO_TABLE_INCREMENT;
- line_info_table = XRESIZEVEC (dst_line_info_entry, line_info_table,
-   line_info_table_allocated);
-   }
-
-  /* Add the new entry at the end of the line_info_table.  */
-  line_info = &line_info_table[line_info_table_in_use++];
-  line_info->dst_file_num = lookup_filename (filename);
-  line_info->dst_line_num = line;
-  if (line > file_info_table[line_info->dst_file_num].max_line)
-   file_info_table[line_info->dst_file_num].max_line = line;
-}
+vmsdbgout_write_source_line (line, filename, discriminator, is_stmt);
 }
 
 /* Record the beginning of a new source file, for later output.


Re: [PATCH, PR51491] add CLOBBER before __builtin_stack_restore when converting vla alloca_with_align to array decl

2011-12-13 Thread Jakub Jelinek
On Tue, Dec 13, 2011 at 01:26:42PM +0100, Tom de Vries wrote:
> 2011-12-13  Tom de Vries  
> 
>   PR tree-optimization/51491
>   * tree-ssa-ccp.c (insert_clobber_before_stack_restore): New function.
>   (ccp_fold_stmt): Use insert_clobber_before_stack_restore after a
>   successful fold_builtin_alloca_with_align.

I don't think this is safe.  You don't want to look for any following
__builtin_stack_restore, but for the matching one.  Consider:

int g (int *);

int
f (int n)
{
  int tt = 0;
  int t = 4;
  {
int a[t
#ifdef DIFFERENT_BB2
  + (tt != 0 ? 6 : 0)
#endif
 ];
tt = g (a);
{
  int b[n];
  tt += g (b);
#ifdef DIFFERENT_BB
  if (n > 20)
tt += 148 * g (b);
#endif  
  tt += b[0];
}
tt += a[0];
  }
  {
int a[4];
tt += g (a);
tt += a[0];
  }
  return tt;
}

Without any defines, this shows that looking for the first
BUILT_IN_STACK_RESTORE is wrong if you ignore BUILT_IN_STACK_SAVE calls.
And with the various defines it shows that neither the corresponding
__builtin_stack_save nor __builtin_stack_restore have to be in the same
bb as __builtin_alloca_with_align that is being folded.
Perhaps you want to look for the closest enclosing __builtin_stack_save
(search backwards in current basic block, its immediate dominator etc.?),
remember what SSA_NAME it stores its result into and then just look at
where is the (single?) user of that, which ought to be
__builtin_stack_restore.

Jakub


[PATCH] Fix PR51481

2011-12-13 Thread Richard Guenther

This fixes the maybe_clean_or_replace_eh_stmt call in
replace_uses_by.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2011-12-13  Richard Guenther  

PR middle-end/51481
* tree-cfg.c (replace_uses_by): Pass proper arguments to
maybe_clean_or_replace_eh_stmt.

Index: gcc/tree-cfg.c
===
--- gcc/tree-cfg.c  (revision 182220)
+++ gcc/tree-cfg.c  (working copy)
@@ -1601,6 +1605,7 @@ replace_uses_by (tree name, tree val)
   if (gimple_code (stmt) != GIMPLE_PHI)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+ gimple orig_stmt = stmt;
  size_t i;
 
  fold_stmt (&gsi);
@@ -1619,7 +1624,7 @@ replace_uses_by (tree name, tree val)
recompute_tree_invariant_for_addr_expr (op);
}
 
- maybe_clean_or_replace_eh_stmt (stmt, stmt);
+ maybe_clean_or_replace_eh_stmt (orig_stmt, stmt);
  update_stmt (stmt);
}
 }



[PATCH] Fix PR51519

2011-12-13 Thread Richard Guenther

This fixes PR51519.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2011-12-13  Richard Guenther  

PR tree-optimization/51519
* ipa-inline.c (edge_badness): Use edge growth in non-guessed
branch probability case as well.

* gcc.dg/pr51519.c: New testcase.

Index: gcc/ipa-inline.c
===
--- gcc/ipa-inline.c(revision 182220)
+++ gcc/ipa-inline.c(working copy)
@@ -861,7 +861,7 @@ edge_badness (struct cgraph_edge *edge,
   else
 {
   int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
-  badness = estimate_growth (callee) * 256;
+  badness = growth * 256;
 
   /* Decrease badness if call is nested.  */
   if (badness > 0)
Index: gcc/testsuite/gcc.dg/pr51519.c
===
--- gcc/testsuite/gcc.dg/pr51519.c  (revision 0)
+++ gcc/testsuite/gcc.dg/pr51519.c  (revision 0)
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fno-guess-branch-probability -findirect-inlining" } */
+
+void fe (void);
+int i;
+
+static inline void
+FX (void (*f) (void))
+{
+  fe ();
+  (*f) ();
+}
+
+static inline void
+f4 ()
+{
+  if (i)
+FX (fe);
+}
+
+static inline void
+f3 (void)
+{
+  f4 ();
+  if (i)
+FX (f4);
+}
+
+static inline void
+f2 (void)
+{
+  FX (&f3);
+}
+
+void
+f1 (void)
+{
+  FX (&f2);
+}


[PATCH] New testcase for -flto -g

2011-12-13 Thread Richard Guenther

Committed.

Richard.

2011-12-13  Richard Guenther  

* gcc.dg/lto/20111213-1_0.c: New testcase.

Index: gcc/testsuite/gcc.dg/lto/20111213-1_0.c
===
--- gcc/testsuite/gcc.dg/lto/20111213-1_0.c (revision 0)
+++ gcc/testsuite/gcc.dg/lto/20111213-1_0.c (revision 0)
@@ -0,0 +1,8 @@
+/* { dg-lto-do link } */
+/* { dg-lto-options { { -flto -g } } } */
+/* { dg-extra-ld-options {-r -nostdlib} } */
+
+void gfc_be_parse_file (void)
+{
+  typedef enum builtin_type builtin_type;
+}


[PATCH, PR51491] add CLOBBER before __builtin_stack_restore when converting vla alloca_with_align to array decl

2011-12-13 Thread Tom de Vries
Jakub,

I have a patch for PR51491.

Consider the following test-case, compiled with gcc -O2 -S f.c -fdump-tree-all:
...
int f(void)
{
  int tt = 0;
  int t = 4;
  {
int a[t];
tt = g(a);
tt += a[0];
  }
  {
int a[4];
tt += g(a);
tt += a[0];
  }
  return tt;
}
...

The vla a[t] is gimplified to an alloca_with_align, with a
__builtin_stack_save/__builtin_stack_restore pair inserted at the scope
boundaries. This alloca_with_align is folded by ccp into an array declaration,
resulting in this representation at f.c.149t.optimized:
...
f ()
{
   D.1726[16];
  int a[4];
  int tt;
  int D.1722;
  int D.1721;
  void * saved_stack.2;
  int D.1719;

:
  saved_stack.2_3 = __builtin_stack_save ();
  tt_18 = g (&D.1726);
  D.1719_19 = MEM[(int[0:D.1713] *)&D.1726][0];
  tt_20 = D.1719_19 + tt_18;
  __builtin_stack_restore (saved_stack.2_3);
  D.1721_21 = g (&a);
  tt_22 = D.1721_21 + tt_20;
  D.1722_23 = a[0];
  tt_24 = D.1722_23 + tt_22;
  a ={v} {CLOBBER};
  return tt_24;
}
...

The patch inserts this clobber of D.1726 when alloca_with_align is folded:
...
@@ -13,6 +13,7 @@
   tt_18 = g (&D.1726);
   D.1719_19 = MEM[(int[0:D.1713] *)&D.1726][0];
   tt_20 = D.1719_19 + tt_18;
+  D.1726 ={v} {CLOBBER};
   __builtin_stack_restore (saved_stack.2_3);
   D.1721_21 = g (&a);
   tt_22 = D.1721_21 + tt_20;
...

This allows D.1726 and a[4] to be mapped onto the same stack space, as can be
seen in expand dump:
...
Partition 0: size 16 align 16
D.1726  a
...

Bootstrapped and reg-tested (ada inclusive) on x86_64.

OK for stage3?

Thanks,
- Tom

2011-12-13  Tom de Vries  

PR tree-optimization/51491
* tree-ssa-ccp.c (insert_clobber_before_stack_restore): New function.
(ccp_fold_stmt): Use insert_clobber_before_stack_restore after a
successful fold_builtin_alloca_with_align.

* gcc.dg/pr51491.c: New test.
Index: gcc/tree-ssa-ccp.c
===
--- gcc/tree-ssa-ccp.c (revision 182098)
+++ gcc/tree-ssa-ccp.c (working copy)
@@ -1690,6 +1690,36 @@ evaluate_stmt (gimple stmt)
   return val;
 }
 
+/* Try to find a BUILT_IN_STACK_RESTORE after gsi_stmt (I) and insert a clobber
+   of T before it. */
+
+static void
+insert_clobber_before_stack_restore (gimple_stmt_iterator i, tree t)
+{
+  bool found = false;
+  tree clobber;
+
+  for (; !gsi_end_p (i); gsi_next (&i))
+{
+  gimple stmt = gsi_stmt (i);
+
+  if (!gimple_call_builtin_p (stmt, BUILT_IN_STACK_RESTORE))
+	continue;
+
+  found = true;
+  break;
+}
+
+  /* We should try harder, but for now, just return.  */
+  if (!found)
+return;
+
+  clobber = build_constructor (TREE_TYPE (t), NULL);
+  TREE_THIS_VOLATILE (clobber) = 1;
+  gsi_insert_before (&i, gimple_build_assign (t, clobber),
+		 GSI_SAME_STMT);
+}
+
 /* Detects a __builtin_alloca_with_align with constant size argument.  Declares
fixed-size array and returns the address, if found, otherwise returns
NULL_TREE.  */
@@ -1824,7 +1854,9 @@ ccp_fold_stmt (gimple_stmt_iterator *gsi
 if (new_rhs)
 	  {
 		bool res = update_call_from_tree (gsi, new_rhs);
+		tree var = TREE_OPERAND (TREE_OPERAND (new_rhs, 0),0);
 		gcc_assert (res);
+		insert_clobber_before_stack_restore (*gsi, var);
 		return true;
 	  }
   }
Index: gcc/testsuite/gcc.dg/pr51491.c
===
--- /dev/null (new file)
+++ gcc/testsuite/gcc.dg/pr51491.c (revision 0)
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-expand" } */
+
+
+int g(int*);
+
+int f(void)
+{
+  int tt = 0;
+  int t = 4;
+  {
+int a[t];
+tt = g(a);
+tt += a[0];
+  }
+  {
+int a[4];
+tt += g(a);
+tt += a[0];
+  }
+  return tt;
+}
+
+/* { dg-final { scan-rtl-dump-times "Partition" 1 "expand"} } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */


Re: [patch] Remove occurrences of int64_t (and int32_t)

2011-12-13 Thread Eric Botcazou
> Please simply switch it to unconditional use of tree_to_double_int
> (DECL_SIZE_UNIT (t)).low and make 'size' a HOST_WIDE_INT (we properly
> require 64 bit hwi for targets that have 64bit sizes/pointers).

Sure, but we need a 64-bit container for 'size' at least, because 8 bytes are 
read from it.  So I presume HOST_WIDEST_INT should simply be used there too.

> +#if HOST_BITS_PER_WIDE_INT >= 64
> +# define host_int64 HOST_WIDE_INT
> +#elif HOST_BITS_PER_WIDEST_INT >= 64
> +# define host_int64 HOST_WIDEST_INT
> +#else
> +# error "host has no 64-bit type"
> +#endif
>
> well, as previous communication has shown we should use
> HOST_WIDEST_INT unconditionally for a 64-bit type (allowing
> the code to compile when no such type is available during stage1).
> If we really need a true 64bit type then we should amend hwint.h
> accordingly.

OK.

-- 
Eric Botcazou


Re: [PATCH] Call maybe_clean_or_redirect_eh_stmt in gimple_fold_call (PR tree-optimization/51481)

2011-12-13 Thread Richard Guenther
On Tue, Dec 13, 2011 at 1:15 PM, Jakub Jelinek  wrote:
> On Tue, Dec 13, 2011 at 01:05:30PM +0100, Richard Guenther wrote:
>> Yeah, I'm testing a followup patch that fixes the call in replace_uses_by
>> (that alone fixes the testcase in the PR).
>
> That patch looks good, but then forwprop has the same
> static void
> tidy_after_forward_propagate_addr (gimple stmt)
> {
>  /* We may have turned a trapping insn into a non-trapping insn.  */
>  if (maybe_clean_or_replace_eh_stmt (stmt, stmt)
>      && gimple_purge_dead_eh_edges (gimple_bb (stmt)))
>    cfg_changed = true;
> ...
>
> Other spots are maybe ok, so if you want, the
> maybe_cleanup_or_replace_eh_stmt call from gimple_fold_call can be nuked
> afterwards.  But what about that lookup_*/tree_could_throw_p check?
> Should it stay?

I don't think so, if we want to prevent fold_stmt from running into such
situation we should do it centrally (similar to the in-place variant).  But
I wouldn't go down this route without further evidence (read: a testcase
that we cannot fix otherwise).

Richard.

>        Jakub


Re: [PATCH] Call maybe_clean_or_redirect_eh_stmt in gimple_fold_call (PR tree-optimization/51481)

2011-12-13 Thread Jakub Jelinek
On Tue, Dec 13, 2011 at 01:05:30PM +0100, Richard Guenther wrote:
> Yeah, I'm testing a followup patch that fixes the call in replace_uses_by
> (that alone fixes the testcase in the PR).

That patch looks good, but then forwprop has the same
static void
tidy_after_forward_propagate_addr (gimple stmt)
{
  /* We may have turned a trapping insn into a non-trapping insn.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, stmt)
  && gimple_purge_dead_eh_edges (gimple_bb (stmt)))
cfg_changed = true;
...

Other spots are maybe ok, so if you want, the
maybe_cleanup_or_replace_eh_stmt call from gimple_fold_call can be nuked
afterwards.  But what about that lookup_*/tree_could_throw_p check?
Should it stay?

Jakub


Re: [PATCH] Call maybe_clean_or_redirect_eh_stmt in gimple_fold_call (PR tree-optimization/51481)

2011-12-13 Thread Richard Guenther
On Tue, Dec 13, 2011 at 1:00 PM, Jakub Jelinek  wrote:
> On Tue, Dec 13, 2011 at 11:52:38AM +0100, Richard Guenther wrote:
>> On Mon, Dec 12, 2011 at 6:01 PM, Jakub Jelinek  wrote:
>> > In gimple_fold_call (called from fold_stmt from replace_uses_by from cfg
>> > cleanup) we weren't calling maybe_clean_or_replace_eh_stmt, so when
>> > we've replaced a printf call (which can throw) with puts (which can throw
>> > too), nothing would update EH stmts.  It would be problematic calling
>> > gimple_purge_dead_eh_edges, because callers might be surprised by that,
>> > especially when this happens during cfg cleanup, so instead I just assert
>> > it is not needed and don't try to fold if a throwing stmt would be replaced
>> > by non-throwing.  FAB pass can handle that instead.  No folding
>> > has been actually disabled because of that check during bootstrap/regtest,
>> > so it is there just in case.
>
> Note, I've already committed the patch yesterday.

Yes, I've seen that.  I think it's ugly and inconsistent - we can
certaily fold non-call stmts from throwing to non-throwing.
Think of -fnon-call-exceptions and x + 1.3 and folding after
propagating 1.0 to x.

It is not fold_stmts business to care about EH info or edges
(it works on statements in isolation).

>> I think that all callers of fold_stmt are supposed to handle EH
>> updating themselves, similar to how they are supposed to
>> call update_stmt.  I see that replace_uses_by does call
>
> Some do something, but others don't.  Do you think it is preferable when
> the callers do that (all of them)?  Even if that is chosen, while some could
> purge dead eh edges, other places can't do that IMHO, so either we need to
> simply not do transformations that remove EH edges in fold_stmt, or have an
> argument that is passed down whether it is ok to do so.

I think all callers can (should be able to) do that.  In the light of
-fnon-call-exceptions we probably miss quite a few - but we should
fix them.

[Unfortunately some of our propagation helpers do not, or do not have
a good way of returning whether EH/CFG cleanup is necessary,
notably gsi_remove, which removes the stmt from the EH tables
but does not purge edges.]

Richard.


Re: [PATCH] Call maybe_clean_or_redirect_eh_stmt in gimple_fold_call (PR tree-optimization/51481)

2011-12-13 Thread Richard Guenther
On Tue, Dec 13, 2011 at 1:00 PM, Jakub Jelinek  wrote:
> On Tue, Dec 13, 2011 at 11:52:38AM +0100, Richard Guenther wrote:
>> On Mon, Dec 12, 2011 at 6:01 PM, Jakub Jelinek  wrote:
>> > In gimple_fold_call (called from fold_stmt from replace_uses_by from cfg
>> > cleanup) we weren't calling maybe_clean_or_replace_eh_stmt, so when
>> > we've replaced a printf call (which can throw) with puts (which can throw
>> > too), nothing would update EH stmts.  It would be problematic calling
>> > gimple_purge_dead_eh_edges, because callers might be surprised by that,
>> > especially when this happens during cfg cleanup, so instead I just assert
>> > it is not needed and don't try to fold if a throwing stmt would be replaced
>> > by non-throwing.  FAB pass can handle that instead.  No folding
>> > has been actually disabled because of that check during bootstrap/regtest,
>> > so it is there just in case.
>
> Note, I've already committed the patch yesterday.
>
>> I think that all callers of fold_stmt are supposed to handle EH
>> updating themselves, similar to how they are supposed to
>> call update_stmt.  I see that replace_uses_by does call
>
> Some do something, but others don't.  Do you think it is preferable when
> the callers do that (all of them)?  Even if that is chosen, while some could
> purge dead eh edges, other places can't do that IMHO, so either we need to
> simply not do transformations that remove EH edges in fold_stmt, or have an
> argument that is passed down whether it is ok to do so.
>
>> maybe_clean_or_replace_eh_stmt - are you sure it is this path
>> the issue triggers on?
>
> Yes, I'm sure this was in replace_uses_by.  Apparently it calls
> maybe_clean_or_replace_eh_stmt, but incorrectly (similarly forwprop in some
> cases doesn't call it at all, in other places incorrectly):
>
>  maybe_clean_or_replace_eh_stmt (stmt, stmt);
>
> This does nothing.  It must be called with the old stmt and new stmt it was
> replaced with, otherwise, when it is called just with new stmt as in this
> place lookup_stmt_eh_lp will just return 0 (unless fold_stmt did call it 
> already
> properly).

Yeah, I'm testing a followup patch that fixes the call in replace_uses_by
(that alone fixes the testcase in the PR).

Richard.

>        Jakub


Re: [PATCH] _GCC_PICFLAG: use -fPIC for s390x targets

2011-12-13 Thread Andreas Krebbel
On 12/07/2011 10:33 AM, Rainer Orth wrote:
> Mike Frysinger  writes:
>>  s390*-*-*)
>>  $1=-fpic
>>  ;;
> 
> Perhaps it's better to remove both s390* cases and use the -fPIC default
> everywhere, as does libtool.  picflag.m4 is supposed to be usable
> everywhere.

>From a performance perspective the "fix after breakage" approach still would 
>make sense
since GCC generates slightly faster code with -fpic compared to -fPIC. On the 
other hand
the user visible shared libs like libgomp, libgfortran, and libstdc++ are 
already built
with -fPIC anyway. We recently also enabled -fPIC for libgcc_s.so. And now we 
are about to
do the same for libiberty. So I think there does not remain much anyway - or am 
I missing
something?!

So I agree that removing the s390*-*-* case entirely in picflag.m4 will be the 
better way
to fix this. We then also should revert that one:
http://gcc.gnu.org/ml/gcc-patches/2011-11/msg00643.html

Bye,

-Andreas-



Re: [PATCH] Call maybe_clean_or_redirect_eh_stmt in gimple_fold_call (PR tree-optimization/51481)

2011-12-13 Thread Jakub Jelinek
On Tue, Dec 13, 2011 at 11:52:38AM +0100, Richard Guenther wrote:
> On Mon, Dec 12, 2011 at 6:01 PM, Jakub Jelinek  wrote:
> > In gimple_fold_call (called from fold_stmt from replace_uses_by from cfg
> > cleanup) we weren't calling maybe_clean_or_replace_eh_stmt, so when
> > we've replaced a printf call (which can throw) with puts (which can throw
> > too), nothing would update EH stmts.  It would be problematic calling
> > gimple_purge_dead_eh_edges, because callers might be surprised by that,
> > especially when this happens during cfg cleanup, so instead I just assert
> > it is not needed and don't try to fold if a throwing stmt would be replaced
> > by non-throwing.  FAB pass can handle that instead.  No folding
> > has been actually disabled because of that check during bootstrap/regtest,
> > so it is there just in case.

Note, I've already committed the patch yesterday.

> I think that all callers of fold_stmt are supposed to handle EH
> updating themselves, similar to how they are supposed to
> call update_stmt.  I see that replace_uses_by does call

Some do something, but others don't.  Do you think it is preferable when
the callers do that (all of them)?  Even if that is chosen, while some could
purge dead eh edges, other places can't do that IMHO, so either we need to
simply not do transformations that remove EH edges in fold_stmt, or have an
argument that is passed down whether it is ok to do so.

> maybe_clean_or_replace_eh_stmt - are you sure it is this path
> the issue triggers on?

Yes, I'm sure this was in replace_uses_by.  Apparently it calls
maybe_clean_or_replace_eh_stmt, but incorrectly (similarly forwprop in some
cases doesn't call it at all, in other places incorrectly):

  maybe_clean_or_replace_eh_stmt (stmt, stmt);

This does nothing.  It must be called with the old stmt and new stmt it was
replaced with, otherwise, when it is called just with new stmt as in this
place lookup_stmt_eh_lp will just return 0 (unless fold_stmt did call it already
properly).

Jakub


[Ada] Add more vectorization (sub-)tests

2011-12-13 Thread Eric Botcazou
Tested on i586-suse-linux, applied on the mainline.


2011-12-13  Eric Botcazou  

* gnat.dg/vect1.ad[sb]: Add more tests.
* gnat.dg/vect2.ad[sb]: Likewise.
* gnat.dg/vect3.ad[sb]: Likewise.
* gnat.dg/vect4.ad[sb]: Likewise.
* gnat.dg/vect5.ad[sb]: Likewise.
* gnat.dg/vect6.ad[sb]: Likewise.


-- 
Eric Botcazou
Index: gnat.dg/vect4.adb
===
--- gnat.dg/vect4.adb	(revision 182203)
+++ gnat.dg/vect4.adb	(working copy)
@@ -12,6 +12,13 @@ package body Vect4 is
   return R;
end;
 
+   procedure Add (X : Varray; Y : Long_Float; R : out Varray) is
+   begin
+  for I in X'Range loop
+ R(I) := X(I) + Y;
+  end loop;
+   end;
+
procedure Add (X : not null access Varray; Y : Long_Float; R : not null access Varray) is
begin
   for I in X'Range loop
@@ -29,6 +36,13 @@ package body Vect4 is
   return R;
end;
 
+   procedure Add (X : Sarray; Y : Long_Float; R : out Sarray) is
+   begin
+  for I in Sarray'Range loop
+ R(I) := X(I) + Y;
+  end loop;
+   end;
+
procedure Add (X : not null access Sarray; Y : Long_Float; R : not null access Sarray) is
begin
   for I in Sarray'Range loop
@@ -46,6 +60,13 @@ package body Vect4 is
   return R;
end;
 
+   procedure Add (X : Darray1; Y : Long_Float; R : out Darray1) is
+   begin
+  for I in Darray1'Range loop
+ R(I) := X(I) + Y;
+  end loop;
+   end;
+
procedure Add (X : not null access Darray1; Y : Long_Float; R : not null access Darray1) is
begin
   for I in Darray1'Range loop
@@ -63,6 +84,13 @@ package body Vect4 is
   return R;
end;
 
+   procedure Add (X : Darray2; Y : Long_Float; R : out Darray2) is
+   begin
+  for I in Darray2'Range loop
+ R(I) := X(I) + Y;
+  end loop;
+   end;
+
procedure Add (X : not null access Darray2; Y : Long_Float; R : not null access Darray2) is
begin
   for I in Darray2'Range loop
@@ -80,6 +108,13 @@ package body Vect4 is
   return R;
end;
 
+   procedure Add (X : Darray3; Y : Long_Float; R : out Darray3) is
+   begin
+  for I in Darray3'Range loop
+ R(I) := X(I) + Y;
+  end loop;
+   end;
+
procedure Add (X : not null access Darray3; Y : Long_Float; R : not null access Darray3) is
begin
   for I in Darray3'Range loop
@@ -89,5 +124,5 @@ package body Vect4 is
 
 end Vect4;
 
--- { dg-final { scan-tree-dump-times "vectorized 1 loops" 10 "vect"  } }
+-- { dg-final { scan-tree-dump-times "vectorized 1 loops" 15 "vect"  } }
 -- { dg-final { cleanup-tree-dump "vect" } }
Index: gnat.dg/vect4.ads
===
--- gnat.dg/vect4.ads	(revision 182203)
+++ gnat.dg/vect4.ads	(working copy)
@@ -8,6 +8,7 @@ package Vect4 is
for Varray'Alignment use 16;
 
function "+" (X : Varray; Y : Long_Float) return Varray;
+   procedure Add (X : Varray; Y : Long_Float; R : out Varray);
procedure Add (X : not null access Varray; Y : Long_Float; R : not null access Varray);
 
 
@@ -16,6 +17,7 @@ package Vect4 is
for Sarray'Alignment use 16;
 
function "+" (X : Sarray; Y : Long_Float) return Sarray;
+   procedure Add (X : Sarray; Y : Long_Float; R : out Sarray);
procedure Add (X : not null access Sarray; Y : Long_Float; R : not null access Sarray);
 
 
@@ -23,6 +25,7 @@ package Vect4 is
for Darray1'Alignment use 16;
 
function "+" (X : Darray1; Y : Long_Float) return Darray1;
+   procedure Add (X : Darray1; Y : Long_Float; R : out Darray1);
procedure Add (X : not null access Darray1; Y : Long_Float; R : not null access Darray1);
 
 
@@ -30,6 +33,7 @@ package Vect4 is
for Darray2'Alignment use 16;
 
function "+" (X : Darray2; Y : Long_Float) return Darray2;
+   procedure Add (X : Darray2; Y : Long_Float; R : out Darray2);
procedure Add (X : not null access Darray2; Y : Long_Float; R : not null access Darray2);
 
 
@@ -37,6 +41,7 @@ package Vect4 is
for Darray3'Alignment use 16;
 
function "+" (X : Darray3; Y : Long_Float) return Darray3;
+   procedure Add (X : Darray3; Y : Long_Float; R : out Darray3);
procedure Add (X : not null access Darray3; Y : Long_Float; R : not null access Darray3);
 
 end Vect4;
Index: gnat.dg/vect5.adb
===
--- gnat.dg/vect5.adb	(revision 182203)
+++ gnat.dg/vect5.adb	(working copy)
@@ -12,6 +12,13 @@ package body Vect5 is
   return R;
end;
 
+   procedure Add (X : Varray; Y : Long_Float; R : out Varray) is
+   begin
+  for I in X'Range loop
+ R(I) := X(I) + Y;
+  end loop;
+   end;
+
procedure Add (X : not null access Varray; Y : Long_Float; R : not null access Varray) is
begin
   for I in X'Range loop
@@ -29,6 +36,13 @@ package body Vect5 is
   return R;
end;
 
+   procedure Add (X : Sarray; Y : Long_Float; R : out Sarray) is
+   begin
+  for 

  1   2   >