Go patch committed: assign phase of escape analysis

2016-06-13 Thread Ian Lance Taylor
This patch by Chris Manghane implements the assign phase of escape
analysis.  This builds a graph of assignments within a function.  This
is just another step toward escape analysis; it is not yet enabled.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 237286)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-054ff1ece3dd5888a445efeaf3ae197b16d4186f
+f768153eb2a7a72587c9c0997955cdbbc70322d0
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: gcc/go/gofrontend/escape.cc
===
--- gcc/go/gofrontend/escape.cc (revision 236804)
+++ gcc/go/gofrontend/escape.cc (working copy)
@@ -88,6 +88,55 @@ Node::set_encoding(int enc)
 }
 
 bool
+Node::is_big(Escape_context* context) const
+{
+  Type* t = this->type();
+  if (t == NULL
+  || t->is_call_multiple_result_type()
+  || t->is_sink_type()
+  || t->is_void_type()
+  || t->is_abstract())
+return false;
+
+  int64_t size;
+  bool ok = t->backend_type_size(context->gogo(), &size);
+  bool big = ok && (size < 0 || size > 10 * 1024 * 1024);
+
+  if (this->expr() != NULL)
+{
+  if (this->expr()->allocation_expression() != NULL)
+   {
+ ok = t->deref()->backend_type_size(context->gogo(), &size);
+ big = big || size <= 0 || size >= (1 << 16);
+   }
+  else if (this->expr()->call_expression() != NULL)
+   {
+ Call_expression* call = this->expr()->call_expression();
+ Func_expression* fn = call->fn()->func_expression();
+ if (fn != NULL
+ && fn->is_runtime_function()
+ && (fn->runtime_code() == Runtime::MAKESLICE1
+ || fn->runtime_code() == Runtime::MAKESLICE2
+ || fn->runtime_code() == Runtime::MAKESLICE1BIG
+ || fn->runtime_code() == Runtime::MAKESLICE2BIG))
+   {
+ // Second argument is length.
+ Expression_list::iterator p = call->args()->begin();
+ ++p;
+
+ Numeric_constant nc;
+ unsigned long v;
+ if ((*p)->numeric_constant_value(&nc)
+ && nc.to_unsigned_long(&v) == Numeric_constant::NC_UL_VALID)
+   big = big || v >= (1 << 16);
+   }
+   }
+}
+
+  return big;
+}
+
+bool
 Node::is_sink() const
 {
   if (this->object() != NULL
@@ -161,6 +210,37 @@ Node::max_encoding(int e, int etype)
 // Return a modified encoding for an input parameter that flows into an
 // output parameter.
 
+int
+Node::note_inout_flows(int e, int index, Level level)
+{
+  // Flow+level is encoded in two bits.
+  // 00 = not flow, xx = level+1 for 0 <= level <= maxEncodedLevel.
+  // 16 bits for Esc allows 6x2bits or 4x3bits or 3x4bits if additional
+  // information would be useful.
+  if (level.value() <= 0 && level.suffix_value() > 0)
+return Node::max_encoding(e|ESCAPE_CONTENT_ESCAPES, Node::ESCAPE_NONE);
+  if (level.value() < 0)
+return Node::ESCAPE_HEAP;
+  if (level.value() >  ESCAPE_MAX_ENCODED_LEVEL)
+level = Level::From(ESCAPE_MAX_ENCODED_LEVEL);
+
+  int encoded = level.value() + 1;
+  int shift = ESCAPE_BITS_PER_OUTPUT_IN_TAG * index + ESCAPE_RETURN_BITS;
+  int old = (e >> shift) & ESCAPE_BITS_MASK_FOR_TAG;
+  if (old == 0
+  || (encoded != 0 && encoded < old))
+old = encoded;
+
+  int encoded_flow = old << shift;
+  if (((encoded_flow >> shift) & ESCAPE_BITS_MASK_FOR_TAG) != old)
+{
+  // Failed to encode.  Put this on the heap.
+  return Node::ESCAPE_HEAP;
+}
+
+  return (e & ~(ESCAPE_BITS_MASK_FOR_TAG << shift)) | encoded_flow;
+}
+
 // Class Escape_context.
 
 Escape_context::Escape_context(Gogo* gogo, bool recursive)
@@ -493,14 +573,1258 @@ Gogo::discover_analysis_sets()
   this->traverse();
 }
 
+// Traverse all label and goto statements and mark the underlying label
+// as looping or not looping.
+
+class Escape_analysis_loop : public Traverse
+{
+ public:
+  Escape_analysis_loop()
+: Traverse(traverse_statements)
+  { }
+
+  int
+  statement(Block*, size_t*, Statement*);
+};
+
+int
+Escape_analysis_loop::statement(Block*, size_t*, Statement* s)
+{
+  if (s->label_statement() != NULL)
+s->label_statement()->label()->set_nonlooping();
+  else if (s->goto_statement() != NULL)
+{
+  if (s->goto_statement()->label()->nonlooping())
+s->goto_statement()->label()->set_looping();
+}
+  return TRAVERSE_CONTINUE;
+}
+
+// Traversal class used to look at all interesting statements within a function
+// in order to build a connectivity graph between all nodes within a context's
+// scope.
+
+class Escape_analysis_assign : public Traverse
+{
+public:
+  Escape_analysis_assign(Escape_context* context, Named_object* fn)
+: Traverse(traverse_statements
+  | traverse_expressions),
+  context_(context), fn_(fn)

Re: CppCoreGuidelines warnings

2016-06-13 Thread Jason Merrill
On Sat, Jun 11, 2016 at 8:57 PM, Christopher Di Bella  wrote:
>> I'm currently waiting on approval from my employer before I move ahead
>with anything
>
> My employer has given me the okay to contribute to gcc, provided that I
> follow some fairly straightforward rules. Most of these things are given,
> such as "don't contribute to gcc while at work", "don't put work code in your
> contributions or vice versa", etc. Of course, my company needs to make it
> clear that I understand these rules before I'm given a green light.
> 
>> Note also that if you want to learn the process, small patches do not need
>any legal papers
>
> I'm going to start with a few minor patches, which I refrained from until
> they gave approval, and then move up in the world.
> 
>> You can coordinate with me about front end changes.
>
> I am hoping you mean compiler front-end (i.e. syntax, semantic, static
> analysis, etc.), rather than application front-end (flags, etc.), as
> the compiler front-end is the section I'm most interested in contributing to.

Yes, that's right.

> 
>> To incorporate the checks into GCC would probably involve changes to ...
>the C++ library
>
> I'm also happy to contribute to both an improved C++ Standard Library and
> stdlibc++, but don't want to spread myself too thin (I'm gearing my career for
> compiler development, and thus would like to work on the front-end a little
> more). Would it be better to work on this before, after, or in parallel with 
> the
> front-end?

That's really up to you.  If you're most interested in the front end,
starting there makes sense.

> 
>> Currently yes, but it was supposed to be released as open source.
>> Some of the C++ Core Guidelines checks are already implemented in
>clang-tidy:
>
> Does this mean that in your opinion, we (mostly me) should contribute to
> one of those projects instead, or are they just cool projects to watch?

They might be interesting to look at, but we'd definitely like to have
support for this in GCC.

Jason


Re: [PATCH, i386]: Introduce __builtin_signbitq to use SSE4.1 PTEST insn

2016-06-13 Thread Uros Bizjak
On Tue, Jun 14, 2016 at 12:50 AM, Uros Bizjak  wrote:
> On Mon, Jun 13, 2016 at 11:54 PM, Joseph Myers  
> wrote:
>
>>> Attached patch introduces __builtin_signbitq built-in function, so the
>>> compiler will be able to use SSE4.1 PTEST instruction to determine
>>> sign bit of __float128 value.
>>
>> The __builtin_signbit function is type-generic from GCC 6 onwards, so I
>> don't see any need for this type-specific function.  (The .md pattern may
>> still be useful, of course, for better expansion of type-generic
>> __builtin_signbit on float128 arguments.)
>>
>>> The patch introduces complete infrastructure, including fallback to
>>> __signbittf2 libgcc function for non-SSE4.1 targets.
>>
>> I don't see any need for a libgcc fallback either.  Generic code in GCC
>> should always be able to implement signbit using bit-manipulation, without
>> needing any library fallback.

After some more head scratching, I have reverted my v1 patch and
committed the following revision. It works like magic, without any
libgcc fallbacks.

Thanks for guiding me to the right direction, and sorry for the troubles!

2016-06-13  Uros Bizjak  

* config/i386/i386.md (signbittf2): New expander.
* config/i386/sse.md (ptesttf2): New insn pattern.

testsuite/ChangeLog:

2016-06-13  Uros Bizjak  

* gcc.target/i386/float128-3.c: New test.
* gcc.target/i386/quad-sse4.c: Ditto.
* gcc.target/i386/quad-sse.c: Use -msse instead of -msse2.
Update scan strings.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
Index: config/i386/i386.md
===
--- config/i386/i386.md (revision 237382)
+++ config/i386/i386.md (working copy)
@@ -16198,6 +16198,22 @@
   DONE;
 })
 
+(define_expand "signbittf2"
+  [(use (match_operand:SI 0 "register_operand"))
+   (use (match_operand:TF 1 "register_operand"))]
+  "TARGET_SSE4_1"
+{
+  rtx mask = ix86_build_signbit_mask (TFmode, 0, 0);
+  rtx scratch = gen_reg_rtx (QImode);
+
+  emit_insn (gen_ptesttf2 (operands[1], mask));
+  ix86_expand_setcc (scratch, NE,
+gen_rtx_REG (CCZmode, FLAGS_REG), const0_rtx);
+
+  emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
+  DONE;
+})
+
 (define_expand "signbitxf2"
   [(use (match_operand:SI 0 "register_operand"))
(use (match_operand:XF 1 "register_operand"))]
Index: config/i386/sse.md
===
--- config/i386/sse.md  (revision 237380)
+++ config/i386/sse.md  (working copy)
@@ -15212,6 +15212,19 @@
  (const_string "*")))
(set_attr "mode" "")])
 
+(define_insn "ptesttf2"
+  [(set (reg:CC FLAGS_REG)
+   (unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
+   (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
+  UNSPEC_PTEST))]
+  "TARGET_SSE4_1"
+  "%vptest\t{%1, %0|%0, %1}"
+  [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "type" "ssecomi")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "orig,orig,vex")
+   (set_attr "mode" "TI")])
+
 (define_insn "_round"
   [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
(unspec:VF_128_256
Index: testsuite/gcc.target/i386/float128-3.c
===
--- testsuite/gcc.target/i386/float128-3.c  (nonexistent)
+++ testsuite/gcc.target/i386/float128-3.c  (working copy)
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse4.1" } */
+/* { dg-require-effective-target sse4 } */
+
+#include "sse4_1-check.h"
+
+int signbit (__float128);
+
+extern void abort (void);
+
+static void
+sse4_1_test (void)
+{
+  static volatile __float128 a;
+
+  a = -1.2q;
+  if (!signbit (a))
+abort ();
+
+  a = 1.2q;
+  if (signbit (a))
+abort ();
+}
Index: gcc/testsuite/gcc.target/i386/quad-sse.c
===
--- gcc/testsuite/gcc.target/i386/quad-sse.c(revision 237380)
+++ gcc/testsuite/gcc.target/i386/quad-sse.c(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -msse2" } */
+/* { dg-options "-O2 -msse" } */
 
 __float128 x, y;
 
@@ -18,4 +18,4 @@ __float128 test_3(void)
   return __builtin_copysignq (x, y);
 }
 
-/* { dg-final { scan-assembler-not "call.*(neg|fabs|copysign)" } } */
+/* { dg-final { scan-assembler-not "neg|fabs|copysign" } } */
Index: testsuite/gcc.target/i386/quad-sse4.c
===
--- testsuite/gcc.target/i386/quad-sse4.c   (nonexistent)
+++ testsuite/gcc.target/i386/quad-sse4.c   (working copy)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1" } */
+
+int signbit (__float128);
+
+__float128 x;
+
+int __test_1(void)
+{
+  return signbit (x);
+}
+
+/* { dg-final { scan-assembler-not "signbit" } } */

Re: [PATCH, i386]: Introduce __builtin_signbitq to use SSE4.1 PTEST insn

2016-06-13 Thread Uros Bizjak
On Mon, Jun 13, 2016 at 11:54 PM, Joseph Myers  wrote:

>> Attached patch introduces __builtin_signbitq built-in function, so the
>> compiler will be able to use SSE4.1 PTEST instruction to determine
>> sign bit of __float128 value.
>
> The __builtin_signbit function is type-generic from GCC 6 onwards, so I
> don't see any need for this type-specific function.  (The .md pattern may
> still be useful, of course, for better expansion of type-generic
> __builtin_signbit on float128 arguments.)
>
>> The patch introduces complete infrastructure, including fallback to
>> __signbittf2 libgcc function for non-SSE4.1 targets.
>
> I don't see any need for a libgcc fallback either.  Generic code in GCC
> should always be able to implement signbit using bit-manipulation, without
> needing any library fallback.

The problem is in fact that on x86_64 __float128 values live
exclusively in SSE registers. Apart from PTEST, there are
no convenient instructions to test bits in high part of the SSE
register. So, we would have to move SSE value to memory, load
high-part to an integer register, test the bit in the integer register
and set the flag in the output register to obtain setCC -> jCC
optimization.

Also, please note that there is no generic support for __float128 or
TFmode optimizations in the compiler. Long-double functions (e.g.
signbitl) that are supported by generic functionality correspond to
80bit XFmode. All bit manipulations involving __float128 have to be
done by hand.

Due to above reasons, I have taken the path that is already
implemented in libgcc (__builtin_fabsq and __builtin_copysignq
fallbacks when SSE is not present). Fallback functions actually
implement exactly the same functionality as fabsq, copysignq and
signbitq functions in libquadmath. *If* we really want to avoid
fallbacks, it is possible to add RTL code to the relevant expanders,
but it will be quite some work for a questionable gain.

>> I have changed libquadmath to use __builtin_signbitq, and there were
>> numerous places, where the call to signbitq + test + conditional jump
>> reduced to e.g.:
>
> Current glibc systematically uses type-generic classification macros such
> as signbit where they exist in <math.h>, rather than direct calls to
> __signbitl etc. such as were formerly used.

Please note that we are dealing with __float128 types. In contrast to
float, double and long double, this type is non-standard and not known
to glibc, as evident from the code snippet below:

/* Return nonzero value if sign of X is negative.  */
# ifdef __NO_LONG_DOUBLE_MATH
#  define signbit(x) \
 (sizeof (x) == sizeof (float) ? __signbitf (x) : __signbit (x))
# else
#  define signbit(x) \
 (sizeof (x) == sizeof (float)  \
  ? __signbitf (x)  \
  : sizeof (x) == sizeof (double)  \
  ? __signbit (x) : __signbitl (x))
# endif

> Thus, I don't think changes to use __builtin_signbitq should go into
> libquadmath.  Rather, it should be updated for the past few years' changes
> in glibc (this is long overdue), with some header used in building
> libquadmath being made to define signbit, isfinite etc. to use the
> type-generic built-in functions, and such type-generic macro calls (as in
> glibc) replacing libquadmath's calls to signbitq, finiteq, isinfq etc.

I don't see any other way to instruct the compiler to overload e.g.
signbitq. This is non-standard, made-up function name, and the
compiler has no knowledge what to do with it. As far as the compiler
is concerned, it is just a function that happens to have TFmode
arguments.

Uros.


Re: [PATCH, i386]: Introduce __builtin_signbitq to use SSE4.1 PTEST insn

2016-06-13 Thread Joseph Myers
On Mon, 13 Jun 2016, Uros Bizjak wrote:

> Hello!
> 
> Attached patch introduces __builtin_signbitq built-in function, so the
> compiler will be able to use SSE4.1 PTEST instruction to determine
> sign bit of __float128 value.

The __builtin_signbit function is type-generic from GCC 6 onwards, so I 
don't see any need for this type-specific function.  (The .md pattern may 
still be useful, of course, for better expansion of type-generic 
__builtin_signbit on float128 arguments.)

> The patch introduces complete infrastructure, including fallback to
> __signbittf2 libgcc function for non-SSE4.1 targets.

I don't see any need for a libgcc fallback either.  Generic code in GCC 
should always be able to implement signbit using bit-manipulation, without 
needing any library fallback.

> I have changed libquadmath to use __builtin_signbitq, and there were
> numerous places, where the call to signbitq + test + conditional jump
> reduced to e.g.:

Current glibc systematically uses type-generic classification macros such 
as signbit where they exist in <math.h>, rather than direct calls to 
__signbitl etc. such as were formerly used.

Thus, I don't think changes to use __builtin_signbitq should go into 
libquadmath.  Rather, it should be updated for the past few years' changes 
in glibc (this is long overdue), with some header used in building 
libquadmath being made to define signbit, isfinite etc. to use the 
type-generic built-in functions, and such type-generic macro calls (as in 
glibc) replacing libquadmath's calls to signbitq, finiteq, isinfq etc.

-- 
Joseph S. Myers
jos...@codesourcery.com


[Bug middle-end/71488] [6/7 Regression] Wrong code for vector comparisons with ivybridge and westmere targets

2016-06-13 Thread glisse at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71488

--- Comment #2 from Marc Glisse  ---
Independently of the wrong code issue, we are generating pretty bad code on
Uros' testcase. It is full of operator delete(0) and operator new(0). The first
one we could drop, but the second one is forced by the C++ standard to allocate
at least one byte (or throw). It probably comes from the copy constructor of
valarray. And even when I help with the usual:
  __attribute__((returns_nonnull)) __typeof__(malloc) malloc;
  inline void* operator new(std::size_t n){return malloc(n);}
  inline void operator delete(void*p)noexcept{free(p);}
the .optimized dump still has things like
  MEM[(struct valarray *)_61]._M_data = _45;
  _46 = MEM[(struct valarray *)_61]._M_data;
because of how late other optimizations happened. Quite a common occurrence
with C++ code :-(

Re: _Bool and trap representations

2016-06-13 Thread Alexander Cherepanov

On 2016-06-14 00:13, Joseph Myers wrote:

On Tue, 14 Jun 2016, Alexander Cherepanov wrote:


The problem is that parts of representations of two different ordinary values
can form a trap representation.


Oh, you're talking about normalizing the destination rather than the
source of the copy?


Yes.

I don't see this problem with a current gcc so the problem is 
hypothetical AFAICT.


--
Alexander Cherepanov


[Bug sanitizer/71498] ubsan bounds checking influenced by surrounding code

2016-06-13 Thread jakub at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71498

--- Comment #7 from Jakub Jelinek  ---
Fixed for 6.2+ so far.

[PATCH, i386]: Introduce __builtin_signbitq to use SSE4.1 PTEST insn

2016-06-13 Thread Uros Bizjak
Hello!

Attached patch introduces __builtin_signbitq built-in function, so the
compiler will be able to use SSE4.1 PTEST instruction to determine
sign bit of __float128 value.

The patch introduces complete infrastructure, including fallback to
__signbittf2 libgcc function for non-SSE4.1 targets.

I have changed libquadmath to use __builtin_signbitq, and there were
numerous places, where the call to signbitq + test + conditional jump
reduced to e.g.:

e0d8:66 0f 38 17 35 4f a6 ptest  0x1a64f(%rip),%xmm6
 # 28730 <_fini+0x24>
e0df:01 00
e0e1:74 19je e0fc
<__quadmath_kernel_sincosq+0x24c>

2016-06-13  Uros Bizjak  

* config/i386/i386-builtin-types.def (INT_FTYPE_FLOAT128):
New function type.
* config/i386/i386.c (enum ix86_builtins) [IX86_BUILTIN_SIGNBITQ]: New.
(ix86_init_builtins): Add __builtin_signbitq function.
(ix86_expand_args_builtin): Handle INT_FTYPE_FLOAT128.
(ix86_expand_builtin): Handle IX86_BUILTIN_SIGNBITQ.
* config/i386/i386.md (signbittf2): New expander.
* config/i386/sse.md (ptesttf2): New insn pattern.
* doc/extend.texi (x86 Built-in Functions): Document
__builtin_signbitq.

libgcc/ChangeLog:

2016-06-13  Uros Bizjak  

* config.host (i[34567]86-*-* | x86_64-*-*): Always include
i386/${host_address}/t-softfp in tmake_file.
* config/i386/32/t-softfp: Update comment for __builtin_copysignq.
* config/i386/32/tf-signs.c: Add __signbittf2 fallback function.
* config/i386/64/t-softfp: New file.
* config/i386/64/tf-signs.c: Ditto.
* config/i386/libgcc-bsd.ver: Add __signbittf2.
* config/i386/libgcc-glibc.ver: Ditto.
* config/i386/libgcc-sol2.ver: Ditto.

testsuite/ChangeLog:

2016-06-13  Uros Bizjak  

* gcc.target/i386/float128-3.c: New test.
* gcc.target/i386/quad-sse4.c: Ditto.
* gcc.target/i386/quad-sse.c: Use -msse instead of -msse2.
Update scan strings.

Patch was bootstrapped and regression tested on x86_64-linux-gnu
{,-m32} with and without "--with-arch=corei7 --with-cpu=corei7"
configured compiler. The functionality was also tested by
__builtin_signbitq amended libquadmath library, where ptest insn
generation and a fallback to __signbittf2 support function were
exercised.

Committed to mainline SVN.

Uros.
Index: gcc/config/i386/i386-builtin-types.def
===
--- gcc/config/i386/i386-builtin-types.def  (revision 237380)
+++ gcc/config/i386/i386-builtin-types.def  (working copy)
@@ -202,6 +202,7 @@ DEF_FUNCTION_TYPE (INT, V8QI)
 DEF_FUNCTION_TYPE (INT, V8SF)
 DEF_FUNCTION_TYPE (INT, V32QI)
 DEF_FUNCTION_TYPE (INT, PCCHAR)
+DEF_FUNCTION_TYPE (INT, FLOAT128)
 DEF_FUNCTION_TYPE (INT64, INT64)
 DEF_FUNCTION_TYPE (INT64, V2DF)
 DEF_FUNCTION_TYPE (INT64, V4SF)
Index: gcc/config/i386/i386.c
===
--- gcc/config/i386/i386.c  (revision 237380)
+++ gcc/config/i386/i386.c  (working copy)
@@ -32722,6 +32722,7 @@ enum ix86_builtins
   IX86_BUILTIN_NANSQ,
   IX86_BUILTIN_FABSQ,
   IX86_BUILTIN_COPYSIGNQ,
+  IX86_BUILTIN_SIGNBITQ,
 
   /* Vectorizer support builtins.  */
   IX86_BUILTIN_CPYSGNPS,
@@ -33983,6 +33984,8 @@ static const struct builtin_description bdesc_args
   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 
"__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) 
V2DI_FTYPE_V4SI_V4SI },
   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", 
IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
 
+  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_signbittf2, 0, IX86_BUILTIN_SIGNBITQ, 
UNKNOWN, (int) INT_FTYPE_FLOAT128 },
+
   /* SSE4.1 */
   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", 
IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", 
IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
@@ -38299,6 +38302,13 @@ ix86_init_builtins (void)
   TREE_READONLY (decl) = 1;
   ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
 
+  ftype = ix86_get_builtin_func_type (INT_FTYPE_FLOAT128);
+  decl = add_builtin_function ("__builtin_signbitq", ftype,
+  IX86_BUILTIN_SIGNBITQ, BUILT_IN_MD,
+  "__signbittf2", NULL_TREE);
+  TREE_READONLY (decl) = 1;
+  ix86_builtins[(int) IX86_BUILTIN_SIGNBITQ] = decl;
+
   ix86_init_tm_builtins ();
   ix86_init_mmx_sse_builtins ();
   ix86_init_mpx_builtins ();
@@ -39128,6 +39138,7 @@ ix86_expand_args_builtin (const struct builtin_des
 case INT_FTYPE_V4SF:
 case INT_FTYPE_V2DF:
 case INT_FTYPE_V32QI:
+case INT_FTYPE_FLOAT128:
 case V16QI_FTYPE_V16QI:
 case V8SI_FTYPE_V8SF:
 case V8SI_FTYPE_V4SI:
@@ -42638,17 +42649,27 @@ rdseed_step:
i < ARRAY_SIZE (bdesc_args);
i++, d++)
  

[Bug bootstrap/71481] [7 regression] ICE during selftest: input.c: test_reading_source_line

2016-06-13 Thread dmalcolm at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71481

--- Comment #10 from David Malcolm  ---
(In reply to Bernd Edlinger from comment #7)
> (In reply to David Malcolm from comment #3)
> > Candidate patch: https://gcc.gnu.org/ml/gcc-patches/2016-06/msg00755.html
> 
> BTW: this patch seems not to remove the tempfile again.

I took the liberty of adding an:
  unlink (filename);
to the version of the patch I committed (r237414) as per the "obvious" rule.

With your fix (as r237383) I believe this bug is fixed.  The remaining issue is
the discussion of what to do about LANG, but maybe that's for the mailing list.

[Bug bootstrap/71481] [7 regression] ICE during selftest: input.c: test_reading_source_line

2016-06-13 Thread dmalcolm at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71481

--- Comment #9 from David Malcolm  ---
Author: dmalcolm
Date: Mon Jun 13 21:20:10 2016
New Revision: 237414

URL: https://gcc.gnu.org/viewcvs?rev=237414&root=gcc&view=rev
Log:
PR bootstrap/71481: fix input.c selftest

gcc/ChangeLog:
PR bootstrap/71481
* input.c (selftest::test_reading_source_line): Avoid reading from
__FILE__ by creating a tempfile with known content and reading
from that instead.


Modified:
trunk/gcc/ChangeLog
trunk/gcc/input.c

Re: _Bool and trap representations

2016-06-13 Thread Joseph Myers
On Tue, 14 Jun 2016, Alexander Cherepanov wrote:

> The problem is that parts of representations of two different ordinary values
> can form a trap representation.

Oh, you're talking about normalizing the destination rather than the 
source of the copy?

-- 
Joseph S. Myers
jos...@codesourcery.com


[Bug sanitizer/71498] ubsan bounds checking influenced by surrounding code

2016-06-13 Thread jakub at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71498

--- Comment #6 from Jakub Jelinek  ---
Author: jakub
Date: Mon Jun 13 21:08:36 2016
New Revision: 237412

URL: https://gcc.gnu.org/viewcvs?rev=237412&root=gcc&view=rev
Log:
PR sanitizer/71498
* c-gimplify.c (ubsan_walk_array_refs_r): Set *walk_subtrees = 0 on
all BIND_EXPRs, and on all BIND_EXPRs recurse also on BIND_EXPR_BODY.

* c-c++-common/ubsan/bounds-13.c: New test.

Added:
branches/gcc-6-branch/gcc/testsuite/c-c++-common/ubsan/bounds-13.c
Modified:
branches/gcc-6-branch/gcc/c-family/ChangeLog
branches/gcc-6-branch/gcc/c-family/c-gimplify.c
branches/gcc-6-branch/gcc/testsuite/ChangeLog

Re: _Bool and trap representations

2016-06-13 Thread Alexander Cherepanov

On 2016-06-13 22:51, Joseph Myers wrote:

On Mon, 13 Jun 2016, Alexander Cherepanov wrote:


Thanks for the info. IMHO this part of DR 260 has even more serious
consequences than the part about pointer provenance. It effectively prohibits
manual byte-by-byte (or any non-atomic) copying of objects for types like long
double. If an implementation decides to normalize a value in a variable during
copying it will see an inconsistent representation, e.g. a trap
representation. It's a sure way to get total garbage. I don't know if allowing


No, that's not the case; even if representations can change during
byte-by-byte copying, such copying of long double values is *still* safe.
All long double values for x86 long double have exactly one valid
representation in the value bits, and if the padding bits change during
copying it doesn't matter; it's only representations that are already trap
representations (unnormals, pseudo-* etc.) that might be interpreted
inconsistently.


The problem is that parts of representations of two different ordinary 
values can form a trap representation.


Suppose x = 1.0 and y = 0.0, i.e. they have the following 
representations (from high bytes to low bytes):


   padding           sign  int & frac
                     & exp
   |---------------| |---| |---------------------|
x: 00 00 00 00 00 00 3f ff 80 00 00 00 00 00 00 00
y: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00

Suppose that we copy from x to y byte-by-byte starting from high bytes. 
And suppose the normalization kicks in after copying 8 bytes. We have 
already copied the sign and the exponent but haven't yet overwritten the 
'Integer' bit of Significand so we have the following representation:


z: 00 00 00 00 00 00 3f ff 00 00 00 00 00 00 00 00

This is an unnormal and current gcc normalization converts it into 0.0 
throwing the exponent away. Copying the remaining 8 bytes leads to a 
pseudo-denormal:


w: 00 00 00 00 00 00 00 00 80 00 00 00 00 00 00 00

But this is already a minor detail.

The code to see how gcc normalizes 'z':

--
#include <stdio.h>
#include <string.h>

int main()
{
  long double d0, d;

  memcpy(&d0, 
"\x00\x00\x00\x00\x00\x00\x00\x00\xff\x3f\x00\x00\x00\x00\x00\x00", 
sizeof d0);

  d = d0;

  printf("d = %Lf\n", d);
  for (unsigned char *p = (unsigned char *)&d + sizeof d; p > (unsigned 
char *)&d;)

printf("%02x ", *--p);
  printf("\n");
}
--

Results:

--
$ gcc -std=c11 -pedantic -Wall -Wextra -O3 test.c && ./a.out
d = 0.00
00 00 00 00 00 40 00 00 00 00 00 00 00 00 00 00
--

gcc version: gcc (GCC) 7.0.0 20160613 (experimental)

--
Alexander Cherepanov


[Bug sanitizer/71498] ubsan bounds checking influenced by surrounding code

2016-06-13 Thread jakub at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71498

--- Comment #5 from Jakub Jelinek  ---
Author: jakub
Date: Mon Jun 13 21:01:44 2016
New Revision: 237409

URL: https://gcc.gnu.org/viewcvs?rev=237409&root=gcc&view=rev
Log:
PR sanitizer/71498
* c-gimplify.c (ubsan_walk_array_refs_r): Set *walk_subtrees = 0 on
all BIND_EXPRs, and on all BIND_EXPRs recurse also on BIND_EXPR_BODY.

* c-c++-common/ubsan/bounds-13.c: New test.

Added:
trunk/gcc/testsuite/c-c++-common/ubsan/bounds-13.c
Modified:
trunk/gcc/c-family/ChangeLog
trunk/gcc/c-family/c-gimplify.c
trunk/gcc/testsuite/ChangeLog

[Bug preprocessor/71183] [7 Regression] gcc -E always gives __DATE__ and __TIME__ as Jan 1 1970 00:00:00

2016-06-13 Thread jakub at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71183

--- Comment #9 from Jakub Jelinek  ---
Author: jakub
Date: Mon Jun 13 21:00:07 2016
New Revision: 237408

URL: https://gcc.gnu.org/viewcvs?rev=237408&root=gcc&view=rev
Log:
PR preprocessor/71183
* c-ppoutput.c (init_pp_output): Set cb->get_source_date_epoch
to cb_get_source_date_epoch.

* gcc.dg/cpp/source_date_epoch-3.c: New test.

Added:
trunk/gcc/testsuite/gcc.dg/cpp/source_date_epoch-3.c
Modified:
trunk/gcc/c-family/ChangeLog
trunk/gcc/c-family/c-ppoutput.c
trunk/gcc/testsuite/ChangeLog

[Bug tree-optimization/71403] [7 Regression] wrong code (segfault) at -O3 on x86_64-linux-gnu

2016-06-13 Thread law at redhat dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71403

Jeffrey A. Law  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #9 from Jeffrey A. Law  ---
Fixed on the trunk.

[tree-optimization/71403] Do not allow threading to a deeper loop nest

2016-06-13 Thread Jeff Law


pr71403 (and its duplicates) show a problem where we thread a backedge 
from an outer loop to the header of an inner loop.


This looks all fine and good at the CFG level, but it essentially 
combines the inner and outer loop with parts of the loop executing on 
some iterations, but not on others.  Worse yet, that will change the 
number of iterations of the loop, which wreaks havoc with the unroller.


This patch avoids those jump threads.  It was also a good time to pull 
some path stack management out of a routine where it did not belong 
(convert_and_register_jump_thread_path).


Bootstrapped and regression tested on x86_64 linux.  Installing on the 
trunk.


jeff
commit 018f8824b168a5719defb8974efd110777b6b83b
Author: law 
Date:   Mon Jun 13 20:55:59 2016 +

PR tree-optimization/71403
* tree-ssa-threadbackward.c
(convert_and_register_jump_thread_path): No longer accept reference
to path.  Do not pop items off the path anymore.
(fsm_find_control_statement_thread_paths): Do not allow threading
to a deeper loop nest.  Pop the last item off the path here rather
than in convert_and_register_jump_thread_path.

PR tree-optimization/71403
* c-c++-common/ubsan/pr71403-1.c: New test.
* c-c++-common/ubsan/pr71403-2.c: New test.
* c-c++-common/ubsan/pr71403-3.c: New test.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@237403 
138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 822e36f..91befb5 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2016-06-13  Jeff Law  
+
+   PR tree-optimization/71403
+   * tree-ssa-threadbackward.c
+   (convert_and_register_jump_thread_path): No longer accept reference
+   to path.  Do not pop items off the path anymore.
+   (fsm_find_control_statement_thread_paths): Do not allow threading
+   to a deeper loop nest.  Pop the last item off the path here rather
+   than in convert_and_register_jump_thread_path.
+
 2016-06-13  Kelvin Nilsen  
 
* config/rs6000/rs6000.h (RS6000_BTM_COMMON): Add the
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index d0dc9b7..6ba9050 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2016-06-13  Jeff Law  
+
+   PR tree-optimization/71403
+   * c-c++-common/ubsan/pr71403-1.c: New test.
+   * c-c++-common/ubsan/pr71403-2.c: New test.
+   * c-c++-common/ubsan/pr71403-3.c: New test.
+
 2016-06-13  Jakub Jelinek  
 
PR middle-end/71478
diff --git a/gcc/testsuite/c-c++-common/ubsan/pr71403-1.c 
b/gcc/testsuite/c-c++-common/ubsan/pr71403-1.c
new file mode 100644
index 000..f8f4867
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/ubsan/pr71403-1.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fsanitize=unreachable" } */
+
+char a = -97;
+int b, c, d, e;
+
+int
+main ()
+{
+  int g = d, h = 0, i = 1; 
+  for (; h < 3; h++)
+{
+  if (g > -1)
+{
+  int j;
+  g = j = 0;
+  for (; j < 5; j++)
+  L1:
+if (!i)
+  goto L1;
+  a = e;
+}
+  else
+i = 0;
+}
+  b = c / ~(a | 114);
+  __builtin_exit (0);
+}
diff --git a/gcc/testsuite/c-c++-common/ubsan/pr71403-2.c 
b/gcc/testsuite/c-c++-common/ubsan/pr71403-2.c
new file mode 100644
index 000..03b6e83
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/ubsan/pr71403-2.c
@@ -0,0 +1,22 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fsanitize=unreachable" } */
+
+char a, c;
+short b;
+
+int
+main ()
+{
+  unsigned d = 0;
+  int e = 1;
+  for (a = 0; a < 2; a++)
+{
+  if (e)
+c--;
+  for (; d < 2; d++)
+for (b = 0; b; b++)
+  ;
+  e = 0;
+}
+  __builtin_exit (0);
+}
diff --git a/gcc/testsuite/c-c++-common/ubsan/pr71403-3.c 
b/gcc/testsuite/c-c++-common/ubsan/pr71403-3.c
new file mode 100644
index 000..1ab7736
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/ubsan/pr71403-3.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fsanitize=unreachable" } */
+
+
+int a, b, c, d;
+
+void
+fn1 ()
+{
+  for (c = 0; c < 2; c++)
+{
+  int e, f = 1;
+  for (e = 0; e < 2; e++)
+   {
+ if (!f)
+   return;
+ for (d = 0; d; d++)
+   f = b;
+   }
+}
+}
+
+int
+main ()
+{
+  for (; a < 1; a++)
+{
+  fn1 ();
+}
+  __builtin_exit (0);
+}
diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c
index 139d376..9dd37ad 100644
--- a/gcc/tree-ssa-threadbackward.c
+++ b/gcc/tree-ssa-threadbackward.c
@@ -378,7 +378,7 @@ profitable_jump_thread_path (vec *,
register the path.   */
 
 static void
-convert_and_register_jump_thread_path (vec *,
+convert_and_register_jump_thread_path 

[Bug tree-optimization/71403] [7 Regression] wrong code (segfault) at -O3 on x86_64-linux-gnu

2016-06-13 Thread law at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71403

--- Comment #8 from Jeffrey A. Law  ---
Author: law
Date: Mon Jun 13 20:55:59 2016
New Revision: 237403

URL: https://gcc.gnu.org/viewcvs?rev=237403=gcc=rev
Log:
PR tree-optimization/71403
* tree-ssa-threadbackward.c
(convert_and_register_jump_thread_path): No longer accept reference
to path.  Do not pop items off the path anymore.
(fsm_find_control_statement_thread_paths): Do not allow threading
to a deeper loop nest.  Pop the last item off the path here rather
than in convert_and_register_jump_thread_path.

PR tree-optimization/71403
* c-c++-common/ubsan/pr71403-1.c: New test.
* c-c++-common/ubsan/pr71403-2.c: New test.
* c-c++-common/ubsan/pr71403-3.c: New test.

Added:
trunk/gcc/testsuite/c-c++-common/ubsan/pr71403-1.c
trunk/gcc/testsuite/c-c++-common/ubsan/pr71403-2.c
trunk/gcc/testsuite/c-c++-common/ubsan/pr71403-3.c
Modified:
trunk/gcc/ChangeLog
trunk/gcc/testsuite/ChangeLog
trunk/gcc/tree-ssa-threadbackward.c

Re: [PATCH] Fix ubsan handling of BIND_EXPR (PR sanitizer/71498)

2016-06-13 Thread Marek Polacek
On Mon, Jun 13, 2016 at 08:39:43PM +0200, Jakub Jelinek wrote:
> Hi!
> 
> As has been discussed in the original -fsanitize=bounds submission,
> walk_tree for BIND_EXPR walks the body and
> DECL_INITIAL/DECL_SIZE/DECL_SIZE_UNIT of all the BIND_EXPR_VARS.
> For -fsanitize=bounds instrumentation, we want to avoid walking DECL_INITIAL
> of TREE_STATIC vars, so should set *walk_subtrees to 0 and walk it all
> ourselves.  But, what the committed code actually does is that for
> BIND_EXPRs that contain no TREE_STATIC vars, it walks
> DECL_INITIAL/DECL_SIZE/DECL_SIZE_UNIT of all the BIND_EXPR_VARS, and then
> walks subtrees normally, which means walking the body (good) and all the
> DECL_INITIAL/DECL_SIZE/DECL_SIZE_UNIT exprs again (waste of time, we use
> hash_set for duplicates, so just inefficiency).
> But, if any TREE_STATIC vars appears, we set *walk_subtrees to 0 and
> forget to walk the body (the primary bug).

Ouch :(.   

> Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
> trunk?

Ok, thanks.

Marek


Re: [PATCH] Fix SOURCE_DATE_EPOCH handling with -E (PR preprocessor/71183)

2016-06-13 Thread Joseph Myers
On Mon, 13 Jun 2016, Jakub Jelinek wrote:

> Hi!
> 
> The SOURCE_DATE_EPOCH env var is ignored during -E, which is undesirable
> and inconsistent.  The problem is that the appropriate callback for
> libcpp is only installed when compiling and not when preprocessing only.
> 
> Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
> trunk?

OK.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: _Bool and trap representations

2016-06-13 Thread Joseph Myers
On Mon, 13 Jun 2016, Alexander Cherepanov wrote:

> Thanks for the info. IMHO this part of DR 260 has even more serious
> consequences than the part about pointer provenance. It effectively prohibits
> manual byte-by-byte (or any non-atomic) copying of objects for types like long
> double. If an implementation decides to normalize a value in a variable during
> copying it will see an inconsistent representation, e.g. a trap
> representation. It's a sure way to get total garbage. I don't know if allowing

No, that's not the case; even if representations can change during 
byte-by-byte copying, such copying of long double values is *still* safe.  
All long double values for x86 long double have exactly one valid 
representation in the value bits, and if the padding bits change during 
copying it doesn't matter; it's only representations that are already trap 
representations (unnormals, pseudo-* etc.) that might be interpreted 
inconsistently.

Likewise for IBM long double; the only cases of more than one 
representation for a value are (a) a zero low part might have either sign 
(in which case an arbitrary choice of bytes from the two representations 
still gives a valid representation of the same value) and (b) the low part 
of a NaN is of no significance.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: PR 71181 Avoid rehash after reserve

2016-06-13 Thread François Dumont

Hi

I eventually would like to propose the attached patch.

In tr1 I made sure we use a special past-the-end iterator that 
makes usage of lower_bound result without check safe.


PR libstdc++/71181
* include/tr1/hashtable_policy.h
(_Prime_rehash_policy::_M_next_bkt): Make past-the-end iterator
dereferenceable to avoid check on lower_bound result.
(_Prime_rehash_policy::_M_bkt_for_elements): Call latter.
(_Prime_rehash_policy::_M_need_rehash): Likewise.
* src/c++11/hashtable_c++0x.cc (_Prime_rehash_policy::_M_next_bkt):
Always return a value greater than input value. Set _M_next_resize to
max value when reaching highest prime number.
* src/shared/hashtable-aux.cc (__prime_list): Add comment that sentinel
is useless.
* testsuite/23_containers/unordered_set/hash_policy/71181.cc: New.
* 
testsuite/23_containers/unordered_set/hash_policy/prime_rehash.cc: New.

* testsuite/23_containers/unordered_set/hash_policy/rehash.cc:
Fix indentation.

Tested under Linux x86_64.

François


On 25/05/2016 22:48, François Dumont wrote:

On 25/05/2016 16:01, Jonathan Wakely wrote:

On 22/05/16 17:16 +0200, François Dumont wrote:

Hi

   To fix 71181 problem I propose to change how we deal with reserve 
called with pivot values that is to say prime numbers. Now 
_M_next_bkt always returns a value higher than the input value. This 
way when reserve(97) is called we end up with 199 buckets and so 
enough space to store 97 values without rehashing.


   I have integrated in this patch several other enhancements on the 
same subject. Improvement of _M_next_resize management when reaching 
highest bucket number. Remove sentinel value in __prime_list, just 
need to limit range when calling lower_bound.


I don't think the change to __prime_list is safe. If you compile some
code with GCC 5 and then used a libstdc++.so with this change the old
code would still be looking for the sentinel in the array, and would
not find it.

I think it would be safe to leave the old __prime_list unchanged (and
then not need to change anything in tr1/hashtable_policy.h?) and add a
new array with a different name. Existing code compiled with older
versions of GCC would still find __prime_list, but the new code would
use a different array.




What about this version ? tr1 mode still limit search range as it 
should to make sure it doesn't need to check lower_bound result. And 
sentinel is only kept for backward compatibility and commented to make 
that clear. Maybe there is a clearer way to express that sentinel can 
be removed on a future version breaking abi ?


François


diff --git a/libstdc++-v3/include/tr1/hashtable_policy.h b/libstdc++-v3/include/tr1/hashtable_policy.h
index 4ee6d45..24d1a59 100644
--- a/libstdc++-v3/include/tr1/hashtable_policy.h
+++ b/libstdc++-v3/include/tr1/hashtable_policy.h
@@ -420,8 +420,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   _Prime_rehash_policy::
   _M_next_bkt(std::size_t __n) const
   {
-const unsigned long* __p = std::lower_bound(__prime_list, __prime_list
-		+ _S_n_primes, __n);
+// Past-the-end iterator is made dereferenceable to avoid check on
+// lower_bound result.
+const unsigned long* __p
+  = std::lower_bound(__prime_list, __prime_list + _S_n_primes - 1, __n);
 _M_next_resize = 
   static_cast(__builtin_ceil(*__p * _M_max_load_factor));
 return *__p;
@@ -434,11 +436,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   _M_bkt_for_elements(std::size_t __n) const
   {
 const float __min_bkts = __n / _M_max_load_factor;
-const unsigned long* __p = std::lower_bound(__prime_list, __prime_list
-		+ _S_n_primes, __min_bkts);
-_M_next_resize =
-  static_cast(__builtin_ceil(*__p * _M_max_load_factor));
-return *__p;
+return _M_next_bkt(__builtin_ceil(__min_bkts));
   }
 
   // Finds the smallest prime p such that alpha p > __n_elt + __n_ins.
@@ -462,12 +460,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	if (__min_bkts > __n_bkt)
 	  {
 	__min_bkts = std::max(__min_bkts, _M_growth_factor * __n_bkt);
-	const unsigned long* __p =
-	  std::lower_bound(__prime_list, __prime_list + _S_n_primes,
-			   __min_bkts);
-	_M_next_resize = static_cast
-	  (__builtin_ceil(*__p * _M_max_load_factor));
-	return std::make_pair(true, *__p);
+	return std::make_pair(true,
+  _M_next_bkt(__builtin_ceil(__min_bkts)));
 	  }
 	else 
 	  {
diff --git a/libstdc++-v3/src/c++11/hashtable_c++0x.cc b/libstdc++-v3/src/c++11/hashtable_c++0x.cc
index a5e6520..7cbd364 100644
--- a/libstdc++-v3/src/c++11/hashtable_c++0x.cc
+++ b/libstdc++-v3/src/c++11/hashtable_c++0x.cc
@@ -46,22 +46,36 @@ namespace __detail
   {
 // Optimize lookups involving the first elements of __prime_list.
 // (useful to speed-up, eg, constructors)
-static const unsigned char __fast_bkt[12]
-  = { 2, 2, 2, 3, 5, 5, 7, 7, 11, 11, 11, 11 };
+static const unsigned char __fast_bkt[13]
+  = { 2, 2, 3, 5, 5, 

JonY appointed Cygwin and mingw-w64 maintainer

2016-06-13 Thread David Edelsohn
I am pleased to announce that the GCC Steering Committee has
appointed Jon Y as Cygwin and mingw-w64 maintainer.

Please join me in congratulating Jon on his new role.
Jon, please update your listing in the MAINTAINERS file.

Happy hacking!
David



Re: [PATCH 3/3][AArch64] Emit division using the Newton series

2016-06-13 Thread Evandro Menezes

On 06/13/16 05:15, James Greenhalgh wrote:
Thanks for your patience on this patch series. 


Just checked the series in.

Thank y'all for your assistance and patience.

Cheers,

--
Evandro Menezes



[Bug middle-end/71488] [6/7 Regression] Wrong code for vector comparisons with ivybridge and westmere targets

2016-06-13 Thread ubizjak at gmail dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71488

Uroš Bizjak  changed:

   What|Removed |Added

 Status|UNCONFIRMED |NEW
   Last reconfirmed||2016-06-13
  Component|target  |middle-end
   Target Milestone|--- |7.0
Summary|Wrong code on GCC trunk |[6/7 Regression] Wrong code
   |with ivybridge and westmere |for vector comparisons with
   |targets |ivybridge and westmere
   ||targets
 Ever confirmed|0   |1

--- Comment #1 from Uroš Bizjak  ---
Following minimized case will show the problem:

--cut here--
int var_4 = 1;
long long var_9 = 0;

int main() {

  std::valarray v10;

  v10.resize(1);
  v10[0].resize(4);

  for (int i = 0; i < 4; i++)
v10[0][i] = ((var_9 == 0) > unsigned (var_4 == 0)) + (var_9 == 0);

  std::cout << v10[0][0] << "\n";
}
--cut here--

This test should be compiled with "-std=c++11 -O3 -march=westmere" to obtain
wrong result:

$ ./a.out
1

The correct result can be obtained by adding -fno-tree-vectorize to compile
flags:

./a.out
2

Looking at the asm dump, the problematic loop is:

.L22:
movddup var_9(%rip), %xmm0
pxor%xmm1, %xmm1
(1) pcmpeqq %xmm1, %xmm0
salq$63, %rax
movdqa  .LC0(%rip), %xmm2
sarq$63, %rax
movq%rax, %xmm1
(2) movdqa  %xmm0, %xmm3
punpcklqdq  %xmm1, %xmm1
pand%xmm2, %xmm0
shufps  $136, %xmm0, %xmm0
(3) pcmpgtq %xmm1, %xmm3
movdqa  %xmm3, %xmm1
pand%xmm2, %xmm1
shufps  $136, %xmm1, %xmm1
paddd   %xmm1, %xmm0
pmovsxdq%xmm0, %xmm1
psrldq  $8, %xmm0
pmovsxdq%xmm0, %xmm0
movups  %xmm1, (%rdx)
movups  %xmm0, 16(%rdx)

At insn (1), vector (0xf...f,0xf...f) is generated as a result of comparison of
vector (var_9,var_9) with vector (0,0). However, this result goes through insn
(2) directly to insn (3) as its input argument. This is certainly wrong, the
result of the comparison should be masked with (0x0...1,0x0...1).

The problem already exists at RTL expand time. The corresponding insn sequence
is:

;; mask__3.59_48 = vect_cst__51 == { 0, 0 };

(insn 117 116 118 (set (reg:V2DI 179)
(vec_duplicate:V2DI (reg:DI 108 [ var_9.0_50 ]))) crash.cpp:29 4210
{*vec_dupv2di}
 (nil))

(insn 118 117 119 (set (reg:V2DI 180)
(const_vector:V2DI [
(const_int 0 [0])
(const_int 0 [0])
])) crash.cpp:29 -1
 (nil))

(insn 119 118 120 (set (reg:V2DI 181)
(eq:V2DI (reg:V2DI 179)
(reg:V2DI 180))) crash.cpp:29 -1
 (nil))

(insn 120 119 0 (set (reg:V2DI 106 [ mask__3.59 ])
(reg:V2DI 181)) crash.cpp:29 -1
 (nil))

;; vect_patt_111.61_79 = VEC_COND_EXPR  vect_cst__63, { 1, 1 },
{ 0, 0 }>;

(insn 121 120 122 (set (reg:V2DI 182)
(vec_duplicate:V2DI (reg:DI 117 [ _64 ]))) 4210 {*vec_dupv2di}
 (nil))

(insn 122 121 123 (set (reg:V2DI 183)
(mem/u/c:V2DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [5  S16 A128]))
-1
 (expr_list:REG_EQUAL (const_vector:V2DI [
(const_int 1 [0x1])
(const_int 1 [0x1])
])
(nil)))

(insn 123 122 124 (set (reg:V2DI 184)
(gt:V2DI (reg:V2DI 106 [ mask__3.59 ])
(reg:V2DI 182))) -1
 (nil))

(insn 124 123 0 (set (reg:V2DI 119 [ vect_patt_111.61 ])
(and:V2DI (reg:V2DI 184)
(reg:V2DI 183))) -1
 (nil))

Please note how the result of comparison from (insn 119) enters directly a
follow-up comparison (insn 123). It looks to me that (insn 120) needs to be AND
insn, as is the case with comparison (insn 123) and its corresponding (insn
124).

Confirmed as a middle-end problem.

Re: _Bool and trap representations

2016-06-13 Thread Alexander Cherepanov

On 2016-06-08 17:37, Martin Sebor wrote:

On 06/08/2016 12:36 AM, Alexander Cherepanov wrote:

Hi!

If a variable of type _Bool contains something different from 0 and 1
its use amounts to UB in gcc and clang. There is a couple of examples in
[1] ([2] is also interesting).

[1] https://github.com/TrustInSoft/tis-interpreter/issues/39
[2] https://github.com/TrustInSoft/tis-interpreter/issues/100

But my question is about the following example:

--
#include <stdio.h>

int main()
{
   _Bool b;
   *(char *)&b = 123;
   printf("%d\n", *(char *)&b);
}
--

Results:

--
$ gcc -std=c11 -pedantic -Wall -Wextra test.c && ./a.out
123

$ gcc -std=c11 -pedantic -Wall -Wextra -O3 test.c && ./a.out
1
--

gcc version: gcc (GCC) 7.0.0 20160604 (experimental)


Similar example with long double:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71522


It seems that padding in _Bool is treated as permanently unspecified. Is
this behavior intentional? What's the theory behind it?

One possible explanation is C11, 6.2.6.2p1, which reads: "The values of
any padding bits are unspecified." But it's somewhat a stretch to
conclude from it that the values of padding bits cannot be specified
even with explicit assignment.

Another possible approach is to refer to Committee Response for Question
1 in DR 260 which reads: "Values may have any bit-pattern that validly
represents them and the implementation is free to move between alternate
representations (for example, it may normalize pointers, floating-point
representations etc.). [...] the actual bit-pattern may change without
direct action of the program."


There has been quite a bit of discussion among the committee on
this subject lately (the last part is the subject of DR #451,
though it's discussed in the context of uninitialized objects
with indeterminate values).


Are there notes from these discussions or something?


I would hesitate to call it
consensus but I think it would be fair to say that the opinion
of the vocal majority is that implementations aren't intended
to spontaneously change valid (i.e., determinate) representations
of objects in the absence of an access to the value of the object.


Thanks for the info. IMHO this part of DR 260 has even more serious 
consequences than the part about pointer provenance. It effectively 
prohibits manual byte-by-byte (or any non-atomic) copying of objects for 
types like long double. If an implementation decides to normalize a 
value in a variable during copying it will see an inconsistent 
representation, e.g. a trap representation. It's a sure way to get total 
garbage. I don't know if allowing implementations to normalize values is 
useful but the current language in DR 260 allows too much.


As for valid/determinate representation this is another place where 
distinction between a value and a representation is worth stressing. 
Uninitialized variables are a clear case -- both its value and 
representation are indeterminate. But what if we set some part of 
representation of a variable -- it doesn't yet have a determinate value 
but we want the part that we have set to be preserved. Another 
interesting example is a pointer after free() -- its representation is 
kinda determinate but its value is indeterminate.


--
Alexander Cherepanov


Re: [PR71478] Fix ICE in tree-ssa-reassoc.c

2016-06-13 Thread Jakub Jelinek
On Fri, Jun 10, 2016 at 01:34:07PM +0200, Richard Biener wrote:
> On Fri, Jun 10, 2016 at 4:19 AM, kugan
>  wrote:
> > Hi,
> >
> > In PR71478, for vector negation of ssa produced by call stmt, we add vector
> > (-1) and ssa to the ops list. However, in the place where we remove the (-1)
> > from ops list, we failed to do this for vector integer. As a result,
> > rewrite_expr_tree wrongly assumes that it is working with gimple_assign.
> >
> > Attached patch fixes the place where we remove the vector (-1).
> >
> > Regression tested on x86-64-linux-gnu with no new regressions. Regression
> > testing on aarch64-linux-gnu is ongoing. Is this OK for trunk?
> 
> Ok.

The testcase fails for me on i686-linux.

Requiring vect_int outside of */vect/ is bogus, it assumes the default
vectorization flags are used, which is not the case here.
But furthermore, it makes zero sense to require it, vector_size attribute
must be supported always, just without HW support lowered to scalar insns.
-Wno-psabi -w are the standard options used to turn off psabi warnings
on i?86 and powerpc.

Tested on i686-linux and x86_64-linux, committed to trunk as obvious.

2016-06-13  Jakub Jelinek  

PR middle-end/71478
* gcc.dg/pr71478.c: Remove dg-require-effective-target vect_int.
Add -Wno-psabi -w to dg-options.

--- gcc/testsuite/gcc.dg/pr71478.c  (revision 237383)
+++ gcc/testsuite/gcc.dg/pr71478.c  (working copy)
@@ -1,7 +1,6 @@
 /* PR middle-end/71478 */
-/* { dg-require-effective-target vect_int } */
 /* { dg-do compile } */
-/* { dg-options "-O3" } */
+/* { dg-options "-O3 -Wno-psabi -w" } */
 
 typedef unsigned int __attribute__ ((vector_size (8))) uv2si;
 typedef int __attribute__ ((vector_size (8))) v2si;
@@ -18,4 +17,3 @@ foo (void)
   uv2si j = k * __builtin_shuffle (z, z, (uv2si) {1, 3});
   return k * j;
 }
-


Jakub


[Bug tree-optimization/71478] [7 Regression] ICE in tree-ssa-reassoc.c after r236564

2016-06-13 Thread jakub at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71478

--- Comment #5 from Jakub Jelinek  ---
Author: jakub
Date: Mon Jun 13 18:54:25 2016
New Revision: 237392

URL: https://gcc.gnu.org/viewcvs?rev=237392=gcc=rev
Log:
PR middle-end/71478
* gcc.dg/pr71478.c: Remove dg-require-effective-target vect_int.
Add -Wno-psabi -w to dg-options.

Modified:
trunk/gcc/testsuite/ChangeLog
trunk/gcc/testsuite/gcc.dg/pr71478.c

gcc -gdwarf-2 -gstrict-dwarf produces bad debug_loc entries

2016-06-13 Thread Trevor Harrison
I'm running into an issue with start/end values of debug_loc entries
being absolute addresses instead of offsets when a file is compiled
with -gdwarf-2 and -gstrict-dwarf and there are virtual methods in
that file.  gcc 4.4.7, but I've seen it under 4.8'ish.

I'm new enough to dwarf stuff to not be 100% sure that this is a bug
vs. I'm just missing a field somewhere that indicates that these
debug_loc entries are in a different format but on the other hand,
readelf also seems confused about this.

An example file:

#include <stdio.h>
struct class1
{
class1() { printf("blah\n"); }
virtual ~class1() { }
};

int main()
{
class1 c1;
return 0;
}

# g++ -gdwarf-2 -gstrict-dwarf -o foo foo.cpp
# readelf -W -w -x .debug_loc foo > foo.debuginfo

from the foo.debuginfo file:

The compile_unit shows 0x400664 as its lowpc:

 <0>: Abbrev Number: 1 (DW_TAG_compile_unit)
< c>   DW_AT_producer: (indirect string, offset: 0xab): GNU
C++ 4.4.7 20120313 (Red Hat 4.4.7-17)
<10>   DW_AT_language: 4(C++)
<11>   DW_AT_name: (indirect string, offset: 0x29): foo.cpp
<15>   DW_AT_comp_dir: (indirect string, offset: 0x41): 
<19>   DW_AT_low_pc  : 0x400664
<21>   DW_AT_high_pc : 0x400693
<29>   DW_AT_stmt_list   : 0x0

But the debug_loc section shows start addresses in the 0x8 range:

Contents of the .debug_loc section:

Offset   BeginEnd  Expression
 00800cf8 00800cf9 (DW_OP_breg7: 8)
 00800cf9 00800cfc (DW_OP_breg7: 16)
 00800cfc 00800d1a (DW_OP_breg6: 16)
 00800d1a 00800d1b (DW_OP_breg7: 8)
 

(800cf8 == lowpc 0x400664 + 0x400694)

Hex dump of section '.debug_loc':
  0x 94064000  95064000  ..@...@.
  0x0010 02007708 95064000  98064000 ..w...@...@.
  0x0020  02007710 98064000  ..w...@.
  0x0030 b6064000  02007610 b6064000 ..@...v...@.
  0x0040  b7064000  02007708 ..@...w.


Is this a known bug or issue or am I missing something?


[C++ PATCH] Fix incomplete type error recovery (PR c++/71516)

2016-06-13 Thread Jakub Jelinek
Hi!

On the following testcase we ICE during error recovery, because
a is first added to the incomplete vars vector, but then is attempted to
be initialized, which results in error and setting its type to
error_mark_node (as the type has been incomplete).
When we try to complete vars, we ICE because TYPE_MAIN_VARIANT expects to
see a type, rather than error_mark_node (with tree checking).

Ok for trunk?  Would this be reasonable to backport too (I mean, it
shouldn't break anything and accessing TYPE_MAIN_VARIANT (error_mark_node)
can crash miserably)?

Bootstrapped/regtested on x86_64-linux and i686-linux.

2016-06-13  Jakub Jelinek  

PR c++/71516
* decl.c (complete_vars): Handle gracefully type == error_mark_node.

* g++.dg/init/pr71516.C: New test.

--- gcc/cp/decl.c.jj2016-06-09 22:45:57.0 +0200
+++ gcc/cp/decl.c   2016-06-13 17:05:37.742493834 +0200
@@ -15029,8 +15029,9 @@ complete_vars (tree type)
  tree var = iv->decl;
  tree type = TREE_TYPE (var);
 
- if (TYPE_MAIN_VARIANT (strip_array_types (type))
- == iv->incomplete_type)
+ if (type != error_mark_node
+ && (TYPE_MAIN_VARIANT (strip_array_types (type))
+ == iv->incomplete_type))
{
  /* Complete the type of the variable.  The VAR_DECL itself
 will be laid out in expand_expr.  */
--- gcc/testsuite/g++.dg/init/pr71516.C.jj  2016-06-13 17:08:07.734548282 
+0200
+++ gcc/testsuite/g++.dg/init/pr71516.C 2016-06-13 17:07:20.0 +0200
@@ -0,0 +1,10 @@
+// PR c++/71516
+// { dg-do compile }
+
+struct A;  // { dg-message "forward declaration of" }
+struct B
+{ 
+  static A a;
+};
+A B::a = A();  // { dg-error "has initializer but incomplete type|invalid use 
of incomplete type" }
+struct A {};

Jakub


[PATCH] Fix ubsan handling of BIND_EXPR (PR sanitizer/71498)

2016-06-13 Thread Jakub Jelinek
Hi!

As has been discussed in the original -fsanitize=bounds submission,
walk_tree for BIND_EXPR walks the body and
DECL_INITIAL/DECL_SIZE/DECL_SIZE_UNIT of all the BIND_EXPR_VARS.
For -fsanitize=bounds instrumentation, we want to avoid walking DECL_INITIAL
of TREE_STATIC vars, so should set *walk_subtrees to 0 and walk it all
ourselves.  But, what the committed code actually does is that for
BIND_EXPRs that contain no TREE_STATIC vars, it walks
DECL_INITIAL/DECL_SIZE/DECL_SIZE_UNIT of all the BIND_EXPR_VARS, and then
walks subtrees normally, which means walking the body (good) and all the
DECL_INITIAL/DECL_SIZE/DECL_SIZE_UNIT exprs again (waste of time, we use
hash_set for duplicates, so just inefficiency).
But, if any TREE_STATIC vars appears, we set *walk_subtrees to 0 and
forget to walk the body (the primary bug).

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?

2016-06-13  Jakub Jelinek  

PR sanitizer/71498
* c-gimplify.c (ubsan_walk_array_refs_r): Set *walk_subtrees = 0 on
all BIND_EXPRs, and on all BIND_EXPRs recurse also on BIND_EXPR_BODY.

* c-c++-common/ubsan/bounds-13.c: New test.

--- gcc/c-family/c-gimplify.c.jj2016-01-27 19:47:27.0 +0100
+++ gcc/c-family/c-gimplify.c   2016-06-13 13:27:06.531549561 +0200
@@ -67,23 +67,23 @@ ubsan_walk_array_refs_r (tree *tp, int *
 {
   hash_set *pset = (hash_set *) data;
 
-  /* Since walk_tree doesn't call the callback function on the decls
- in BIND_EXPR_VARS, we have to walk them manually.  */
   if (TREE_CODE (*tp) == BIND_EXPR)
 {
+  /* Since walk_tree doesn't call the callback function on the decls
+in BIND_EXPR_VARS, we have to walk them manually, so we can avoid
+instrumenting DECL_INITIAL of TREE_STATIC vars.  */
+  *walk_subtrees = 0;
   for (tree decl = BIND_EXPR_VARS (*tp); decl; decl = DECL_CHAIN (decl))
{
  if (TREE_STATIC (decl))
-   {
- *walk_subtrees = 0;
- continue;
-   }
+   continue;
  walk_tree (&DECL_INITIAL (decl), ubsan_walk_array_refs_r, pset,
 pset);
  walk_tree (&DECL_SIZE (decl), ubsan_walk_array_refs_r, pset, pset);
  walk_tree (&DECL_SIZE_UNIT (decl), ubsan_walk_array_refs_r, pset,
 pset);
}
+  walk_tree (&BIND_EXPR_BODY (*tp), ubsan_walk_array_refs_r, pset, pset);
 }
   else if (TREE_CODE (*tp) == ADDR_EXPR
   && TREE_CODE (TREE_OPERAND (*tp, 0)) == ARRAY_REF)
--- gcc/testsuite/c-c++-common/ubsan/bounds-13.c.jj 2016-06-13 
13:36:25.698316271 +0200
+++ gcc/testsuite/c-c++-common/ubsan/bounds-13.c2016-06-13 
13:39:57.240586520 +0200
@@ -0,0 +1,31 @@
+/* PR sanitizer/71498 */
+/* { dg-do run } */
+/* { dg-options "-fsanitize=bounds -Wno-array-bounds" } */
+
+struct S { int a[100]; int b, c; } s;
+
+__attribute__((noinline, noclone)) int
+foo (int x)
+{
+  return s.a[x];
+}
+
+__attribute__((noinline, noclone)) int
+bar (int x)
+{
+  static int *d = [99];
+  asm volatile ("" : : "r" ());
+  return s.a[x];
+}
+
+int
+main ()
+{
+  volatile int a = 0;
+  a += foo (100);
+  a += bar (100);
+  return 0;
+}
+
+/* { dg-output "index 100 out of bounds for type 'int 
\\\[100\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+/* { dg-output "\[^\n\r]*index 100 out of bounds for type 'int 
\\\[100\\\]'\[^\n\r]*(\n|\r\n|\r)" } */

Jakub


[PATCH] Improve tree-ssa-tail-merge for switches (PR tree-optimization/71520)

2016-06-13 Thread Jakub Jelinek
Hi!

Cross-jumping at GIMPLE level gives up e.g. because there are any labels
at the beginning of the block (which is always the case for bbs referenced
from switches).  While labels for non-local goto as well as computed goto
are hard to handle, after all the edges are then EDGE_ABNORMAL that can't be
redirected anyway, other labels can be handled very easily.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?

In the PR, beyond this I'm also talking about switchconv pass not being able
to know that some cases could be cross-jumped and thus use better conversion
sequences.  Wonder if we shouldn't schedule either a full, or a limited
version of tailmerging before switchconv, perhaps just use the
infrastructure from tree-ssa-tail-merge.c to handle the easiest cases
where the cross-jumping in the end would end up simplifying some of the
switches.  Thoughts on this?

2016-06-13  Jakub Jelinek  

PR tree-optimization/71520
* tree-ssa-tail-merge.c (find_duplicate): Handle labels.
(replace_block_by): Move user labels from bb1 to bb2.

* gcc.dg/tree-ssa/pr71520.c: New test.

--- gcc/tree-ssa-tail-merge.c.jj2016-06-10 20:23:55.196164390 +0200
+++ gcc/tree-ssa-tail-merge.c   2016-06-13 12:08:34.691985005 +0200
@@ -1265,6 +1265,10 @@ find_duplicate (same_succ *same_succ, ba
   gimple *stmt1 = gsi_stmt (gsi1);
   gimple *stmt2 = gsi_stmt (gsi2);
 
+  if (gimple_code (stmt1) == GIMPLE_LABEL
+ && gimple_code (stmt2) == GIMPLE_LABEL)
+   break;
+
   if (!gimple_equal_p (same_succ, stmt1, stmt2))
return;
 
@@ -1277,6 +1281,20 @@ find_duplicate (same_succ *same_succ, ba
   gsi_advance_bw_nondebug_nonlocal (, , _escaped);
 }
 
+  while (!gsi_end_p (gsi1) && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
+{
+  tree label = gimple_label_label (as_a  (gsi_stmt (gsi1)));
+  if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
+   return;
+  gsi_prev ();
+}
+  while (!gsi_end_p (gsi2) && gimple_code (gsi_stmt (gsi2)) == GIMPLE_LABEL)
+{
+  tree label = gimple_label_label (as_a  (gsi_stmt (gsi2)));
+  if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
+   return;
+  gsi_prev ();
+}
   if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
 return;
 
@@ -1555,6 +1573,23 @@ replace_block_by (basic_block bb1, basic
   e2->probability = GCOV_COMPUTE_SCALE (e2->count, out_sum);
 }
 
+  /* Move over any user labels from bb1 after the bb2 labels.  */
+  gimple_stmt_iterator gsi1 = gsi_start_bb (bb1);
+  if (!gsi_end_p (gsi1) && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
+{
+  gimple_stmt_iterator gsi2 = gsi_after_labels (bb2);
+  while (!gsi_end_p (gsi1)
+&& gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
+   {
+ tree label = gimple_label_label (as_a  (gsi_stmt (gsi1)));
+ gcc_assert (!DECL_NONLOCAL (label) && !FORCED_LABEL (label));
+ if (DECL_ARTIFICIAL (label))
+   gsi_next ();
+ else
+   gsi_move_before (, );
+   }
+}
+
   /* Clear range info from all stmts in BB2 -- this transformation
  could make them out of date.  */
   reset_flow_sensitive_info_in_bb (bb2);
--- gcc/testsuite/gcc.dg/tree-ssa/pr71520.c.jj  2016-06-13 12:26:55.251630020 
+0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr71520.c 2016-06-13 12:26:31.0 
+0200
@@ -0,0 +1,90 @@
+/* PR tree-optimization/71520 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void bar (int);
+
+void
+foo (int x)
+{
+  switch (x)
+{
+case 1:
+case 12:
+case 28:
+case 174:
+  bar (1);
+  bar (2);
+  break;
+case 3:
+case 7:
+case 78:
+case 96:
+case 121:
+default:
+  bar (3);
+  bar (4);
+  bar (5);
+  bar (6);
+  break;
+case 8:
+case 13:
+case 27:
+case 19:
+case 118:
+  bar (3);
+  bar (4);
+  bar (5);
+  bar (6);
+  break;
+case 4:
+  bar (7);
+  break;
+}
+}
+
+void
+baz (int x)
+{
+  switch (x)
+{
+case 1:
+case 12:
+case 28:
+case 174:
+  bar (8);
+  bar (9);
+  break;
+case 3:
+case 7:
+case 78:
+case 96:
+case 121:
+default:
+lab1:
+lab2:
+  bar (10);
+  bar (11);
+  bar (12);
+  bar (13);
+  break;
+case 8:
+case 13:
+case 27:
+case 19:
+case 118:
+lab3:
+lab4:
+  bar (10);
+  bar (11);
+  bar (12);
+  bar (13);
+  break;
+case 4:
+  bar (14);
+  break;
+}
+}
+
+/* { dg-final { scan-tree-dump-times "bar \\\(3\\\);" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "bar \\\(10\\\);" 1 "optimized" } } */

Jakub


Re: Fix pure/const discovery WRT interposition part 2

2016-06-13 Thread H.J. Lu
On Sat, Apr 16, 2016 at 9:47 AM, Jan Hubicka  wrote:
> Hi,
> this patch updates ipa-pure-const.c to only propagate PURE flag across
> calls that do not bind to local defs and are not explicitly declared const.
> This gets memory state into shape that the callee produced by other compiler
> and still accessing memory is safe.
>
> We need similar logic for -fnon-call-exceptions which I will do incrementally.
> We also want to track if the original unoptimized body did access memory but
> that needs frontend changes because memory accesses may get folded away during
> parsing.
>
> Bootstrapped/regtested x86_64-linux, will commit it shortly.
>
> Honza
>
> PR ipa/70018
> * cgraph.c (cgraph_set_const_flag_1): Only set as pure if
> function does not bind to current def.
> * ipa-pure-const.c (worse_state): Add FROM and TO parameters;
> handle conservatively calls to functions that does not need to bind
> to current def.
> (check_call): Update call of worse_state.
> (ignore_edge_for_nothrow): Update.
> (ignore_edge_for_pure_const): Likewise.
> (propagate_pure_const): Update calls to worse_state.
> (skip_function_for_local_pure_const): Reformat comments.
>

This caused:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71524

H.J.


Re: [PATCH], PowerPC: Allow DImode in Altivec registers

2016-06-13 Thread Michael Meissner
It would help if I included the patch.

On Mon, Jun 13, 2016 at 01:28:16PM -0400, Michael Meissner wrote:
> This patch goes through the PowerPC compiler and adds support to allow DImode
> (64-bit integers) into Altivec registers for VSX systems.  It also adds some
> support to allow loading some DImode constants via either ISA 2.07 or ISA 3.0
> instructions.
> 
> I have bootstrapped this with no regressions on both a big endian power7 
> system
> and a little endian power8 system.
> 
> I have run the Spec 2006 INT tests with these changes, and the run times were
> comparable between the original compiler and the compiler with the changes.
> 
> Are these changes ok to install in the trunk?  Assuming they go in the trunk,
> can I install them in the 6.2 branch if they cause no regression?
> 
> Note, I will be away from the office, starting Thursday afternoon (June 16th,
> 2016) and I will return on Monday (June 20th, 2016).  I will not have easy
> access to email during this time.

[gcc]
2016-06-13  Michael Meissner  

* config/rs6000/vsx.md (VSINT_84): Add DImode to enable loading
DImode constants with XXSPLTIB in vector registers.
(vsx_extract_, V2DImode/V2DFmode): Combine both
vsx_extract__internal{1,2} into a single insn that handles
direct move (both ISA 2.07 and ISA 3.0 versions), and optimizes
extraction of the element at the top of the register as a scalar
value.
(vsx_extract__internal1): Likewise.
(vsx_extract__internal2): Likewise.
* config/rs6000/constraints.md (wi constraint): Remove a comment
about DImode not being allowed in Altivec registers.
(wB constraint): New constraint for constants that can be
generated in Altivec registers with VSPLTISW/VUPKHSW.
* config/rs6000/predicates.md (xxspltib_constant_split): Update
comments.
(xxspltib_constant_nosplit): Likewise.
* config/rs6000/rs6000-cpus.def (ISA_2_6_MASKS_SERVER): Add
support for -mupper-regs-di to enable DImode to go into Altivec
registers.
(POWERPC_MASKS): Likewise.
(power7 cpu): Likewise.
* config/rs6000/rs6000.opt (-mupper-regs-di): Likewise.
* config/rs6000/rs6000.c (rs6000_hard_regno_mode_ok): Add support
for DImode being allowed in Altivec registers.  Update wi/wj
constraints.  Set scalar_in_vmx_p flag.
(rs6000_option_override_internal): Add checks for -mupper-regs-di.
(xxspltib_constant_p): Allow CONST_INT's with VOIDmode.  Don't
return true if we could use VSPLTISW/VUPKHSW instead of XXSPLTIB.
(rs6000_opt_masks): Add -mupper-regs-di.
* config/rs6000/rs6000.md (lfiwax): Update clobbers that don't use
direct move to use wi and now wj.
(lfiwzx): Likewise.
(floatsi2_lfiwax_mem): Combine alternatives into a single
alternative.
(floatunssi2_lfiwzx_mem): Likewise.
(fix_truncdi2_fctidz): Change second alternative to allow
any VSX register, instead of just Altivec registers, to allow
either operand to be an Altivec register or both.
(fixuns_truncdi2_fctiduz): Likewise.
(movdi_internal32): Add support for -mupper-regs-di.  Add support
to load constants via XXSPLTIB or VSPLTISW.  Add spacing to allow
the alternatives and attributes to be lined up to be easier to
read.
(movdi_internal64): Likewise.
(64-bit DImode splitters): Change predicates to only split loading
up GPR registers.  Add splits for using XXSPLTIB or VSPLTISW to
load constants in ISA 3.0 or ISA 2.07 respectively.
* doc/invoke.texi (RS/6000 and PowerPC Options): Document
-mupper-regs-di.  Update -mupper-regs-df and -mupper-regs-sf to
mention -mcpu=power9 sets these options.
* doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
wB constraint.

[gcc/testsuite]
2016-06-13  Michael Meissner  

* gcc.target/powerpc/p9-dimode1.c: New test.
* gcc.target/powerpc/p9-dimode2.c: Likewise.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/vsx.md
===
--- gcc/config/rs6000/vsx.md
(.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000)
(revision 237222)
+++ gcc/config/rs6000/vsx.md(.../gcc/config/rs6000) (working copy)
@@ -260,7 +260,7 @@ (define_mode_attr VS_64reg [(V2DF   "ws")
(V2DI   "wi")])
 
 ;; Iterators for loading constants with xxspltib
-(define_mode_iterator VSINT_84  [V4SI V2DI])
+(define_mode_iterator VSINT_84  [V4SI V2DI DI])
 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
 
 ;; Constants for creating unspecs
@@ -2095,77 +2095,69 @@ 

[Bug middle-end/71524] [7 Regression] internal compiler error: in binds_to_current_def_p, at symtab.c:2232

2016-06-13 Thread hjl.tools at gmail dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71524

H.J. Lu  changed:

   What|Removed |Added

 Status|UNCONFIRMED |NEW
   Last reconfirmed||2016-06-13
 CC||hubicka at ucw dot cz
   Target Milestone|--- |7.0
 Ever confirmed|0   |1

--- Comment #1 from H.J. Lu  ---
It is caused by r70018.

[PATCH] Fix SOURCE_DATE_EPOCH handling with -E (PR preprocessor/71183)

2016-06-13 Thread Jakub Jelinek
Hi!

The SOURCE_DATE_EPOCH env var is ignored during -E, which is undesirable
and inconsistent.  The problem is that the appropriate callback for
libcpp is only installed when compiling and not when preprocessing only.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?

2016-06-13  Jakub Jelinek  

PR preprocessor/71183
* c-ppoutput.c (init_pp_output): Set cb->get_source_date_epoch
to cb_get_source_date_epoch.

* gcc.dg/cpp/source_date_epoch-3.c: New test.

--- gcc/c-family/c-ppoutput.c.jj2016-01-04 14:55:58.0 +0100
+++ gcc/c-family/c-ppoutput.c   2016-06-12 19:49:50.932112947 +0200
@@ -150,6 +150,7 @@ init_pp_output (FILE *out_stream)
 }
 
   cb->has_attribute = c_common_has_attribute;
+  cb->get_source_date_epoch = cb_get_source_date_epoch;
 
   /* Initialize the print structure.  */
   print.src_line = 1;
--- gcc/testsuite/gcc.dg/cpp/source_date_epoch-3.c.jj   2016-06-12 
19:56:49.988696438 +0200
+++ gcc/testsuite/gcc.dg/cpp/source_date_epoch-3.c  2016-06-12 
19:57:36.648093343 +0200
@@ -0,0 +1,9 @@
+/* PR preprocessor/71183 */
+/* { dg-do preprocess } */
+/* { dg-set-compiler-env-var SOURCE_DATE_EPOCH "630333296" } */
+
+const char *date = __DATE__;
+const char *time = __TIME__;
+
+/* { dg-final { scan-file source_date_epoch-3.i "Dec 22 1989" } } */
+/* { dg-final { scan-file source_date_epoch-3.i "12:34:56" } } */

Jakub


Re: [PATCH 2/3] selftests: improve reported failure locations

2016-06-13 Thread Jeff Law

On 06/09/2016 12:42 PM, David Malcolm wrote:

This patch introduces a selftest::location struct to wrap up __FILE__
and __LINE__ information (and __FUNCTION__) throughout the selftests,
allowing location information to be passed around.

It updates the helper functions in pretty-print.c to pass through
the precise location of each test, so that if a failure occurs, the
correct line number is printed, rather than a line within a helper
function.

gcc/ChangeLog:
* input.c (test_reading_source_line): Use SELFTEST_LOCATION.
* pretty-print.c (assert_pp_format_va): Add location param and use
it with ASSERT_STREQ_AT.
(assert_pp_format): Add location param and pass it to
assert_pp_format_va.
(assert_pp_format_colored): Likewise.
(ASSERT_PP_FORMAT_1): New.
(ASSERT_PP_FORMAT_2): New.
(ASSERT_PP_FORMAT_3): New.
(test_pp_format): Provide SELFTEST_LOCATION throughout, either
explicitly, or implicitly via the above macros.
* selftest.c (selftest::pass): Use a selftest::location rather
than file and line.
(selftest::fail): Likewise.  Print the function name.
(selftest::fail_formatted): Likewise.
(selftest::assert_streq): Use a selftest::location rather than
file and line.
* selftest.h (selftest::location): New struct.
(SELFTEST_LOCATION): New macro.
(selftest::pass): Accept a const location & rather than file
and line.
(selftest::fail): Likewise.
(selftest::fail_formatted): Likewise.
(selftest::assert_streq): Likewise.
(ASSERT_TRUE): Update for above changes, using SELFTEST_LOCATION.
(ASSERT_FALSE): Likewise.
(ASSERT_EQ): Likewise.
(ASSERT_NE): Likewise.
(ASSERT_STREQ): Likewise.
(ASSERT_PRED1): Likewise.
(ASSERT_STREQ_AT): New macro.

OK.
jeff



Re: [PATCH 3/3] pretty-print.c: skip color selftests if GCC_COLORS is set

2016-06-13 Thread Jeff Law

On 06/09/2016 12:42 PM, David Malcolm wrote:

gcc/ChangeLog:
* pretty-print.c (assert_pp_format_colored): Skip the test if
GCC_COLORS is set.
(test_pp_format): Remove comment about GCC_COLORS.

OK.
jeff



Re: [PATCH 1/3] selftest: show values when ASSERT_STREQ fails

2016-06-13 Thread Jeff Law

On 06/09/2016 12:42 PM, David Malcolm wrote:

Rework ASSERT_STREQ so that it prints the actual and expected values
to stderr when it fails (by moving it to a helper function).

gcc/ChangeLog:
* selftest.c (selftest::fail_formatted): New function.
(selftest::assert_streq): New function.
* selftest.h (selftest::fail_formatted): New decl.
(selftest::assert_streq): New decl.
(ASSERT_STREQ): Reimplement in terms of selftest::assert_streq.

OK.
jeff



Re: [PATCH] PR bootstrap/71481: fix input.c selftest

2016-06-13 Thread Jeff Law

On 06/09/2016 03:58 PM, David Malcolm wrote:

input.c's selftest::test_reading_source_line attempted to read from
__FILE__, which doesn't work if the binary is run from a different
location than the build dir.

Fix it by rewriting the test to write out a tempfile, and read from
that, rather than from __FILE__.

I used make_temp_file to create the name for the temporary file, on
the grounds that that's what the driver uses for that purpose.

This is on top of the patch kit posted as:
  https://gcc.gnu.org/ml/gcc-patches/2016-06/msg00735.html

Successfully bootstrapped on x86_64-pc-linux-gnu
Successful -fself-test of stage1 on powerpc-ibm-aix7.1.3.0

OK for trunk?

gcc/ChangeLog:
PR bootstrap/71481
* input.c (selftest::test_reading_source_line): Avoid reading from
__FILE__ by creating a tempfile with known content and reading
from that instead.

OK.

FWIW, I think the LANG_C vs translating is your call to make.  I can see 
arguments for both directions.


Jeff



Fix oversight in vn_reference_lookup_3

2016-06-13 Thread Eric Botcazou
The second test on shared_lookup_references in the block:

  /* We need to pre-pend vr->operands[0..i] to rhs.  */
  vec old = vr->operands;
  if (i + 1 + rhs.length () > vr->operands.length ())
{
  vr->operands.safe_grow (i + 1 + rhs.length ());
  if (old == shared_lookup_references)
shared_lookup_references = vr->operands;
}
  else
vr->operands.truncate (i + 1 + rhs.length ());
  FOR_EACH_VEC_ELT (rhs, j, vro)
vr->operands[i + 1 + j] = *vro;
  vr->operands = valueize_refs (vr->operands);
  if (old == shared_lookup_references)
shared_lookup_references = vr->operands;

is bypassed when the first test is true because "old" contains a stale value 
of shared_lookup_references.  This may result in either memory corruption 
(when checking is disabled) or in the failure of one of the assertions:

  gcc_checking_assert (vr1.operands == shared_lookup_references);

in vn_reference_lookup_pieces or vn_reference_lookup.  This was caught on a 
big proprietary Ada application in LTO mode.

Tested on x86_64-suse-linux, approved privately by Richard B., applied on the 
mainline and 6 branch.


2016-06-13  Eric Botcazou  

* tree-ssa-sccvn.c (vn_reference_lookup_3): Use a uniform test and
update shared_lookup_references only once after changing operands.

-- 
Eric Botcazou
Index: tree-ssa-sccvn.c
===
--- tree-ssa-sccvn.c	(revision 237323)
+++ tree-ssa-sccvn.c	(working copy)
@@ -2089,11 +2089,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree
   /* We need to pre-pend vr->operands[0..i] to rhs.  */
   vec old = vr->operands;
   if (i + 1 + rhs.length () > vr->operands.length ())
-	{
-	  vr->operands.safe_grow (i + 1 + rhs.length ());
-	  if (old == shared_lookup_references)
-	shared_lookup_references = vr->operands;
-	}
+	vr->operands.safe_grow (i + 1 + rhs.length ());
   else
 	vr->operands.truncate (i + 1 + rhs.length ());
   FOR_EACH_VEC_ELT (rhs, j, vro)
@@ -2244,8 +2240,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree
 	{
 	  vec old = vr->operands;
 	  vr->operands.safe_grow_cleared (2);
-	  if (old == shared_lookup_references
-	  && vr->operands != old)
+	  if (old == shared_lookup_references)
 	shared_lookup_references = vr->operands;
 	}
   else


Re: [PATCH] c/69507 - bogus warning: ISO C does not allow ‘__alignof__ (expression)’

2016-06-13 Thread Joseph Myers
On Fri, 27 May 2016, Martin Sebor wrote:

> The patch below adjusts the C alignof pedantic warning to avoid
> diagnosing the GCC extension (__alignof__) and only diagnose
> _Alignof in C99 and prior modes.  This is consistent with how
> __attribute__ ((aligned)) and _Alignas is handled (among other
> extensions vs standard features).

OK.

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH PING] boehm-gc: check for execinfo.h directly

2016-06-13 Thread Mike Frysinger
The current header depends on glibc version checks to determine whether
execinfo.h exists which breaks uClibc.  Instead, add an explicit configure
check for it.

2015-08-29  Mike Frysinger  

* configure.ac: Call AC_CHECK_HEADERS([execinfo.h]).
* configure: Regenerated.
* include/gc.h [HAVE_EXECINFO_H]: Define GC_HAVE_BUILTIN_BACKTRACE.
* include/gc_config.h.in: Regenerated.
---
 boehm-gc/configure  | 105 +++-
 boehm-gc/configure.ac   |   3 ++
 boehm-gc/include/gc.h   |   2 +-
 boehm-gc/include/gc_config.h.in |   3 ++
 4 files changed, 110 insertions(+), 3 deletions(-)

diff --git a/boehm-gc/configure b/boehm-gc/configure
index a8e11dab41b3..7d2b1f7401f7 100755
--- a/boehm-gc/configure
+++ b/boehm-gc/configure
@@ -1945,6 +1945,93 @@ $as_echo "$ac_res" >&6; }
   eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset 
as_lineno;}
 
 } # ac_fn_c_check_member
+
+# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
+# ---
+# Tests whether HEADER exists, giving a warning if it cannot be compiled using
+# the include files in INCLUDES and setting the cache variable VAR
+# accordingly.
+ac_fn_c_check_header_mongrel ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+  $as_echo_n "(cached) " >&6
+fi
+eval ac_res=\$$3
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+else
+  # Is the header compilable?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
+$as_echo_n "checking $2 usability... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_header_compiler=yes
+else
+  ac_header_compiler=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
+$as_echo "$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
+$as_echo_n "checking $2 presence... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <$2>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+  ac_header_preproc=yes
+else
+  ac_header_preproc=no
+fi
+rm -f conftest.err conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
+$as_echo "$ac_header_preproc" >&6; }
+
+# So?  What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
+  yes:no: )
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the 
compiler, rejected by the preprocessor!" >&5
+$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the 
preprocessor!" >&2;}
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the 
compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+;;
+  no:yes:* )
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot 
be compiled" >&5
+$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for 
missing prerequisite headers?" >&5
+$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" 
>&2;}
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf 
documentation" >&5
+$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section 
\"Present But Cannot Be Compiled\"" >&5
+$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" 
>&2;}
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the 
compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+;;
+esac
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+  $as_echo_n "(cached) " >&6
+else
+  eval "$3=\$ac_header_compiler"
+fi
+eval ac_res=\$$3
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+fi
+  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset 
as_lineno;}
+
+} # ac_fn_c_check_header_mongrel
 cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
@@ -11322,7 +11409,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; 

Re: [PATCH 3/8] nvptx -muniform-simt

2016-06-13 Thread Alexander Monakov
On Sun, 12 Jun 2016, Sandra Loosemore wrote:
> On 06/09/2016 10:53 AM, Alexander Monakov wrote:
> > +@item -muniform-simt
> > +@opindex muniform-simt
> > +Generate code that allows to keep all lanes in each warp active, even when
> 
> Allows *what* to keep?  E.g. what is doing the keeping here?  If it is the
> generated code itself, please rephrase as
> 
> Generate code that keeps

Let me try to expand and rephrase what I meant:

Allows the compiler to emit code that, at run time, may have all lanes active,
particularly in those regions of the program where observable effects from
execution must happen as if one lane is active (outside of SIMD loops).

But nevertheless generated code can run just like conventionally generated
code does: with each lane being active/inactive independently, and side
effects happening from each active lane (inside of SIMD loops).

Whether it actually runs in the former (let's call it "uniform") or the latter
("conventional") way is switchable at run time. The compiler itself is
responsible for emitting mode changes at SIMD region boundaries.

Does this help? Below I went with your suggestion, but changed "keeps" to "may
keep" because that's generally true only outside of SIMD regions.

> > +observable effects from execution should appear as if only one lane was
> 
> s/was/is/
> 
> > +active. This is achieved by instrumenting syscalls and atomic instructions
> > in
> > +a lightweight way that allows to switch behavior at runtime. This code
> 
> Same issue here: allows *what* to switch behavior?  (And how would you
> select which run-time behavior you want?)

Sorry. This gives the compiler itself a way to emit code that will switch the
behavior of the subsequently running code.

> Also, in the snippet above where it is used as a noun, please
> s/runtime/run time/

Thanks. Does the following look better?

@item -muniform-simt
@opindex muniform-simt
Generate code that may keep all lanes in each warp active, even when
observable effects from execution must appear as if only one lane is active.
This is achieved by instrumenting syscalls and atomic instructions in a
lightweight way, allowing the compiler to emit code that can switch at run
time between this and conventional execution modes. This code generation
variant is used for OpenMP offloading, but the option is exposed on its own
for the purpose of testing the compiler; to generate code suitable for linking
into programs using OpenMP offloading, use option @option{-mgomp}.

Alexander


Re: [patch, avr] Fix PR67353

2016-06-13 Thread Bernhard Reutner-Fischer
On June 13, 2016 5:48:43 PM GMT+02:00, Georg-Johann Lay  wrote:
>Pitchumani Sivanupandi schrieb:
>> Hi,
>> 
>> This patch introduces new flags for warning 'misspelled interrupt/
>> signal handler'. Flag -Wmisspelled-isr is enabled by default and it
>> will warn user if the interrupt/ signal handler is without '__vector'
>> prefix. Flag -Wno-misspelled-isr shall be enabled by user to allow
>> custom names, i.e. without __vector prefix.
>> 
>> // avr-gcc -c test.c
>> void custom_interruption(void) __attribute__((signal));
>> void custom_interruption(void) {}
>> 
>> Behavior after applying this patch:
>> 
>> $ avr-gcc test.c 
>> test.c: In function 'custom_interruption':
>> test.c:2:6: warning: 'custom_interruption' appears to be a misspelled
>> signal handler
>>  void custom_interruption(void) {}
>>   ^~~
>> 
>> $ avr-gcc test.c -Wmisspelled-isr
>> test.c: In function
>> 'custom_interruption':
>> test.c:2:6: warning: 'custom_interruption'
>> appears to be a misspelled signal handler
>>  void
>> custom_interruption(void) {}
>>   ^~~
>> 
>> $ avr-gcc test.c -Wno-misspelled-isr
>> $
>
>What about -Werror=misspelled-isr?
>
> > [...]
>> diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c
>> index ba5cd91..587bdbc 100644
>> --- a/gcc/config/avr/avr.c
>> +++ b/gcc/config/avr/avr.c
>> @@ -753,7 +753,7 @@ avr_set_current_function (tree decl)
>>   that the name of the function is "__vector_NN" so as to
>catch
>>   when the user misspells the vector name.  */
>>  
>> -  if (!STR_PREFIX_P (name, "__vector"))
>> +  if ((!STR_PREFIX_P (name, "__vector")) &&
>(avr_warn_misspelled_isr))
>>  warning_at (loc, 0, "%qs appears to be a misspelled %s
>handler",
>
>If, instead of the "0" the respective OPT_... enum is used in the call 
>to warning_at, the -Werror= should work as expected (and explicit "&& 
>avr_warn_misspelled_isr" no more needed).

And maybe even mention __vector in the message?
thanks,



[PATCH], PowerPC: Allow DImode in Altivec registers

2016-06-13 Thread Michael Meissner
This patch goes through the PowerPC compiler and adds support to allow DImode
(64-bit integers) into Altivec registers for VSX systems.  It also adds some
support to allow loading some DImode constants via either ISA 2.07 or ISA 3.0
instructions.

I have bootstrapped this with no regressions on both a big endian power7 system
and a little endian power8 system.

I have run the Spec 2006 INT tests with these changes, and the run times were
comparable between the original compiler and the compiler with the changes.

Are these changes ok to install in the trunk?  Assuming they go in the trunk,
can I install them in the 6.2 branch if they cause no regression?

Note, I will be away from the office, starting Thursday afternoon (June 16th,
2016) and I will return on Monday (June 20th, 2016).  I will not have easy
access to email during this time.

[gcc]
2016-06-13  Michael Meissner  

* config/rs6000/vsx.md (VSINT_84): Add DImode to enable loading
DImode constants with XXSPLTIB in vector registers.
(vsx_extract_, V2DImode/V2DFmode): Combine both
vsx_extract__internal{1,2} into a single insn that handles
direct move (both ISA 2.07 and ISA 3.0 versions), and optimizes
extraction of the element at the top of the register as a scalar
value.
(vsx_extract__internal1): Likewise.
(vsx_extract__internal2): Likewise.
* config/rs6000/constraints.md (wi constraint): Remove a comment
about DImode not being allowed in Altivec registers.
(wB constraint): New constraint for constants that can be
generated in Altivec registers with VSPLTISW/VUPKHSW.
* config/rs6000/predicates.md (xxspltib_constant_split): Update
comments.
(xxspltib_constant_nosplit): Likewise.
* config/rs6000/rs6000-cpus.def (ISA_2_6_MASKS_SERVER): Add
support for -mupper-regs-di to enable DImode to go into Altivec
registers.
(POWERPC_MASKS): Likewise.
(power7 cpu): Likewise.
* config/rs6000/rs6000.opt (-mupper-regs-di): Likewise.
* config/rs6000/rs6000.c (rs6000_hard_regno_mode_ok): Add support
for DImode being allowed in Altivec registers.  Update wi/wj
constraints.  Set scalar_in_vmx_p flag.
(rs6000_option_override_internal): Add checks for -mupper-regs-di.
(xxspltib_constant_p): Allow CONST_INT's with VOIDmode.  Don't
return true if we could use VSPLTISW/VUPKHSW instead of XXSPLTIB.
(rs6000_opt_masks): Add -mupper-regs-di.
* config/rs6000/rs6000.md (lfiwax): Update clobbers that don't use
direct move to use wi and now wj.
(lfiwzx): Likewise.
(floatsi2_lfiwax_mem): Combine alternatives into a single
alternative.
(floatunssi2_lfiwzx_mem): Likewise.
(fix_truncdi2_fctidz): Change second alternative to allow
any VSX register, instead of just Altivec registers, to allow
either operand to be an Altivec register or both.
(fixuns_truncdi2_fctiduz): Likewise.
(movdi_internal32): Add support for -mupper-regs-di.  Add support
to load constants via XXSPLTIB or VSPLTISW.  Add spacing to allow
the alternatives and attributes to be lined up to be easier to
read.
(movdi_internal64): Likewise.
(64-bit DImode splitters): Change predicates to only split loading
up GPR registers.  Add splits for using XXSPLTIB or VSPLTISW to
load constants in ISA 3.0 or ISA 2.07 respectively.
* doc/invoke.texi (RS/6000 and PowerPC Options): Document
-mupper-regs-di.  Update -mupper-regs-df and -mupper-regs-sf to
mention -mcpu=power9 sets these options.
* doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
wB constraint.

[gcc/testsuite]
2016-06-13  Michael Meissner  

* gcc.target/powerpc/p9-dimode1.c: New test.
* gcc.target/powerpc/p9-dimode2.c: Likewise.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797



[Bug tree-optimization/71490] [7 regression] gcc.dg/tree-ssa/slsr-8.c FAILs

2016-06-13 Thread thopre01 at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71490

--- Comment #3 from Thomas Preud'homme  ---
Differences start at sink phase:

@@ -46,17 +56,17 @@ f (int s, int * c)
   _2 = a1.0_1 * 4;
   _3 = -_2;
   x1_14 = c_13(D) + _3;
-  a2_15 = s_11(D) * 4;
-  a2.1_4 = (unsigned int) a2_15;
-  _5 = a2.1_4 * 4;
-  _6 = -_5;
-  x2_16 = c_13(D) + _6;
   if (x1_14 != 0B)
 goto ;
   else
 goto ;

   :
+  a2_15 = s_11(D) * 4;
+  a2.1_4 = (unsigned int) a2_15;
+  _5 = a2.1_4 * 4;
+  _6 = -_5;
+  x2_16 = c_13(D) + _6;
   goto ;

   :

[Bug fortran/71523] Static variables given automatic initializers with -finit-* and -fmax-stack-var-size

2016-06-13 Thread fritzoreese at gmail dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71523

Fritz Reese  changed:

   What|Removed |Added

 CC||fritzoreese at gmail dot com

--- Comment #1 from Fritz Reese  ---
Created attachment 38697
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=38697&action=edit
Patch for pr71523

Patch submitted, see https://gcc.gnu.org/ml/fortran/2016-06/msg00032.html

[PATCH, Fortran] PR71523 - Static variables given automatic initializers with -finit-* and -fmax-stack-var-size

2016-06-13 Thread Fritz Reese
RE: https://gcc.gnu.org/ml/fortran/2016-06/msg00023.html

On Thu, Jun 9, 2016 at 2:01 PM, Fritz Reese  wrote:
> It looks like when -fautomatic and -finit-local-zero are set with
> -fmax-stack-var-size=X, an automatic initializer is generated even for
> variables larger than X which are given static storage, causing such
> static variables to have their value re-initialized upon each entry to
> their namespace.
> ...


After doing more research I noticed PR41860
(https://gcc.gnu.org/bugzilla/show_bug.cgi?id=41860) was very similar
to this issue, so I've decided this is a bug and created PR71523
(https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71523). Here's a patch
for it.

The bug seems to be due to an oversight - since the size of a variable
is not known at resolution time when initializer expressions are
applied, -finit-* is too greedy in the case that the variable is large
enough to be removed from the stack according to -fmax-stack-var-size.
This patch removes automatic initializers at translation time which
were inserted by -finit-* (and inserts the appropriate static
initializer) according to -fmax-stack-var-size.

The patch passes all regression tests (on x86_64-redhat-linux),
including the two additional tests of its own demonstrating the issue.

---
Fritz Reese


0001-PR-Fortran-71523.patch
Description: Binary data


Re: Update probabilities in predict.def to match reality

2016-06-13 Thread Kyrill Tkachov

Hi Honza,

On 07/06/16 20:27, Jan Hubicka wrote:

Hello,
Martin Liska measured branch predictor hit rates on the current tree and SPEC2006.

CPU2006
HEURISTICS   BRANCHES  (REL)  HITRATE
COVERAGE COVERAGE  (REL)
loop iv compare33   0.1%  20.27% /  86.24%   
30630826   30.63M   0.0%
no prediction   10406  19.5%  33.41% /  84.76%   
139755242456  139.76G  14.1%
early return (on trees)  6328  11.9%  54.20% /  86.48%
33569991740   33.57G   3.4%
guessed loop iterations   112   0.2%  62.06% /  64.49%  
958458522  958.46M   0.1%
fail alloc595   1.1%  62.18% / 100.00%  
  595   595.00   0.0%
opcode values positive (on trees)4266   8.0%  64.30% /  91.28%
16931889792   16.93G   1.7%
opcode values nonequal (on trees)6600  12.4%  66.23% /  80.60%
71483051282   71.48G   7.2%
continue  507   0.9%  66.66% /  82.85%
10086808016   10.09G   1.0%
call11351  21.3%  67.16% /  92.24%
34680666103   34.68G   3.5%
loop iterations  2689   5.0%  67.99% /  67.99%   
408309517405  408.31G  41.3%
DS theory   26385  49.4%  68.62% /  85.44%   
146974369890  146.97G  14.9%
const return  271   0.5%  69.39% /  87.09%  
301566712  301.57M   0.0%
pointer (on trees)   6230  11.7%  69.59% /  87.18%
16667735314   16.67G   1.7%
combined53398 100.0%  70.31% /  80.36%   
989164856862  989.16G 100.0%
goto   78   0.1%  70.36% /  96.96%  
951041538  951.04M   0.1%
first match 16607  31.1%  78.00% /  78.42%   
702435244516  702.44G  71.0%
extra loop exit   141   0.3%  82.80% /  88.17% 
1696946942    1.70G   0.2%
null return   393   0.7%  91.47% /  93.08% 
3268678197    3.27G   0.3%
loop exit9909  18.6%  91.80% /  92.81%   
282927773783  282.93G  28.6%
guess loop iv compare 178   0.3%  97.81% /  97.85% 
4375086453    4.38G   0.4%
negative return   277   0.5%  97.94% /  99.23% 
1062119028    1.06G   0.1%
noreturn call2372   4.4% 100.00% / 100.00% 
8356562323    8.36G   0.8%
overflow 1282   2.4% 100.00% / 100.00%  
175074177  175.07M   0.0%
zero-sized array  677   1.3% 100.00% / 100.00%  
112723803  112.72M   0.0%
unconditional jump103   0.2% 100.00% / 100.00% 
491001  491.00K   0.0%

We used to track SPEC2000 until 2008 but then the infrastructure broke. The
numbers show some differences to 2008 results:

HEURISTICS BRANCHES  (REL)  HITRATE  COVERAGE  (REL)
DS theory 42611  57.1%  74.54% /  89.71%   9237799352  28.7%
combined  74578 100.0%  72.88% /  90.59%  32201983315 100.0%
opcode values nonequal (on trees)14544  19.5%  72.03% /  88.64%   
3387233627  10.5%
early return (on trees)   11078  14.9%  61.23% /  89.25%   2349499033   7.3%
first match   13249  17.8%  89.11% /  93.08%  15876522911  49.3%
guessed loop iterations2722   3.6%  86.50% /  90.76%   7308035517  22.7%
no prediction 18718  25.1%  34.36% /  86.14%   7087661052  22.0%
call  23937  32.1%  71.38% /  93.08%   3829002205  11.9%
opcode values positive (on trees) 2515   3.4%  72.77% /  86.49%
927995806   2.9%
loop branch 378   0.5%  87.61% /  95.54%   1491510452   4.6%
loop exit  8833  11.8%  91.43% /  94.52%   6538486043  20.3%
loop iterations 912   1.2%  99.11% /  99.11%396451321   1.2%
noreturn call   890   1.2%  99.99% /  99.99%205957905   0.6%
pointer (on trees) 8394  11.3%  85.09% /  94.80%   1315262058   4.1%
negative return 272   0.4%  96.47% /  99.74% 49156319   0.1%
const return551   0.7%  67.92% /  68.97% 96082001   0.3%
__builtin_expect 20   0.0%  0% /  0%0   0.0%
null return 566   0.8%  96.58% /  98.77% 87555632   0.3%

There is some degradation in the combined heuristics hitrate (72.8->70) which 
may be caused
simply by fact that new spec is harder to guess. Main decrease seems to be in 
opcode_positive/nonequal
which may be also attributed to the fact that early opts now optimize out more 
code before
we do the statistics.

There are bugs in few predictors - goto predictor is dead because the FE code 
was dropped,
return predictor is bit random because CFG is optimized (it should probably be 
done in 

Re: [PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c

2016-06-13 Thread Thomas Schwinge
Hi!

On Mon, 13 Jun 2016 16:48:56 +0200, Jakub Jelinek  wrote:
> On Mon, Jun 13, 2016 at 04:43:25PM +0200, Thomas Schwinge wrote:
> > On Wed, 01 Jun 2016 17:06:42 +0200, Thomas Schwinge 
> >  wrote:
> > > Here are the OpenACC bits of .
> > 
> > In the PR, Jakub clarified that all the missing other OMP_CLAUSE_* are in
> > fact all unreachable here.

> > The "anything else" default case in fact now is just the non-clause
> > OMP_CLAUSE_ERROR, so when adding a case for that one, we could then
> > remove the default case, and thus get a compiler warning when new clauses
> > are added in the future, without handling them here.  That makes sense to
> > me (would have made apparent much earlier the original problem of missing
> > handling for certain OMP_CLAUSE_*), but based on feedback received, it
> > feels as if I'm the only supporter of such "defensive" programming
> > paradigms?

Any thoughts about that,
?

> > [PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c

> Ok, [...]

As posted, committed to trunk in r237386:

commit be2a5a8e8ffd13c099d372c4fcc363d5cd3c83c2
Author: tschwinge 
Date:   Mon Jun 13 16:37:29 2016 +0000

[PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c

gcc/
PR middle-end/71373
* tree-nested.c (convert_nonlocal_omp_clauses)
(convert_local_omp_clauses): Document missing OMP_CLAUSE_*.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@237386 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog |  4 
 gcc/tree-nested.c | 60 ++-
 2 files changed, 46 insertions(+), 18 deletions(-)

diff --git gcc/ChangeLog gcc/ChangeLog
index ff685b1..89098e7 100644
--- gcc/ChangeLog
+++ gcc/ChangeLog
@@ -1,5 +1,9 @@
 2016-06-13  Thomas Schwinge  
 
+   PR middle-end/71373
+   * tree-nested.c (convert_nonlocal_omp_clauses)
+   (convert_local_omp_clauses): Document missing OMP_CLAUSE_*.
+
* tree-cfg.c (edge_to_cases_cleanup): Fix CASE_CHAIN typo.
* tree.def (CASE_LABEL_EXPR): Likewise.
 
diff --git gcc/tree-nested.c gcc/tree-nested.c
index 812f619..62cb01f 100644
--- gcc/tree-nested.c
+++ gcc/tree-nested.c
@@ -1203,17 +1203,29 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct 
walk_stmt_info *wi)
case OMP_CLAUSE_AUTO:
  break;
 
+ /* OpenACC tile clauses are discarded during gimplification.  */
case OMP_CLAUSE_TILE:
- /* OpenACC tile clauses are discarded during gimplification, so we
-don't expect to see anything here.  */
- gcc_unreachable ();
-
+ /* The following clause belongs to the OpenACC cache directive, which
+is discarded during gimplification.  */
case OMP_CLAUSE__CACHE_:
- /* These clauses belong to the OpenACC cache directive, which is
-discarded during gimplification, so we don't expect to see
-anything here.  */
- gcc_unreachable ();
-
+ /* The following clauses are only allowed in the OpenMP declare simd
+directive, so not seen here.  */
+   case OMP_CLAUSE_UNIFORM:
+   case OMP_CLAUSE_INBRANCH:
+   case OMP_CLAUSE_NOTINBRANCH:
+ /* The following clauses are only allowed on OpenMP cancel and
+cancellation point directives, which at this point have already
+been lowered into a function call.  */
+   case OMP_CLAUSE_FOR:
+   case OMP_CLAUSE_PARALLEL:
+   case OMP_CLAUSE_SECTIONS:
+   case OMP_CLAUSE_TASKGROUP:
+ /* The following clauses are only added during OMP lowering; nested
+function decomposition happens before that.  */
+   case OMP_CLAUSE__LOOPTEMP_:
+   case OMP_CLAUSE__SIMDUID_:
+   case OMP_CLAUSE__GRIDDIM_:
+ /* Anything else.  */
default:
  gcc_unreachable ();
}
@@ -1899,17 +1911,29 @@ convert_local_omp_clauses (tree *pclauses, struct 
walk_stmt_info *wi)
case OMP_CLAUSE_AUTO:
  break;
 
+ /* OpenACC tile clauses are discarded during gimplification.  */
case OMP_CLAUSE_TILE:
- /* OpenACC tile clauses are discarded during gimplification, so we
-don't expect to see anything here.  */
- gcc_unreachable ();
-
+ /* The following clause belongs to the OpenACC cache directive, which
+is discarded during gimplification.  */
case OMP_CLAUSE__CACHE_:
- /* These clauses belong to the OpenACC cache directive, which is
-discarded during gimplification, so we don't expect to see
-anything here.  */
- gcc_unreachable ();
-
+ /* The following clauses are only allowed in the 

[Bug middle-end/71524] New: [7 Regression] internal compiler error: in binds_to_current_def_p, at symtab.c:2232

2016-06-13 Thread hjl.tools at gmail dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71524

Bug ID: 71524
   Summary: [7 Regression] internal compiler error: in
binds_to_current_def_p, at symtab.c:2232
   Product: gcc
   Version: 7.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: middle-end
  Assignee: unassigned at gcc dot gnu.org
  Reporter: hjl.tools at gmail dot com
  Target Milestone: ---

[hjl@gnu-13 gcc]$ cat /tmp/x.c
static int implementation2 (void)
{
  return 2;
}

static void *resolver2 (void)
{
  return (void *)implementation2;
}

static int func2 (void) __attribute__ ((ifunc ("resolver2")));

void *
get_func2 (void)
{
  return &func2;
}

int
call_func2 (void)
{
  if (get_func2 () != &func2)
__builtin_abort ();

  return func2 ();
}
[hjl@gnu-13 gcc]$ ./xgcc -B./ -m32 -O3 -S /tmp/x.c -o /tmp/x.s 
func2/2 (func2) @0x7f7ce5b892e0
  Type: function definition analyzed alias
  Visibility: prevailing_def_ironly
  Address is taken.
  References: resolver2/1 (alias)
  Referring: get_func2/3 (addr)
  Availability: overwritable
  First run: 0
  Function flags:
  Called by: call_func2/4 (1.00 per call) 
  Calls: 
/tmp/x.c:26:1: internal compiler error: in binds_to_current_def_p, at
symtab.c:2232
 }
 ^
0x7469bc symtab_node::binds_to_current_def_p(symtab_node*)
../../src-trunk/gcc/symtab.c:2232
0x126c68f worse_state
../../src-trunk/gcc/ipa-pure-const.c:477
0x126c68f propagate_pure_const
../../src-trunk/gcc/ipa-pure-const.c:1343
0x126c68f execute
../../src-trunk/gcc/ipa-pure-const.c:1676
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See <http://gcc.gnu.org/bugs.html> for instructions.
[hjl@gnu-13 gcc]$

[Bug middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition

2016-06-13 Thread tschwinge at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71373

--- Comment #9 from Thomas Schwinge  ---
Author: tschwinge
Date: Mon Jun 13 16:37:29 2016
New Revision: 237386

URL: https://gcc.gnu.org/viewcvs?rev=237386&root=gcc&view=rev
Log:
[PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c

gcc/
PR middle-end/71373
* tree-nested.c (convert_nonlocal_omp_clauses)
(convert_local_omp_clauses): Document missing OMP_CLAUSE_*.

Modified:
trunk/gcc/ChangeLog
trunk/gcc/tree-nested.c

[Patch AArch64] Add some more missing intrinsics

2016-06-13 Thread James Greenhalgh

Hi,

Inspired by Jiong's recent work, here are some more missing intrinsics,
and a smoke test for each of them.

This patch covers:

  vcvt_n_f64_s64
  vcvt_n_f64_u64
  vcvt_n_s64_f64
  vcvt_n_u64_f64
  vcvt_f64_s64
  vrecpe_f64
  vcvt_f64_u64
  vrecps_f64

Tested on aarch64-none-elf, and on an internal testsuite for Neon
intrinsics.

Note that the new tests will ICE without the fixups in
https://gcc.gnu.org/ml/gcc-patches/2016-06/msg00805.html

OK?

Thanks,
James

---
gcc/ChangeLog

2016-06-10  James Greenhalgh  

* config/aarch64/arm_neon.h (vcvt_n_f64_s64): New.
(vcvt_n_f64_u64): Likewise.
(vcvt_n_s64_f64): Likewise.
(vcvt_n_u64_f64): Likewise.
(vcvt_f64_s64): Likewise.
(vrecpe_f64): Likewise.
(vcvt_f64_u64): Likewise.
(vrecps_f64): Likewise.

gcc/testsuite/ChangeLog

2016-06-10  James Greenhalgh  

* gcc.target/aarch64/vcvt_f64_1.c: New.
* gcc.target/aarch64/vcvt_n_f64_1.c: New.
* gcc.target/aarch64/vrecp_f64_1.c: New.
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index f70b6d3..2f90938 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -12447,6 +12447,20 @@ vcvt_n_f32_u32 (uint32x2_t __a, const int __b)
   return __builtin_aarch64_ucvtfv2si_sus (__a, __b);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vcvt_n_f64_s64 (int64x1_t __a, const int __b)
+{
+  return (float64x1_t)
+{ __builtin_aarch64_scvtfdi (vget_lane_s64 (__a, 0), __b) };
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vcvt_n_f64_u64 (uint64x1_t __a, const int __b)
+{
+  return (float64x1_t)
+{ __builtin_aarch64_ucvtfdi_sus (vget_lane_u64 (__a, 0), __b) };
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vcvtq_n_f32_s32 (int32x4_t __a, const int __b)
 {
@@ -12509,6 +12523,20 @@ vcvt_n_u32_f32 (float32x2_t __a, const int __b)
   return __builtin_aarch64_fcvtzuv2sf_uss (__a, __b);
 }
 
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vcvt_n_s64_f64 (float64x1_t __a, const int __b)
+{
+  return (int64x1_t)
+{ __builtin_aarch64_fcvtzsdf (vget_lane_f64 (__a, 0), __b) };
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcvt_n_u64_f64 (float64x1_t __a, const int __b)
+{
+  return (uint64x1_t)
+{ __builtin_aarch64_fcvtzudf_uss (vget_lane_f64 (__a, 0), __b) };
+}
+
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vcvtq_n_s32_f32 (float32x4_t __a, const int __b)
 {
@@ -12571,6 +12599,18 @@ vcvt_f32_u32 (uint32x2_t __a)
   return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vcvt_f64_s64 (int64x1_t __a)
+{
+  return (float64x1_t) { vget_lane_s64 (__a, 0) };
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vcvt_f64_u64 (uint64x1_t __a)
+{
+  return (float64x1_t) { vget_lane_u64 (__a, 0) };
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vcvtq_f32_s32 (int32x4_t __a)
 {
@@ -20659,6 +20699,12 @@ vrecpe_f32 (float32x2_t __a)
   return __builtin_aarch64_frecpev2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrecpe_f64 (float64x1_t __a)
+{
+  return (float64x1_t) { vrecped_f64 (vget_lane_f64 (__a, 0)) };
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrecpeq_f32 (float32x4_t __a)
 {
@@ -20691,6 +20737,13 @@ vrecps_f32 (float32x2_t __a, float32x2_t __b)
   return __builtin_aarch64_frecpsv2sf (__a, __b);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrecps_f64 (float64x1_t __a, float64x1_t __b)
+{
+  return (float64x1_t) { vrecpsd_f64  (vget_lane_f64 (__a, 0),
+   vget_lane_f64 (__b, 0)) };
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
 {
diff --git a/gcc/testsuite/gcc.target/aarch64/vcvt_f64_1.c b/gcc/testsuite/gcc.target/aarch64/vcvt_f64_1.c
new file mode 100644
index 000..b7ee7af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vcvt_f64_1.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include "arm_neon.h"
+
+/* For each of these intrinsics, we're mapping to a simple C cast.
+   While the compiler has some freedom in terms of choice of instruction,
+   we'd hope that for this simple case it would always pick the single
+   instruction form given in these tests.  Anything else is likely a
+   regression, so check for an exact instruction pattern and
+   register allocation decision.  */
+
+/* Test that if we have a value already in Advanced-SIMD registers, we use
+   the scalar register forms.  */
+
+float64x1_t

[Bug fortran/71523] New: Static variables given automatic initializers with -finit-* and -fmax-stack-var-size

2016-06-13 Thread fritzoreese at gmail dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71523

Bug ID: 71523
   Summary: Static variables given automatic initializers with
-finit-* and -fmax-stack-var-size
   Product: gcc
   Version: 7.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: fortran
  Assignee: unassigned at gcc dot gnu.org
  Reporter: fritzoreese at gmail dot com
  Target Milestone: ---

This bug is similar to pr41860. When a variable becomes static due to being
larger than -fmax-stack-var-size, the variable is still given an automatic
initializer on entry to its namespace by the -finit-* flags (-finit-integer,
-finit-local-zero, etc...)

This is probably an oversight due to the fact that the initializers are
generated at resolution time in resolve.c, but the size of the variable is not
known until translation time (in trans-decl.c).

See also the thread at https://gcc.gnu.org/ml/fortran/2016-06/msg00023.html

Re: [PATCH][vectorizer][2/2] PR 65951: Hook up mult synthesis logic into vectorisation of mult-by-constant

2016-06-13 Thread Kyrill Tkachov


On 13/06/16 15:48, Marc Glisse wrote:

+  /* All synthesis algorithms require shifts, so bail out early if
+ target cannot vectorize them.  */
+  if (!target_has_vecop_for_code (LSHIFT_EXPR, vectype))
+return false;

Hmm, 2 points:

* Could you use vect_supportable_shift (or equivalent) instead? This way it will work even 
if a target/mode supports vector << scalar and not vector << vector.



Ok, will do.

* This means that we will refuse to vectorize x*2 as x+x, which was the goal of my patch (SPARC VIS has additions, no shift, and limited multiplications, IIRC). I guess it would be possible, as a follow-up (it doesn't have to block your 
patch), not to give up in the no-shift branch, but to handle some small factors with only additions and subtractions. Or to split the emission of shifts to a function that, when shifts are not supported, emulates them with additions. Or 
even emit shifts and rely on expand or vector lowering to turn them to additions (though the estimated cost might be off). Any idea on the best way to handle SPARC?




This is my first time touching the vectorizer so I don't know for sure what 
would be the preferred approach.
Looks like expand_shift_1 in expmed.c already has code to expand a shift as 
additions, though it's gated on rtx costs
which I suppose SPARC won't implement accurately for vector shifts since it 
doesn't support them.
I suppose that code could easily be factored out to do the right thing though.

I think splitting emission of shifts into a function that synthesises them with 
additions when appropriate
would be best.

Kyrill



Fix CASE_CHAIN typos (was: [patch] Fix CASE_LABEL_EXPR documentation in tree.def and tree-cfg.c)

2016-06-13 Thread Thomas Schwinge
Hi!

On Wed, 18 Apr 2012 17:32:08 +0200, Steven Bosscher  
wrote:
> Subject says all. Will commit as obvious.
> 
> * tree.def (CASE_LABEL_EXPR): Fix documentation, mention all operands.
> * tree-cfg.c (edge_to_cases): Fix documentation.

> --- tree.def(revision 186526)
> +++ tree.def(working copy)
> @@ -876,10 +876,16 @@ DEFTREECODE (LOOP_EXPR, "loop_expr", tcc_statement
>   of all the cases.  */
>  DEFTREECODE (SWITCH_EXPR, "switch_expr", tcc_statement, 3)
> 
> -/* Used to represent a case label. The operands are CASE_LOW and
> -   CASE_HIGH, respectively. If CASE_LOW is NULL_TREE, the label is a
> -   'default' label. If CASE_HIGH is NULL_TREE, the label is a normal case
> -   label.  CASE_LABEL is the corresponding LABEL_DECL.  */
> +/* Used to represent a case label.
> +
> +   Operand 0 is CASE_LOW.  It may be NULL_TREE, in which case the label
> + is a 'default' label.
> +   Operand 1 is CASE_HIGH.  If it is NULL_TREE, the label is a simple
> + (one-value) case label.  If it is non-NULL_TREE, the case is a range.
> +   Operand 2 is CASE_LABEL, which is is the corresponding LABEL_DECL.
> +   Operand 4 is CASE_CHAIN.  This operand is only used in tree-cfg.c to
> + speed up the lookup of case labels which use a particular edge in
> + the control flow graph.  */
>  DEFTREECODE (CASE_LABEL_EXPR, "case_label_expr", tcc_statement, 4)

Typo: the last one's operand 3 not 4.  ;-)

> --- tree-cfg.c  (revision 186526)
> +++ tree-cfg.c  (working copy)
> @@ -56,7 +56,7 @@ static const int initial_cfg_capacity = 20;
> 
>  /* This hash table allows us to efficiently lookup all CASE_LABEL_EXPRs
> which use a particular edge.  The CASE_LABEL_EXPRs are chained together
> -   via their TREE_CHAIN field, which we clear after we're done with the
> +   via their CASE_CHAIN field, which we clear after we're done with the
> hash table to prevent problems with duplication of GIMPLE_SWITCHes.
> 
> Access to this list of CASE_LABEL_EXPRs allows us to efficiently

The thing doing the "clear after we're done" likewise needs to get its
documentation updated.  ;-)

As obvious, committed to trunk in r237384:

commit 00091facd9b1a23f371a11b4c48e7a106f6d1011
Author: tschwinge 
Date:   Mon Jun 13 16:10:35 2016 +0000

Fix CASE_CHAIN typos

gcc/
* tree-cfg.c (edge_to_cases_cleanup): Fix CASE_CHAIN typo.
* tree.def (CASE_LABEL_EXPR): Likewise.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@237384 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog  | 5 +
 gcc/tree-cfg.c | 2 +-
 gcc/tree.def   | 2 +-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git gcc/ChangeLog gcc/ChangeLog
index c2f0f7e..733e512 100644
--- gcc/ChangeLog
+++ gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-06-13  Thomas Schwinge  
+
+   * tree-cfg.c (edge_to_cases_cleanup): Fix CASE_CHAIN typo.
+   * tree.def (CASE_LABEL_EXPR): Likewise.
+
 2016-06-13  Bernd Edlinger  
 
* input.c (test_builtins): Fix an assertion.
diff --git gcc/tree-cfg.c gcc/tree-cfg.c
index 40e524b..0fac49c 100644
--- gcc/tree-cfg.c
+++ gcc/tree-cfg.c
@@ -1126,7 +1126,7 @@ make_cond_expr_edges (basic_block bb)
 /* Called for each element in the hash table (P) as we delete the
edge to cases hash table.
 
-   Clear all the TREE_CHAINs to prevent problems with copying of
+   Clear all the CASE_CHAINs to prevent problems with copying of
SWITCH_EXPRs and structure sharing rules, then free the hash table
element.  */
 
diff --git gcc/tree.def gcc/tree.def
index d16575a..2c35540 100644
--- gcc/tree.def
+++ gcc/tree.def
@@ -949,7 +949,7 @@ DEFTREECODE (SWITCH_EXPR, "switch_expr", tcc_statement, 3)
Operand 1 is CASE_HIGH.  If it is NULL_TREE, the label is a simple
  (one-value) case label.  If it is non-NULL_TREE, the case is a range.
Operand 2 is CASE_LABEL, which is is the corresponding LABEL_DECL.
-   Operand 4 is CASE_CHAIN.  This operand is only used in tree-cfg.c to
+   Operand 3 is CASE_CHAIN.  This operand is only used in tree-cfg.c to
  speed up the lookup of case labels which use a particular edge in
  the control flow graph.  */
 DEFTREECODE (CASE_LABEL_EXPR, "case_label_expr", tcc_statement, 4)


Grüße
 Thomas


signature.asc
Description: PGP signature


[libiberty][PATCH] Avoid zero-length VLAs.

2016-06-13 Thread Brooks Moses
Zero-length variable-length-arrays are not allowed in standard C99,
and perhaps more importantly, they cause ASAN to complain.  (See,
e.g., https://gcc.gnu.org/ml/gcc-patches/2013-09/msg00917.html.)

With this patch, the libiberty tests, including demangler-fuzzer, are
ASAN-clean.

- Brooks



 libiberty/ChangeLog 
--- a/libiberty/ChangeLog
+++ b/libiberty/ChangeLog
@@ -1,3 +1,8 @@
+2016-06-12  Brooks Moses  
+
+   * cp-demangle.c (cplus_demangle_print_callback): Avoid zero-length
+   VLAs.
+
 2016-05-31  Alan Modra  

* xmemdup.c (xmemdup): Use xmalloc rather than xcalloc.
 libiberty/cp-demangle.c 
--- a/libiberty/cp-demangle.c
+++ b/libiberty/cp-demangle.c
@@ -4120,8 +4120,10 @@

   {
 #ifdef CP_DYNAMIC_ARRAYS
-__extension__ struct d_saved_scope scopes[dpi.num_saved_scopes];
-__extension__ struct d_print_template temps[dpi.num_copy_templates];
+__extension__ struct d_saved_scope scopes[(dpi.num_saved_scopes > 0)
+ ? dpi.num_saved_scopes : 1];
+__extension__ struct d_print_template temps[(dpi.num_copy_templates > 0)
+   ? dpi.num_copy_templates : 1];

 dpi.saved_scopes = scopes;
 dpi.copy_templates = temps;


[Bug c++/70507] integer overflow builtins not constant expressions

2016-06-13 Thread msebor at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70507

Martin Sebor  changed:

   What|Removed |Added

 Status|ASSIGNED|RESOLVED
 Resolution|--- |FIXED

--- Comment #6 from Martin Sebor  ---
r237238 lets GCC 7 accept the integer overflow built-ins in constant
expressions whenever their arguments are.

[Bug c/68120] can't easily deal with integer overflow at compile time

2016-06-13 Thread msebor at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68120

Martin Sebor  changed:

   What|Removed |Added

 Status|ASSIGNED|RESOLVED
 Resolution|--- |FIXED

--- Comment #8 from Martin Sebor  ---
r237238 lets GCC 7 accept the integer overflow built-ins in constant
expressions whenever their arguments are.

Re: [PATCH] Fix bootstrap when user language is not english

2016-06-13 Thread Jakub Jelinek
On Mon, Jun 13, 2016 at 03:39:21PM +, Bernd Edlinger wrote:
> On 06/13/16 17:27, David Malcolm wrote:
> > On Mon, 2016-06-13 at 14:41 +, Bernd Edlinger wrote:
> >> Hi,
> >>
> >> as noted in PR bootstrap/71481, comment#4 currently
> >> the trunk fails to bootstrap if the current language is
> >> not english.  A workaround is possible by setting LANG=C,
> >> but OTOH it is rather easy to fix, by translating the string
> >> in the assertion, as it is the only place that is affected by
> >> the language setting.
> >>
> >>
> >> Boot-strapped and reg-tested on trunk with LANG=de_DE.UTF-8.
> >> OK to commit?
> >
> > Sorry about the breakage.
> >
> > I believe I can approve this with my "libcpp"/"diagnostics" hats on, so
> > LGTM.
> >
> 
> Thanks.

Please put PR bootstrap/71481 into the ChangeLog entry though.

> > That said, should we hardcode LANG=C when running the selftests from
> > gcc/Makefile.in?
> >
> 
> Honestly, I am glad to see that there is some sort of unit test which
> runs in a different LANG setting than the rest of the testsuite.
> Because as this incident clearly shows, there _can_ be bugs that do not
> show up in the default locale.
> 
> I would put the question this way: could it be possible to run also
> some tests in the testsuite with a LANG setting different from "C"?

I think running the s-selftest in C locale is a good idea, but maybe
we should have some test in gcc.dg or where that would run -fself-tests
in some other locale.  I think right now we force LC_ALL=C for all tests,
but perhaps /* { dg-set-compiler-env-var LC_ALL "something" } */
would work.  But perhaps we'd need some tcl test for whether the locale is
supported by the system.

Jakub


Re: [patch, avr] Fix PR67353

2016-06-13 Thread Georg-Johann Lay

Pitchumani Sivanupandi schrieb:

Hi,

This patch introduces new flags for warning 'misspelled interrupt/
signal handler'. Flag -Wmisspelled-isr is enabled by default and it
will warn user if the interrupt/ signal handler is without '__vector'
prefix. Flag -Wno-misspelled-isr shall be enabled by user to allow
custom names, i.e. without __vector prefix.

// avr-gcc -c test.c
void custom_interruption(void) __attribute__((signal));
void custom_interruption(void) {}

Behavior after applying this patch:

$ avr-gcc test.c 
test.c: In function 'custom_interruption':

test.c:2:6: warning: 'custom_interruption' appears to be a misspelled
signal handler
 void custom_interruption(void) {}
  ^~~

$ avr-gcc test.c -Wmisspelled-isr
test.c: In function 'custom_interruption':
test.c:2:6: warning: 'custom_interruption' appears to be a misspelled
signal handler
 void custom_interruption(void) {}
  ^~~

$ avr-gcc test.c -Wno-misspelled-isr
$


What about -Werror=misspelled-isr?

> [...]

diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c
index ba5cd91..587bdbc 100644
--- a/gcc/config/avr/avr.c
+++ b/gcc/config/avr/avr.c
@@ -753,7 +753,7 @@ avr_set_current_function (tree decl)
  that the name of the function is "__vector_NN" so as to catch
  when the user misspells the vector name.  */
 
-  if (!STR_PREFIX_P (name, "__vector"))

+  if ((!STR_PREFIX_P (name, "__vector")) && (avr_warn_misspelled_isr))
 warning_at (loc, 0, "%qs appears to be a misspelled %s handler",


If, instead of the "0", the respective OPT_... enum is used in the call 
to warning_at, then -Werror= should work as expected (and the explicit "&& 
avr_warn_misspelled_isr" is no longer needed).


Johann


Re: [PATCH] Fix bootstrap when user language is not english

2016-06-13 Thread Bernd Edlinger
On 06/13/16 17:27, David Malcolm wrote:
> On Mon, 2016-06-13 at 14:41 +, Bernd Edlinger wrote:
>> Hi,
>>
>> as noted in PR bootstrap/71481, comment#4 currently
>> the trunk fails to bootstrap if the current language is
>> not english.  A workaround is possible by setting LANG=C,
>> but OTOH it is rather easy to fix, by translating the string
>> in the assertion, as it is the only place that is affected by
>> the language setting.
>>
>>
>> Boot-strapped and reg-tested on trunk with LANG=de_DE.UTF-8.
>> OK to commit?
>
> Sorry about the breakage.
>
> I believe I can approve this with my "libcpp"/"diagnostics" hats on, so
> LGTM.
>

Thanks.

> That said, should we hardcode LANG=C when running the selftests from
> gcc/Makefile.in?
>

Honestly, I am glad to see that there is some sort of unit test which
runs in a different LANG setting than the rest of the testsuite.
Because as this incident clearly shows, there _can_ be bugs that do not
show up in the default locale.

I would put the question this way: could it be possible to run also
some tests in the testsuite with a LANG setting different from "C"?



Bernd.


Re: [PATCH] Fix bootstrap when user language is not english

2016-06-13 Thread David Malcolm
On Mon, 2016-06-13 at 14:41 +, Bernd Edlinger wrote:
> Hi,
> 
> as noted in PR bootstrap/71481, comment#4 currently
> the trunk fails to bootstrap if the current language is
> not english.  A workaround is possible by setting LANG=C,
> but OTOH it is rather easy to fix, by translating the string
> in the assertion, as it is the only place that is affected by
> the language setting.
> 
> 
> Boot-strapped and reg-tested on trunk with LANG=de_DE.UTF-8.
> OK to commit?

Sorry about the breakage.

I believe I can approve this with my "libcpp"/"diagnostics" hats on, so
LGTM.

That said, should we hardcode LANG=C when running the selftests from
gcc/Makefile.in?


Dave


Re: [PATCH] Add ggc-tests.c

2016-06-13 Thread David Malcolm
On Mon, 2016-06-13 at 13:36 +0200, Ulrich Weigand wrote:
> Gerald Pfeifer wrote:
> 
> > The source code of need_finalization_p in ggc.h reads
> > 
> > >    template<typename T>
> >static inline bool
> >need_finalization_p ()
> >{
> >#if GCC_VERSION >= 4003
> >  return !__has_trivial_destructor (T);
> >#else
> >  return true;
> >#endif
> >}
> > 
> > which means your self test is broken by design for any compiler
> > that is not GCC in at least version 4.3, isn't it?
> 
> Just to confirm that I'm seeing the same failure on my SPU
> daily build machine, which is running RHEL 5 with a host
> compiler of GCC 4.1.2.

Sorry about this.

Looks like Uros fixed this in r237381.


[Bug c++/71463] [6/7 regression] unexpected warning: ignoring function return attributes on template argument

2016-06-13 Thread msebor at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71463

Martin Sebor  changed:

   What|Removed |Added

 Status|UNCONFIRMED |NEW
  Known to work||5.4.0
   Keywords||diagnostic
   Last reconfirmed||2016-06-13
 CC||msebor at gcc dot gnu.org
 Ever confirmed|0   |1
Summary|"ignoring attributes on |[6/7 regression] unexpected
   |template argument" in -O1   |warning: ignoring function
   |and above   |return attributes on
   ||template argument
  Known to fail||6.1.0, 7.0

--- Comment #4 from Martin Sebor  ---
I looked into this a bit and I'm not sure the warning works quite as intended
in this case.

First, the warning doesn't say which of the attributes is ignored, and so when
multiple attributes are specified, it suggests that all of them are ignored. 
But tests with single attributes show that only some trigger the warning,
raising the question of whether the warning works correctly.

Second, it's unclear to me what purpose the warning is meant to serve in this
case.  Since a function attribute always applies to the instance of the
function it decorates and never affects its type the warning doesn't indicate
anything unusual or unexpected, and only serves to confuse users.  (In cases
where the function is declared in a system header it's also unclear how the
warning should be avoided.)

Looking at the history of the warning for the test case, it started with
r222530 committed to fix bug 50800 which has to do with type attributes, not
those of functions (or variables), and there is no test that verifies that it
should be issued for the case of functions (or variables).  I'm inclined to
agree that this is a bug.  Confirming as a 6/7 regression with the test case
below:

$ cat t.C && /home/msebor/build/gcc-6-branch/gcc/xgcc -B
/home/msebor/build/gcc-6-branch/gcc -S -Wall -Wextra -Wpedantic t.C
void* __attribute__ ((assume_aligned (32))) f0 ();
void* __attribute__ ((returns_nonnull)) f1 ();

void* __attribute__ ((const)) f2 ();
void* __attribute__ ((const, warn_unused_result)) f3 ();

template  struct S { };

S s0;
S s1;
S s2;   // no warning
S s3;   // which of the two attributes are ignored?
t.C:9:17: warning: ignoring attributes on template argument ‘void* (*)()’
[-Wignored-attributes]
 S s0;
 ^
t.C:10:17: warning: ignoring attributes on template argument ‘void* (*)()’
[-Wignored-attributes]
 S s1;
 ^
t.C:12:17: warning: ignoring attributes on template argument ‘void* (*)()’
[-Wignored-attributes]
 S s3;   // which of the two attributes are ignored?
 ^

[Bug c++/71516] [5/6/7 Regression] ICE on invalid C++ code (invalid use of forward declared type) on x86_64-linux-gnu: Segmentation fault (program cc1plus)

2016-06-13 Thread jakub at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71516

Jakub Jelinek  changed:

   What|Removed |Added

 Status|NEW |ASSIGNED
   Assignee|unassigned at gcc dot gnu.org  |jakub at gcc dot gnu.org

--- Comment #3 from Jakub Jelinek  ---
Created attachment 38696
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=38696=edit
gcc7-pr71516.patch

Untested fix.

[Bug tree-optimization/71483] [7 Regression] g++ ICE at -O3 on valid code on x86_64-linux-gnu with “Floating point exception”

2016-06-13 Thread alahay01 at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71483

alahay01 at gcc dot gnu.org changed:

   What|Removed |Added

 CC||alahay01 at gcc dot gnu.org
   Assignee|unassigned at gcc dot gnu.org  |alahay01 at gcc dot 
gnu.org

--- Comment #2 from alahay01 at gcc dot gnu.org ---
Due to error when vectorizing a live SLP operation.

[Bug tree-optimization/71490] [7 regression] gcc.dg/tree-ssa/slsr-8.c FAILs

2016-06-13 Thread thopre01 at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71490

Thomas Preud'homme  changed:

   What|Removed |Added

 CC||thopre01 at gcc dot gnu.org

--- Comment #2 from Thomas Preud'homme  ---
The FAIL started at r237185.

Re: [PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c

2016-06-13 Thread Thomas Schwinge
Hi!

On Mon, 13 Jun 2016 16:43:25 +0200, Thomas Schwinge  
wrote:
> On Wed, 01 Jun 2016 17:06:42 +0200, Thomas Schwinge  
> wrote:
> > Here are the OpenACC bits of .
> 
> In the PR, Jakub clarified that all the missing other OMP_CLAUSE_* are in
> fact all unreachable here.  [...]
> 
> The "anything else" default case in fact now is just the non-clause
> OMP_CLAUSE_ERROR, so when adding a case for that one, we could then
> remove the default case, and thus get a compiler warning when new clauses
> are added in the future, without handling them here.  That makes sense to
> me (would have made apparent much earlier the original problem of missing
> handling for certain OMP_CLAUSE_*), but based on feedback received, it
> feels as if I'm the only supporter of such "defensive" programming
> paradigms?

That is, something like that:

--- gcc/tree-nested.c
+++ gcc/tree-nested.c
@@ -1225,8 +1225,9 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct 
walk_stmt_info *wi)
case OMP_CLAUSE__LOOPTEMP_:
case OMP_CLAUSE__SIMDUID_:
case OMP_CLAUSE__GRIDDIM_:
- /* Anything else.  */
-   default:
+ /* This non-clause should never be seen outside of the front
+ends.  */
+   case OMP_CLAUSE_ERROR:
  gcc_unreachable ();
}
 }
@@ -1933,8 +1934,9 @@ convert_local_omp_clauses (tree *pclauses, struct 
walk_stmt_info *wi)
case OMP_CLAUSE__LOOPTEMP_:
case OMP_CLAUSE__SIMDUID_:
case OMP_CLAUSE__GRIDDIM_:
- /* Anything else.  */
-   default:
+ /* This non-clause should never be seen outside of the front
+ends.  */
+   case OMP_CLAUSE_ERROR:
  gcc_unreachable ();
}
 }


Grüße
 Thomas


Re: [PATCH][vectorizer][2/2] PR 65951: Hook up mult synthesis logic into vectorisation of mult-by-constant

2016-06-13 Thread Marc Glisse

+  /* All synthesis algorithms require shifts, so bail out early if
+ target cannot vectorize them.  */
+  if (!target_has_vecop_for_code (LSHIFT_EXPR, vectype))
+return false;

Hmm, 2 points:

* Could you use vect_supportable_shift (or equivalent) instead? This way 
it will work even if a target/mode supports vector << scalar and not 
vector << vector.


* This means that we will refuse to vectorize x*2 as x+x, which was the 
goal of my patch (SPARC VIS has additions, no shift, and limited 
multiplications, IIRC). I guess it would be possible, as a follow-up (it 
doesn't have to block your patch), not to give up in the no-shift branch, 
but to handle some small factors with only additions and subtractions. Or 
to split the emission of shifts to a function that, when shifts are not 
supported, emulates them with additions. Or even emit shifts and rely on 
expand or vector lowering to turn them to additions (though the estimated 
cost might be off). Any idea on the best way to handle SPARC?


--
Marc Glisse


Re: [PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c

2016-06-13 Thread Jakub Jelinek
On Mon, Jun 13, 2016 at 04:43:25PM +0200, Thomas Schwinge wrote:
> On Wed, 01 Jun 2016 17:06:42 +0200, Thomas Schwinge  
> wrote:
> > Here are the OpenACC bits of .
> 
> In the PR, Jakub clarified that all the missing other OMP_CLAUSE_* are in
> fact all unreachable here.  OK to document this as follows, in trunk?
> 
> The "anything else" default case in fact now is just the non-clause
> OMP_CLAUSE_ERROR, so when adding a case for that one, we could then
> remove the default case, and thus get a compiler warning when new clauses
> are added in the future, without handling them here.  That makes sense to
> me (would have made apparent much earlier the original problem of missing
> handling for certain OMP_CLAUSE_*), but based on feedback received, it
> feels as if I'm the only supporter of such "defensive" programming
> paradigms?
> 
> commit c6b10a9bc1437395c4931d43f30e778152a28cb2
> Author: Thomas Schwinge 
> Date:   Mon Jun 13 16:29:37 2016 +0200
> 
> [PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c
> 
>   gcc/
>   * tree-nested.c (convert_nonlocal_omp_clauses):
>   (convert_local_omp_clauses): Document missing OMP_CLAUSE_*.

Ok, but please mention the PR line above the ChangeLog entry.  Thanks.

Jakub


[PATCH, i386]: Use ix86_expand_setcc some more

2016-06-13 Thread Uros Bizjak
No functional changes.

2016-06-13  Uros Bizjak  

* config/i386/i386.md (paritydi2): Use ix86_expand_setcc.
(paritysi2): Ditto.
(isinfxf2): Ditto.
(isinf2): Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
Index: config/i386/i386.md
===
--- config/i386/i386.md (revision 237381)
+++ config/i386/i386.md (working copy)
@@ -13458,15 +13458,12 @@
   "! TARGET_POPCNT"
 {
   rtx scratch = gen_reg_rtx (QImode);
-  rtx cond;
 
   emit_insn (gen_paritydi2_cmp (NULL_RTX, NULL_RTX,
NULL_RTX, operands[1]));
 
-  cond = gen_rtx_fmt_ee (ORDERED, QImode,
-gen_rtx_REG (CCmode, FLAGS_REG),
-const0_rtx);
-  emit_insn (gen_rtx_SET (scratch, cond));
+  ix86_expand_setcc (scratch, ORDERED,
+gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
 
   if (TARGET_64BIT)
 emit_insn (gen_zero_extendqidi2 (operands[0], scratch));
@@ -13486,14 +13483,11 @@
   "! TARGET_POPCNT"
 {
   rtx scratch = gen_reg_rtx (QImode);
-  rtx cond;
 
   emit_insn (gen_paritysi2_cmp (NULL_RTX, NULL_RTX, operands[1]));
 
-  cond = gen_rtx_fmt_ee (ORDERED, QImode,
-gen_rtx_REG (CCmode, FLAGS_REG),
-const0_rtx);
-  emit_insn (gen_rtx_SET (scratch, cond));
+  ix86_expand_setcc (scratch, ORDERED,
+gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
 
   emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
   DONE;
@@ -16159,8 +16153,6 @@
   rtx mask = GEN_INT (0x45);
   rtx val = GEN_INT (0x05);
 
-  rtx cond;
-
   rtx scratch = gen_reg_rtx (HImode);
   rtx res = gen_reg_rtx (QImode);
 
@@ -16168,10 +16160,8 @@
 
   emit_insn (gen_andqi_ext_0 (scratch, scratch, mask));
   emit_insn (gen_cmpqi_ext_3 (scratch, val));
-  cond = gen_rtx_fmt_ee (EQ, QImode,
-gen_rtx_REG (CCmode, FLAGS_REG),
-const0_rtx);
-  emit_insn (gen_rtx_SET (res, cond));
+  ix86_expand_setcc (res, EQ,
+gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
   emit_insn (gen_zero_extendqisi2 (operands[0], res));
   DONE;
 })
@@ -16186,8 +16176,6 @@
   rtx mask = GEN_INT (0x45);
   rtx val = GEN_INT (0x05);
 
-  rtx cond;
-
   rtx scratch = gen_reg_rtx (HImode);
   rtx res = gen_reg_rtx (QImode);
 
@@ -16204,10 +16192,8 @@
 
   emit_insn (gen_andqi_ext_0 (scratch, scratch, mask));
   emit_insn (gen_cmpqi_ext_3 (scratch, val));
-  cond = gen_rtx_fmt_ee (EQ, QImode,
-gen_rtx_REG (CCmode, FLAGS_REG),
-const0_rtx);
-  emit_insn (gen_rtx_SET (res, cond));
+  ix86_expand_setcc (res, EQ,
+gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
   emit_insn (gen_zero_extendqisi2 (operands[0], res));
   DONE;
 })


[PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c

2016-06-13 Thread Thomas Schwinge
Hi!

On Wed, 01 Jun 2016 17:06:42 +0200, Thomas Schwinge  
wrote:
> Here are the OpenACC bits of .

In the PR, Jakub clarified that all the missing other OMP_CLAUSE_* are in
fact all unreachable here.  OK to document this as follows, in trunk?

The "anything else" default case in fact now is just the non-clause
OMP_CLAUSE_ERROR, so when adding a case for that one, we could then
remove the default case, and thus get a compiler warning when new clauses
are added in the future, without handling them here.  That makes sense to
me (would have made apparent much earlier the original problem of missing
handling for certain OMP_CLAUSE_*), but based on feedback received, it
feels as if I'm the only supporter of such "defensive" programming
paradigms?

commit c6b10a9bc1437395c4931d43f30e778152a28cb2
Author: Thomas Schwinge 
Date:   Mon Jun 13 16:29:37 2016 +0200

[PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c

gcc/
* tree-nested.c (convert_nonlocal_omp_clauses):
(convert_local_omp_clauses): Document missing OMP_CLAUSE_*.
---
 gcc/tree-nested.c | 60 ++-
 1 file changed, 42 insertions(+), 18 deletions(-)

diff --git gcc/tree-nested.c gcc/tree-nested.c
index 812f619..62cb01f 100644
--- gcc/tree-nested.c
+++ gcc/tree-nested.c
@@ -1203,17 +1203,29 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct 
walk_stmt_info *wi)
case OMP_CLAUSE_AUTO:
  break;
 
+ /* OpenACC tile clauses are discarded during gimplification.  */
case OMP_CLAUSE_TILE:
- /* OpenACC tile clauses are discarded during gimplification, so we
-don't expect to see anything here.  */
- gcc_unreachable ();
-
+ /* The following clause belongs to the OpenACC cache directive, which
+is discarded during gimplification.  */
case OMP_CLAUSE__CACHE_:
- /* These clauses belong to the OpenACC cache directive, which is
-discarded during gimplification, so we don't expect to see
-anything here.  */
- gcc_unreachable ();
-
+ /* The following clauses are only allowed in the OpenMP declare simd
+directive, so not seen here.  */
+   case OMP_CLAUSE_UNIFORM:
+   case OMP_CLAUSE_INBRANCH:
+   case OMP_CLAUSE_NOTINBRANCH:
+ /* The following clauses are only allowed on OpenMP cancel and
+cancellation point directives, which at this point have already
+been lowered into a function call.  */
+   case OMP_CLAUSE_FOR:
+   case OMP_CLAUSE_PARALLEL:
+   case OMP_CLAUSE_SECTIONS:
+   case OMP_CLAUSE_TASKGROUP:
+ /* The following clauses are only added during OMP lowering; nested
+function decomposition happens before that.  */
+   case OMP_CLAUSE__LOOPTEMP_:
+   case OMP_CLAUSE__SIMDUID_:
+   case OMP_CLAUSE__GRIDDIM_:
+ /* Anything else.  */
default:
  gcc_unreachable ();
}
@@ -1899,17 +1911,29 @@ convert_local_omp_clauses (tree *pclauses, struct 
walk_stmt_info *wi)
case OMP_CLAUSE_AUTO:
  break;
 
+ /* OpenACC tile clauses are discarded during gimplification.  */
case OMP_CLAUSE_TILE:
- /* OpenACC tile clauses are discarded during gimplification, so we
-don't expect to see anything here.  */
- gcc_unreachable ();
-
+ /* The following clause belongs to the OpenACC cache directive, which
+is discarded during gimplification.  */
case OMP_CLAUSE__CACHE_:
- /* These clauses belong to the OpenACC cache directive, which is
-discarded during gimplification, so we don't expect to see
-anything here.  */
- gcc_unreachable ();
-
+ /* The following clauses are only allowed in the OpenMP declare simd
+directive, so not seen here.  */
+   case OMP_CLAUSE_UNIFORM:
+   case OMP_CLAUSE_INBRANCH:
+   case OMP_CLAUSE_NOTINBRANCH:
+ /* The following clauses are only allowed on OpenMP cancel and
+cancellation point directives, which at this point have already
+been lowered into a function call.  */
+   case OMP_CLAUSE_FOR:
+   case OMP_CLAUSE_PARALLEL:
+   case OMP_CLAUSE_SECTIONS:
+   case OMP_CLAUSE_TASKGROUP:
+ /* The following clauses are only added during OMP lowering; nested
+function decomposition happens before that.  */
+   case OMP_CLAUSE__LOOPTEMP_:
+   case OMP_CLAUSE__SIMDUID_:
+   case OMP_CLAUSE__GRIDDIM_:
+ /* Anything else.  */
default:
  gcc_unreachable ();
}


Grüße
 Thomas


[PATCH] Fix bootstrap when user language is not english

2016-06-13 Thread Bernd Edlinger
Hi,

As noted in PR bootstrap/71481, comment #4, the trunk currently
fails to bootstrap if the current language is not English.
A workaround is possible by setting LANG=C, but OTOH it is
rather easy to fix by translating the string in the assertion,
as it is the only place that is affected by the language
setting.


Boot-strapped and reg-tested on trunk with LANG=de_DE.UTF-8.
OK to commit?


Thanks
Bernd.

2016-06-13  Bernd Edlinger  

	* input.c (test_builtins): Fix an assertion.

Index: gcc/input.c
===
--- gcc/input.c	(Revision 237379)
+++ gcc/input.c	(Arbeitskopie)
@@ -1210,7 +1210,7 @@ test_unknown_location ()
 static void
 test_builtins ()
 {
-  assert_loceq ("", 0, 0, BUILTINS_LOCATION);
+  assert_loceq (_(""), 0, 0, BUILTINS_LOCATION);
   ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
 }
 


[Bug bootstrap/71510] [7 Regression] Failed to bootstrap with --with-arch=corei7 --with-cpu=intel

2016-06-13 Thread hjl.tools at gmail dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71510

--- Comment #4 from H.J. Lu  ---
(In reply to Richard Biener from comment #2)
> Created attachment 38694 [details]
> this probably fixes it (didnt' try to reproduce)

Yes, it fixes it.  Thanks.

[Bug tree-optimization/71520] Missing cross-jumping of switch cases

2016-06-13 Thread tschwinge at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71520

Thomas Schwinge  changed:

   What|Removed |Added

 CC||tschwinge at gcc dot gnu.org

--- Comment #3 from Thomas Schwinge  ---
Out of interest, I've also started to look into GIMPLE_SWITCH issues a bit, at
the end of last week (low priority for me, though).  One of my test cases
should be similar to the one you're addressing with your patch; will test.

[Bug tree-optimization/71522] [5/6/7 Regression] Wrong optimization of memcpy through a var of type long double

2016-06-13 Thread rguenth at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71522

Richard Biener  changed:

   What|Removed |Added

   Priority|P3  |P2
  Known to work||4.9.3
   Target Milestone|--- |5.5
Summary|Wrong optimization of   |[5/6/7 Regression] Wrong
   |memcpy through a var of |optimization of memcpy
   |type long double|through a var of type long
   ||double
  Known to fail||5.1.0, 7.0

[Bug tree-optimization/71522] Wrong optimization of memcpy through a var of type long double

2016-06-13 Thread rguenth at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71522

Richard Biener  changed:

   What|Removed |Added

   Keywords||wrong-code
  Component|c   |tree-optimization

--- Comment #3 from Richard Biener  ---
So it goes "wrong" when transforming 

  long double d;
  __int128 unsigned _3;

  MEM[(char * {ref-all})] = 0x414141414141414141414141414141;
  _3 = MEM[(char * {ref-all})];

into SSA as

  d_9 = 4.35573826932891467758901725805789285479666446831741854231e+96;
  _3 = VIEW_CONVERT_EXPR<__int128 unsigned>(d_9);

Similar to how we avoid using an FP representation elsewhere for what is
accessed as integers, we should probably do the same in update-address-taken
(otherwise FP normalization will apply via build_real and friends).

Re: [Patch AArch64] Fixup to fcvt patterns added in r237200

2016-06-13 Thread Kyrill Tkachov


On 10/06/16 13:29, James Greenhalgh wrote:

Hi,

My autotester picked up some issues with the vcvt{ds}_n_* intrinsics
added in r237200.

The iterators in this pattern do not resolve, as they have not been
explicitly tied to the mode iterator (rather than the code iterator)
used by the pattern.

This fixup adds the attribute tags, allowing the patterns to work
correctly.

Additionally, the types assigned to these instructions were wrong, and
would permit the immediate operand to be in a register. This will then
develop into an ICE as the patterns require an immediate operand, and so
won't match. The ICE can be exposed by writing a wrapping function around
the vcvtd_n_* intrinsics, which forces the immediate operand to a register.
We have the infrastructure to error to the user rather than ICEing, but it
needs some different types, which this patch adds.

I've checked this with an aarch64-none-elf test run, and run it through
several rounds of my autotester for aarch64-none-elf and
aarch64_be-none-elf.

OK?

Thanks,
James

---
2016-06-10  James Greenhalgh  

* config/aarch64/aarch64.md
(3): Add attributes to
iterators.
(3): Likewise.  Correct
attributes.
* config/aarch64/aarch64-builtins.c
(aarch64_types_binop_uss_qualifiers): Delete.
(TYPES_BINOP_USS): Likewise.
(aarch64_types_binop_sus_qualifiers): Likewise.
(TYPES_BINOP_SUS): Likewise.
(aarch64_types_fcvt_from_unsigned_qualifiers): New.
(TYPES_FCVTIMM_SUS): Likewise.
* config/aarch64/aarch64-simd-builtins.def (scvtf): Use SHIFTIMM
rather than BINOP.
(ucvtf): Use FCVTIMM_SUS rather than BINOP_SUS.
(fcvtzs): Use SHIFTIMM rather than BINOP.
(fcvtzu): Use SHIFTIMM_USS rather than BINOP_USS.



LGTM (but I can't approve).

Kyrill



Re: [PATCH][1/2] Move mult synthesis definitions into a separate file

2016-06-13 Thread Kyrill Tkachov

Hi Richard,

On 13/06/16 15:07, Richard Biener wrote:

On Mon, Jun 13, 2016 at 2:23 PM, Kyrill Tkachov
 wrote:

Hi all,

There are other places besides expand where we might want to synthesize an
integer
multiplication by a constant.  Thankfully the algorithm selection code in
expmed.c
is already quite well separated from the RTL implementation, so if we can
just factor
out the prototype of choose_mult_variant and some enums and structs that it
needs into
a separate header file we can reuse them from other parts of the compiler.

I need this for patch 2/2 which hooks up the vectorizer to synthesize vector
multiplications using sequences of shifts and other arithmetic ops when
appropriate.

The new header is called mult-synthesis.h. Should I add it to some makefile?
grepping around for a bit I'm not sure what to do about it.

Possibly PLUGIN_HEADERS.


Ok.


You could have included expmed.h from the vectorizer, no?  After all this
patch now breaks that things declared in A.h are defined in A.c as you
didn't move choose_mult_variant itself.


I think including expmed.h would work. I thought it defined too many
irrelevant RTL-specific things that you wouldn't want in the vectoriser.
If you don't mind I'm happy to just include expmed.h.
Do we have a rule for defining things declared in A.h in A.c?
I notice we declare various extern things in rtl.h that aren't defined in
rtl.c, though I suppose that would be an exception...

Thanks,
Kyrill


Thanks,
Richard.


Bootstrapped and tested on arm, aarch64, x86_64.

Thanks,
Kyrill

2016-06-13  Kyrylo Tkachov  

 * mult-synthesis.h: New file.  Add choose_mult_variant prototype.
 * expmed.h: Include mult-synthesis.h
 (enum alg_code): Move to mult-synthesis.h
 (struct mult_cost): Likewise.
 (struct algorithm): Likewise.
 * expmed.c (enum mult_variant): Move to mult-synthesis.h
 (choose_mult_variant): Delete prototype.  Remove static qualifier.




[Bug c/71522] Wrong optimization of memcpy through a var of type long double

2016-06-13 Thread rguenth at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71522

Richard Biener  changed:

   What|Removed |Added

 Status|UNCONFIRMED |ASSIGNED
   Last reconfirmed||2016-06-13
   Assignee|unassigned at gcc dot gnu.org  |rguenth at gcc dot 
gnu.org
 Ever confirmed|0   |1

--- Comment #2 from Richard Biener  ---
Mine.

Re: [PATCH][1/2] Move mult synthesis definitions into a separate file

2016-06-13 Thread Richard Biener
On Mon, Jun 13, 2016 at 2:23 PM, Kyrill Tkachov
 wrote:
> Hi all,
>
> There are other places besides expand where we might want to synthesize an
> integer
> multiplication by a constant.  Thankfully the algorithm selection code in
> expmed.c
> is already quite well separated from the RTL implementation, so if we can
> just factor
> out the prototype of choose_mult_variant and some enums and structs that it
> needs into
> a separate header file we can reuse them from other parts of the compiler.
>
> I need this for patch 2/2 which hooks up the vectorizer to synthesize vector
> multiplications using sequences of shifts and other arithmetic ops when
> appropriate.
>
> The new header is called mult-synthesis.h. Should I add it to some makefile?
> grepping around for a bit I'm not sure what to do about it.

Possibly PLUGIN_HEADERS.

You could have included expmed.h from the vectorizer, no?  After all this
patch now breaks that things declared in A.h are defined in A.c as you
didn't move choose_mult_variant itself.

Thanks,
Richard.

>
> Bootstrapped and tested on arm, aarch64, x86_64.
>
> Thanks,
> Kyrill
>
> 2016-06-13  Kyrylo Tkachov  
>
> * mult-synthesis.h: New file.  Add choose_mult_variant prototype.
> * expmed.h: Include mult-synthesis.h
> (enum alg_code): Move to mult-synthesis.h
> (struct mult_cost): Likewise.
> (struct algorithm): Likewise.
> * expmed.c (enum mult_variant): Move to mult-synthesis.h
> (choose_mult_variant): Delete prototype.  Remove static qualifier.


Re: RFC (gimplify, openmp): PATCH to is_gimple_reg to check DECL_HAS_VALUE_EXPR_P

2016-06-13 Thread Jason Merrill
On Mon, Jun 13, 2016 at 5:03 AM, Richard Biener
 wrote:
> On Sat, Jun 11, 2016 at 9:30 PM, Jakub Jelinek  wrote:
>> On Sat, Jun 11, 2016 at 08:43:06PM +0200, Richard Biener wrote:
>>> On June 10, 2016 9:48:45 PM GMT+02:00, Jason Merrill  
>>> wrote:
>>> >While working on another issue I noticed that is_gimple_reg was happily
>>> >
>>> >accepting VAR_DECLs with DECL_VALUE_EXPR even when later gimplification
>>> >
>>> >would replace them with something that is_gimple_reg doesn't like,
>>> >leading to trouble.  So I've modified is_gimple_reg to check the
>>> >VALUE_EXPR.
>>>
>>> Can you instead try rejecting them?  I've run into similar issues lately 
>>> with is_gimple_val.
>>
>> I'm afraid that would break OpenMP badly.
>> During gimplification, outside of OpenMP contexts we always replace decls
>> for their DECL_VALUE_EXPR, but inside of OpenMP contexts we do it only for
>> some decls.  In particular, omp_notice_variable returns whether the
>> DECL_VALUE_EXPR should be temporarily ignored (if it returns true) or not.
>> If DECL_VALUE_EXPR is temporarily ignored, it is only for a short time,
>> in particular until the omplower pass, which makes sure that the right thing
>> is done with it and everything is regimplified.
>
> Ugh :/  Feels like OMP lowering should happen during gimplification then.
> The PR71104 fix (yes, still pending...) runs into this generally with the
> change to first gimplify the RHS and then the LHS for assignments

Yep, that's what led me here, too.

Jason

> as it affects how rhs_predicate_for works - I've adjusted rhs_predicate_for 
> like

> @@ -3771,7 +3771,9 @@ gimplify_init_ctor_eval (tree object, ve
>  gimple_predicate
>  rhs_predicate_for (tree lhs)
>  {
> -  if (is_gimple_reg (lhs))
> +  if (is_gimple_reg (lhs)
> +  && (! DECL_P (lhs)
> + || ! DECL_HAS_VALUE_EXPR_P (lhs)))
>  return is_gimple_reg_rhs_or_call;
>else
>  return is_gimple_mem_rhs_or_call;
>
> but I don't like this very much either (it's Jasons change but rejecting
> decls with value expr instead).
>
> Richard.
>
>> Anyway, looking at Jason's patch, I'm really surprised it didn't break far
>> more, it is fine if such an ignored DECL_VALUE_EXPR is considered
>> is_gimple_reg.  And I have no idea how else to express this in the IL,
>> the DECL_VALUE_EXPR is often something already the FEs set, and we really
>> want to replace it with the values in most uses, just can't allow it if we
>> want to replace it by something different instead (e.g. privatize in some
>> OpenMP/OpenACC region).
>>
>> Jakub


Re: [PATCH][vectorizer][2/2] PR 65951: Hook up mult synthesis logic into vectorisation of mult-by-constant

2016-06-13 Thread Kyrill Tkachov


On 13/06/16 14:58, Marc Glisse wrote:

On Mon, 13 Jun 2016, Kyrill Tkachov wrote:

This patch allows the vectoriser to synthesize multiplications by an integer constant using the algorithms determined by choose_mult_variant from expmed.c. choose_mult_variant returns an algorithm structure that is a linked list of steps 
describing how to synthesize an integer multiplication by any constant using shifts, adds, subs, and negation.


The new function vect_synth_mult_by_constant that does all the hard work is 
very similar in structure to expand_mult_const from expmed.c but it operates on 
gimple SSA rather than RTL.

Note that we synthesize the multiplications if the target does not support a vector multiplication in the current vector mode we're processing. So, for aarch64 this effectively means V2DI (aarch64 has a vector multiply instruction for 
narrower inner modes).


I guess I should drop my patch 
https://gcc.gnu.org/ml/gcc-patches/2016-06/msg00881.html

This one seems much better.



Sorry for the conflict. I had actually worked on this back in November as a 
quick prototype and then
got swamped with bug fixing for GCC 6 and only just got around to taking this 
up again.

Kyrill


[Bug c++/71513] [6/7 Regression] ICE on valid C++11 code (with alignas specifier) on x86_64-linux-gnu: Segmentation fault

2016-06-13 Thread msebor at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71513

Martin Sebor  changed:

   What|Removed |Added

 CC||msebor at gcc dot gnu.org
  Known to fail||6.1.0, 7.0

--- Comment #2 from Martin Sebor  ---
The regression was introduced in the following change:

r232701 | jason | 2016-01-21 15:26:02 -0500 (Thu, 21 Jan 2016) | 5 lines

PR c++/43407
* decl.c (start_enum): Add attributes parameter.
* parser.c (cp_parser_enum_specifier): Pass it.
* pt.c (lookup_template_class_1): Pass it.
* cp-tree.h: Adjust.

[Bug c/71522] Wrong optimization of memcpy through a var of type long double

2016-06-13 Thread ch3root at openwall dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71522

--- Comment #1 from Alexander Cherepanov  ---
This is run on x86-64, sizeof(long double) == 16. The string literal is 15
letters 'A' and a terminating 0.

To get the same effect on x86-32 the string should be shortened by four 'A'.

Re: [PATCH][vectorizer][2/2] PR 65951: Hook up mult synthesis logic into vectorisation of mult-by-constant

2016-06-13 Thread Marc Glisse

On Mon, 13 Jun 2016, Kyrill Tkachov wrote:

This patch allows the vectoriser to synthesize multiplications by an 
integer constant using the algorithms determined by choose_mult_variant 
from expmed.c. choose_mult_variant returns an algorithm structure that 
is a linked list of steps describing how to synthesize an integer 
multiplication by any constant using shifts, adds, subs, and negation.


The new function vect_synth_mult_by_constant that does all the hard work 
is very similar in structure to expand_mult_const from expmed.c but it 
operates on gimple SSA rather than RTL.


Note that we synthesize the multiplications if the target does not 
support a vector multiplication in the current vector mode we're 
processing. So, for aarch64 this effectively means V2DI (aarch64 has a 
vector multiply instruction for narrower inner modes).


I guess I should drop my patch 
https://gcc.gnu.org/ml/gcc-patches/2016-06/msg00881.html


This one seems much better.

--
Marc Glisse


[PATCH] Fix code emission for FAIL_ALLOC predictor

2016-06-13 Thread Martin Liška
Hello.

The following patch fixes the Fortran FAIL_ALLOC predictor by introducing
a new one (PRED_FORTRAN_REALLOC) and by properly marking returned values,
as described in the following 2 examples:

A) allocate_allocatable

original annotation:

if ((logical(kind=4)) __builtin_expect ((integer(kind=8)) (overflow.343 != 
0), 0, 33)) // overflow
  {
stat.341 = 5014;
  }
else
  {
if ((logical(kind=4)) __builtin_expect ((integer(kind=8)) (bx_ilow.data 
!= 0B), 0, 34)) // fail alloc
  {
stat.341 = 5014;
  }
else
  {
stat.341 = 0;
bx_ilow.data = (void * restrict) __builtin_malloc (MAX_EXPR 
);
if (bx_ilow.data == 0B)
  {
stat.341 = 5014;
  }
  }
  }  
if ((logical(kind=4)) __builtin_expect ((integer(kind=8)) (stat.341 == 0), 
1, 34)) // fail alloc
  {
bx_ilow.dtype = 539;
bx_ilow.dim[0].lbound = (integer(kind=8)) xstart;
bx_ilow.dim[0].ubound = 1;
bx_ilow.dim[0].stride = 1;
bx_ilow.dim[1].lbound = (integer(kind=8)) ystart;
bx_ilow.dim[1].ubound = D.5342;
bx_ilow.dim[1].stride = D.5341;
bx_ilow.dim[2].lbound = (integer(kind=8)) zstart;
bx_ilow.dim[2].ubound = D.5346;
bx_ilow.dim[2].stride = D.5345;
bx_ilow.offset = D.5352;
  }


I changed it to:

if ((logical(kind=4)) __builtin_expect ((integer(kind=8)) (overflow.343 != 
0), 0, 33)) // overflow
  {
stat.341 = 5014;
  }
else
  {
if ((logical(kind=4)) __builtin_expect ((integer(kind=8)) (bx_ilow.data 
!= 0B), 0, 35)) // repeated allocation/deallocation
  {
stat.341 = 5014;
  }
else
  {
stat.341 = 0;
bx_ilow.data = (void * restrict) __builtin_malloc (MAX_EXPR 
);
if ((logical(kind=4)) __builtin_expect ((integer(kind=8)) 
(bx_ilow.data == 0B), 0, 34)) // fail alloc
  {
stat.341 = 5014;
  }
  }
  }
if (stat.341 == 0) // no expectation
  {
bx_ilow.dtype = 539;
bx_ilow.dim[0].lbound = (integer(kind=8)) xstart;
bx_ilow.dim[0].ubound = 1;
bx_ilow.dim[0].stride = 1;
bx_ilow.dim[1].lbound = (integer(kind=8)) ystart;
bx_ilow.dim[1].ubound = D.5342;
bx_ilow.dim[1].stride = D.5341;
bx_ilow.dim[2].lbound = (integer(kind=8)) zstart;
bx_ilow.dim[2].ubound = D.5346;
bx_ilow.dim[2].stride = D.5345;
bx_ilow.offset = D.5352;
  }

B) array allocation

  :
  # size.1478_3210 = PHI <0(7), size.1478_3743(8)>
  _21 = _3740 != 0;
  _22 = (integer(kind=8)) _21;
  _23 = BUILTIN_EXPECT (_22, 0, 33); // overflow
  _24 = (logical(kind=4)) _23;
  if (_24 != 0)
goto ;
  else
goto ;

  :
  _25 = hrval.data;
  _26 = _25 != 0B;
  _27 = (integer(kind=8)) _26;
  _28 = BUILTIN_EXPECT (_27, 0, 34); // fail malloc
  _29 = (logical(kind=4)) _28;
  if (_29 != 0)
goto ;
  else
goto ;

  :
  _30 = MAX_EXPR ;
  _31 = __builtin_malloc (_30);
  hrval.data = _31;
  if (_31 == 0B)
goto ;
  else
goto ;

  :

  :
  # stat.1477_3202 = PHI <5014(9), 5014(10), 0(11), 5014(12)>
  _33 = stat.1477_3202 == 0;
  _34 = (integer(kind=8)) _33;
  _35 = BUILTIN_EXPECT (_34, 1, 34); // fail malloc
  _36 = (logical(kind=4)) _35;
  if (_36 != 0)
goto ;
  else
goto ;

currently looks as follows:

  :
  # size.1478_3210 = PHI <0(7), size.1478_3743(8)>
  _21 = _3740 != 0;
  _22 = (integer(kind=8)) _21;
  _23 = BUILTIN_EXPECT (_22, 0, 33); // overflow
  _24 = (logical(kind=4)) _23;
  if (_24 != 0)
goto ;
  else
goto ;

  :
  _25 = hrval.data;
  _26 = _25 != 0B;
  _27 = (integer(kind=8)) _26;
  _28 = BUILTIN_EXPECT (_27, 0, 35); // repeated allocation/deallocation
  _29 = (logical(kind=4)) _28;
  if (_29 != 0)
goto ;
  else
goto ;

  :
  _30 = MAX_EXPR ;
  _31 = __builtin_malloc (_30);
  hrval.data = _31;
  _33 = _31 == 0B;
  _34 = (integer(kind=8)) _33;
  _35 = BUILTIN_EXPECT (_34, 0, 34); // fail alloc
  _36 = (logical(kind=4)) _35;
  if (_36 != 0)
goto ;
  else
goto ;

  :

  :
  # stat.1477_3202 = PHI <5014(9), 5014(10), 0(11), 5014(12)>
  if (stat.1477_3202 == 0) // no prediction
goto ;
  else
goto ;

I get the following numbers with the patch applied:

1) polyhedron benchmark (aermod.f90.061i.profile):
HEURISTICS   BRANCHES  (REL)  HITRATE
COVERAGE COVERAGE  (REL)
repeated allocation/deallocation  194   4.1% 100.00% / 100.00%  
  194   194.00   0.0%
fail alloc377   7.9% 100.00% / 100.00%  
  377   377.00   0.0%

2) 459.GemsFDTD SPEC2006 benchmark:
HEURISTICS   BRANCHES  (REL)  HITRATE
COVERAGE COVERAGE  (REL)
repeated allocation/deallocation 

[Bug c/71522] New: Wrong optimization of memcpy through a var of type long double

2016-06-13 Thread ch3root at openwall dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71522

Bug ID: 71522
   Summary: Wrong optimization of memcpy through a var of type
long double
   Product: gcc
   Version: 7.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: c
  Assignee: unassigned at gcc dot gnu.org
  Reporter: ch3root at openwall dot com
  Target Milestone: ---

Source code:

--
#include <stdio.h>
#include <string.h>

int main()
{
  long double d;
  char s[sizeof d];

  memcpy(&d, "AAA", sizeof d);
  memcpy(&s, &d, sizeof s);

  printf("%s\n", s);
}
--

Results:

--
$ gcc -std=c11 -pedantic -Wall -Wextra test.c && ./a.out
AAA

$ gcc -std=c11 -pedantic -Wall -Wextra -O3 test.c && ./a.out

--

gcc version: gcc (GCC) 7.0.0 20160613 (experimental)

The effect is very similar to bug 71452 so, presumably, this is also considered
a bug.

[Bug bootstrap/71510] [7 Regression] Failed to bootstrap with --with-arch=corei7 --with-cpu=intel

2016-06-13 Thread hjl.tools at gmail dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71510

--- Comment #3 from H.J. Lu  ---
(In reply to Segher Boessenkool from comment #1)
> I cannot reproduce this problem; bootstrap went without a hitch, both
> with and without those --with-*.

Did you build i386 GCC or x86-64 GCC?

[Bug driver/68463] Offloading fails when some objects are compiled with LTO and some without

2016-06-13 Thread jnorris at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68463

--- Comment #8 from jnorris at gcc dot gnu.org ---
Author: jnorris
Date: Mon Jun 13 13:17:22 2016
New Revision: 237379

URL: https://gcc.gnu.org/viewcvs?rev=237379&root=gcc&view=rev
Log:
Backport from mainline r236098.
2016-05-10  James Norris  

PR driver/68463
* config/rs6000/sysv4.h (CRTOFFLOADBEGIN): Define. Add
crtoffloadbegin.o
if offloading is enabled and -fopenacc or -fopenmp is specified.
(CRTOFFLOADEND): Likewise.
(STARTFILE_LINUX_SPEC): Add CRTOFFLOADBEGIN.
(ENDFILE_LINUX_SPEC): Add CRTOFFLOADEND.

Modified:
branches/gomp-4_0-branch/gcc/ChangeLog.gomp
branches/gomp-4_0-branch/gcc/config/rs6000/sysv4.h

[Bug c/66682] Lots of macro expansion, very slow compilation

2016-06-13 Thread avibl at cadence dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66682

Avi Bloch  changed:

   What|Removed |Added

 CC||avibl at cadence dot com

--- Comment #10 from Avi Bloch  ---
(In reply to Mikhail Maltsev from comment #8)
> (In reply to Brendan G Bohannon from comment #7)
> > The code in question creates about 7k internal functions, which could be a
> > factor?
> Well, yes probably the number of functions does matter here. But again, this
> is a bug in GCC, which has been fixed in later versions (4.9.x and later).
> I can even point out the relevant commit. I prepared a slightly reduced
> testcase (all 6-argument thunks, ~2000 functions). Bisection shows that
> r208113 compiles the testcase in 7.5s (at -O0), while the previous revision
> requires 71 second (and over 6 minutes for full testcase).
> BTW, r208113 is a fix for PR60291 - a similar issue with C code which is
> generated by glasgow haskell compiler.

If I can't upgrade to a newer compiler, do you know if it's possible to get
around this problem by using specific gcc options?

Thanks

Re: [PATCH] Allow fwprop to undo vectorization harm (PR68961)

2016-06-13 Thread Richard Biener
On Mon, 13 Jun 2016, Richard Biener wrote:

> On Fri, 10 Jun 2016, Richard Biener wrote:
> 
> > 
> > With the proposed cost change for vector construction we will end up
> > vectorizing the testcase in PR68961 again (on x86_64 and likely
> > on ppc64le as well after that target gets adjustments).  Currently
> > we can't optimize that away again noticing the direct overlap of
> > > argument and return registers.  The obstacle is
> > 
> > (insn 7 4 8 2 (set (reg:V2DF 93)
> > (vec_concat:V2DF (reg/v:DF 91 [ a ])
> > (reg/v:DF 92 [ aa ]))) 
> > ...
> > (insn 21 8 24 2 (set (reg:DI 97 [ D.1756 ])
> > (subreg:DI (reg:TI 88 [ D.1756 ]) 0))
> > (insn 24 21 11 2 (set (reg:DI 100 [+8 ])
> > (subreg:DI (reg:TI 88 [ D.1756 ]) 8))
> > 
> > which we eventually optimize to DFmode subregs of (reg:V2DF 93).
> > 
> > First of all simplify_subreg doesn't handle the subregs of a vec_concat
> > (easy fix below).
> > 
> > Then combine doesn't like to simplify the multi-use (it tries some
> > parallel it seems).  So I went to forwprop which eventually manages
> > to do this but throws away the result (reg:DF 91) or (reg:DF 92)
> > because it is not a constant.  Thus I allow arbitrary simplification
> > results for SUBREGs of [VEC_]CONCAT operations.  There doesn't seem
> > to be a magic flag to tell it to restrict to the case where all
> > uses can be simplified or so, nor to restrict simplifications to a REG.
> > But I don't see any undesirable simplifications of (subreg 
> > ([vec_]concat)).
> > 
> > For the testcase I'm not sure if I have to exclude some ABIs (mingw?).
> > 
> > > Bootstrap and regtest in progress on x86_64-unknown-linux-gnu, I'll
> > install the simplify-rtx.c if that succeeds but like to have opinions
> > on the fwprop.c change.
> 
> So the bootstrap exposes a latent issue in simplify-rtx.c in the changed
> hunk via gcc.target/i386/mmx-8.c on i?86 which ends up with a 
> 
> (vec_concat:V2SI (reg:SI 103)
> (const_int 0 [0]))
> 
> and thus a VOIDmode 2nd operand (I'm sure this can happen for
> complex integer concat as well, thus latent).  I am adjusting the
> simplify_subreg hunk to always pass GET_MODE_INNER (innermode)
> (that hopefully exercises it a bit more than just using that
> if GET_MODE (part) == VOIDmode - and hopefully they should always
> agree).
> 
> Re-bootstrap / regtest running on x86_64-unknown-linux-gnu.

That works worse given that vec_concat can be

(vec_concat:V16QI (us_truncate:V8QI (reg:V8HI 159))
(us_truncate:V8QI (reg:V8HI 160)))

... now I think the VOIDmode case can only happen for scalar vec_concat
and thus

  enum machine_mode part_mode = GET_MODE (part);
  if (part_mode == VOIDmode)
part_mode = GET_MODE_INNER (GET_MODE (op));

should work.  Re-testing with that... (ok, I know it has coverage of
exactly one testcase on x86_64 as it would otherwise ICE).

Richard.


2016-06-13  Richard Biener  

PR rtl-optimization/68961
* simplify-rtx.c (simplify_subreg): Handle VEC_CONCAT like CONCAT.
* fwprop.c (propagate_rtx): Allow SUBREGs of VEC_CONCAT and CONCAT
to simplify to a non-constant.

* gcc.target/i386/pr68961.c: New testcase.

Index: gcc/simplify-rtx.c
===
*** gcc/simplify-rtx.c  (revision 237372)
--- gcc/simplify-rtx.c  (working copy)
*** simplify_subreg (machine_mode outermode,
*** 6108,6116 
&& GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (GET_MODE (op)))
  return adjust_address_nv (op, outermode, byte);
  
!   /* Handle complex values represented as CONCAT
!  of real and imaginary part.  */
!   if (GET_CODE (op) == CONCAT)
  {
unsigned int part_size, final_offset;
rtx part, res;
--- 6108,6117 
&& GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (GET_MODE (op)))
  return adjust_address_nv (op, outermode, byte);
  
!   /* Handle complex or vector values represented as CONCAT or VEC_CONCAT
!  of two parts.  */
!   if (GET_CODE (op) == CONCAT
!   || GET_CODE (op) == VEC_CONCAT)
  {
unsigned int part_size, final_offset;
rtx part, res;
*** simplify_subreg (machine_mode outermode,
*** 6130,6139 
if (final_offset + GET_MODE_SIZE (outermode) > part_size)
return NULL_RTX;
  
!   res = simplify_subreg (outermode, part, GET_MODE (part), final_offset);
if (res)
return res;
!   if (validate_subreg (outermode, GET_MODE (part), part, final_offset))
return gen_rtx_SUBREG (outermode, part, final_offset);
return NULL_RTX;
  }
--- 6131,6143 
if (final_offset + GET_MODE_SIZE (outermode) > part_size)
return NULL_RTX;
  
!   enum machine_mode part_mode = GET_MODE (part);
!   if (part_mode == VOIDmode)
!   part_mode = GET_MODE_INNER (GET_MODE (op));
!   res = simplify_subreg (outermode, part, part_mode, final_offset);

[Bug c++/71516] [5/6/7 Regression] ICE on invalid C++ code (invalid use of forward declared type) on x86_64-linux-gnu: Segmentation fault (program cc1plus)

2016-06-13 Thread jakub at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71516

Jakub Jelinek  changed:

   What|Removed |Added

   Priority|P3  |P4
 CC||jakub at gcc dot gnu.org
Summary|[7 Regression] ICE on   |[5/6/7 Regression] ICE on
   |invalid C++ code (invalid   |invalid C++ code (invalid
   |use of forward declared |use of forward declared
   |type) on x86_64-linux-gnu:  |type) on x86_64-linux-gnu:
   |Segmentation fault (program |Segmentation fault (program
   |cc1plus)|cc1plus)

--- Comment #2 from Jakub Jelinek  ---
Started with r214353.

[Bug c++/71515] [4.9/5/6/7 Regression] ICE on valid C++ code on x86_64-linux-gnu: Segmentation fault (program cc1plus)

2016-06-13 Thread jakub at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71515

Jakub Jelinek  changed:

   What|Removed |Added

 CC||jakub at gcc dot gnu.org,
   ||jason at gcc dot gnu.org

--- Comment #2 from Jakub Jelinek  ---
I bet this started with r148915 (at least, r148906 works, and r148977 ICEs with
infinite recursion).

Re: [PATCH][C] Avoid reading from FUNCTION_DECL with atomics

2016-06-13 Thread Jakub Jelinek
On Mon, Jun 13, 2016 at 01:25:35PM +0200, Richard Biener wrote:
> The following avoids creating IL that accesses a FUNCTION_DECLs memory
> directly rather than indirectly through an address based on it.
> 
> Bootstrap and regtest running on x86_64-unknown-linux-gnu, ok for trunk?

I think the problem is that for these generic builtins we perform no sanity
checking, except for checking TYPE_SIZE_UNIT equality.  But as you show
in the PR, even that doesn't work, as while we check for VLAs on the first
argument, we don't check for that on the second and following arguments.

The question is what all should we reject.

We accept:

void foo (void);
void bar (void);
void baz (void);
void
test (void)
{
  __atomic_exchange (&foo, &bar, &baz, __ATOMIC_RELAXED);
}

which IMHO we definitely should not, what does it mean to exchange
functions?
So, at least diagnose if any of the arguments is pointer to
FUNCTION_TYPE/METHOD_TYPE and handle gracefully VLAs in 2nd+ argument.

Should we perform some further type checking though, like e.g.
complain if one pointer is pointer to integral type and another to
floating, or one to struct, another to union, or do we just keep the
builtins very forgiving and assume that on the C++ side the templates
make sure the arguments are type compatible and for C _Atomic handling is
done differently anyway?

Jakub


Re: [PATCH] Add ggc-tests.c

2016-06-13 Thread Ulrich Weigand
Gerald Pfeifer wrote:

> The source code of need_finalization_p in ggc.h reads
> 
>template<typename T>
>static inline bool
>need_finalization_p ()
>{
>#if GCC_VERSION >= 4003
>  return !__has_trivial_destructor (T);
>#else
>  return true;
>#endif
>}
> 
> which means your self test is broken by design for any compiler
> that is not GCC in at least version 4.3, isn't it?

Just to confirm that I'm seeing the same failure on my SPU
daily build machine, which is running RHEL 5 with a host
compiler of GCC 4.1.2.

Bye,
Ulrich

-- 
  Dr. Ulrich Weigand
  GNU/Linux compilers and toolchain
  ulrich.weig...@de.ibm.com



[PATCH][1/2] Move mult synthesis definitions into a separate file

2016-06-13 Thread Kyrill Tkachov

Hi all,

There are other places besides expand where we might want to synthesize an 
integer
multiplication by a constant.  Thankfully the algorithm selection code in 
expmed.c
is already quite well separated from the RTL implementation, so if we can just 
factor
out the prototype of choose_mult_variant and some enums and structs that it 
needs into
a separate header file we can reuse them from other parts of the compiler.

I need this for patch 2/2 which hooks up the vectorizer to synthesize vector
multiplications using sequences of shifts and other arithmetic ops when 
appropriate.

The new header is called mult-synthesis.h. Should I add it to some makefile?
grepping around for a bit I'm not sure what to do about it.

Bootstrapped and tested on arm, aarch64, x86_64.

Thanks,
Kyrill

2016-06-13  Kyrylo Tkachov  

* mult-synthesis.h: New file.  Add choose_mult_variant prototype.
* expmed.h: Include mult-synthesis.h
(enum alg_code): Move to mult-synthesis.h
(struct mult_cost): Likewise.
(struct algorithm): Likewise.
* expmed.c (enum mult_variant): Move to mult-synthesis.h
(choose_mult_variant): Delete prototype.  Remove static qualifier.
diff --git a/gcc/expmed.h b/gcc/expmed.h
index 1a32e9f1b664f250c5092022eb965237ed0342fc..304ce02d78a9e3e024c13caee7869d67dfdab65c 100644
--- a/gcc/expmed.h
+++ b/gcc/expmed.h
@@ -21,35 +21,7 @@ along with GCC; see the file COPYING3.  If not see
 #define EXPMED_H 1
 
 #include "insn-codes.h"
-
-enum alg_code {
-  alg_unknown,
-  alg_zero,
-  alg_m, alg_shift,
-  alg_add_t_m2,
-  alg_sub_t_m2,
-  alg_add_factor,
-  alg_sub_factor,
-  alg_add_t2_m,
-  alg_sub_t2_m,
-  alg_impossible
-};
-
-/* This structure holds the "cost" of a multiply sequence.  The
-   "cost" field holds the total rtx_cost of every operator in the
-   synthetic multiplication sequence, hence cost(a op b) is defined
-   as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
-   The "latency" field holds the minimum possible latency of the
-   synthetic multiply, on a hypothetical infinitely parallel CPU.
-   This is the critical path, or the maximum height, of the expression
-   tree which is the sum of rtx_costs on the most expensive path from
-   any leaf to the root.  Hence latency(a op b) is defined as zero for
-   leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
-
-struct mult_cost {
-  short cost; /* Total rtx_cost of the multiplication sequence.  */
-  short latency;  /* The latency of the multiplication sequence.  */
-};
+#include "mult-synthesis.h"
 
 /* This macro is used to compare a pointer to a mult_cost against an
single integer "rtx_cost" value.  This is equivalent to the macro
@@ -65,38 +37,6 @@ struct mult_cost {
  || ((X)->cost == (Y)->cost	\
  && (X)->latency < (Y)->latency))
 
-/* This structure records a sequence of operations.
-   `ops' is the number of operations recorded.
-   `cost' is their total cost.
-   The operations are stored in `op' and the corresponding
-   logarithms of the integer coefficients in `log'.
-
-   These are the operations:
-   alg_zero		total := 0;
-   alg_m		total := multiplicand;
-   alg_shift		total := total * coeff
-   alg_add_t_m2		total := total + multiplicand * coeff;
-   alg_sub_t_m2		total := total - multiplicand * coeff;
-   alg_add_factor	total := total * coeff + total;
-   alg_sub_factor	total := total * coeff - total;
-   alg_add_t2_m		total := total * coeff + multiplicand;
-   alg_sub_t2_m		total := total * coeff - multiplicand;
-
-   The first operand must be either alg_zero or alg_m.  */
-
-struct algorithm
-{
-  struct mult_cost cost;
-  short ops;
-  /* The size of the OP and LOG fields are not directly related to the
- word size, but the worst-case algorithms will be if we have few
- consecutive ones or zeros, i.e., a multiplicand like 10101010101...
- In that case we will generate shift-by-2, add, shift-by-2, add,...,
- in total wordsize operations.  */
-  enum alg_code op[MAX_BITS_PER_WORD];
-  char log[MAX_BITS_PER_WORD];
-};
-
 /* The entry for our multiplication cache/hash table.  */
 struct alg_hash_entry {
   /* The number we are multiplying by.  */
diff --git a/gcc/expmed.c b/gcc/expmed.c
index 6645a535b3eef9624e6f3ce61d2fcf864d1cf574..22564fa423aec52febef6220d3f59a82e09b118a 100644
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -2482,16 +2482,9 @@ expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
 }
 
 
-/* Indicates the type of fixup needed after a constant multiplication.
-   BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
-   the result should be negated, and ADD_VARIANT means that the
-   multiplicand should be added to the result.  */
-enum mult_variant {basic_variant, negate_variant, add_variant};
 
 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
 			const struct mult_cost *, machine_mode mode);
-static bool choose_mult_variant (machine_mode, 

  1   2   3   >