[PATCH] c++: Fix ICE during constexpr virtual call evaluation [PR93633]

2020-02-08 Thread Jakub Jelinek
Hi!

The first (valid) testcase ICEs because for
  A *a = new B ();
  a->foo (); // virtual method call
what we actually see is the compiler-generated "heap " variable, and the
"heap " objects don't have the class (or whatever other type was used in the
new expression) as their type, but an array type containing one of those (or
more for array new), and so when using TYPE_BINFO (objtype) on it we ICE.
This patch handles this special case, and otherwise punts (as shown e.g. in
the second testcase, where, because the heap object is already deleted,
we don't really want to allow it to be used).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-02-08  Jakub Jelinek  

PR c++/93633
* constexpr.c (cxx_eval_constant_expression): If obj is heap var with
ARRAY_TYPE, use the element type.  Punt if objtype after that is not
a class type.

* g++.dg/cpp2a/constexpr-new11.C: New test.
* g++.dg/cpp2a/constexpr-new12.C: New test.
* g++.dg/cpp2a/constexpr-new13.C: New test.

--- gcc/cp/constexpr.c.jj   2020-02-08 10:58:15.434064005 +0100
+++ gcc/cp/constexpr.c  2020-02-08 14:15:55.356768622 +0100
@@ -6027,6 +6027,17 @@ cxx_eval_constant_expression (const cons
   && DECL_FIELD_IS_BASE (TREE_OPERAND (obj, 1)))
  obj = TREE_OPERAND (obj, 0);
tree objtype = TREE_TYPE (obj);
+   if (VAR_P (obj)
+   && DECL_NAME (obj) == heap_identifier
+   && TREE_CODE (objtype) == ARRAY_TYPE)
+ objtype = TREE_TYPE (objtype);
+   if (!CLASS_TYPE_P (objtype))
+ {
+   if (!ctx->quiet)
+ error_at (loc, "expression %qE is not a constant expression", t);
+   *non_constant_p = true;
+   return t;
+ }
/* Find the function decl in the virtual functions list.  TOKEN is
   the DECL_VINDEX that says which function we're looking for.  */
tree virtuals = BINFO_VIRTUALS (TYPE_BINFO (objtype));
--- gcc/testsuite/g++.dg/cpp2a/constexpr-new11.C.jj 2020-02-08 
14:22:34.740789172 +0100
+++ gcc/testsuite/g++.dg/cpp2a/constexpr-new11.C2020-02-08 
14:21:50.689448695 +0100
@@ -0,0 +1,31 @@
+// PR c++/93633
+// { dg-do compile { target c++2a } }
+
+struct A {
+  constexpr A () : a (0) {}
+  constexpr virtual int foo () { return 1 + a * 4; }
+  int a;
+};
+
+struct B : A {
+  constexpr B () : b (0) {}
+  constexpr virtual int foo () { return 0 + b * 4; }
+  int b;
+};
+
+constexpr int
+foo ()
+{
+  A *a = new B ();
+  a->a = 4;
+  int r = a->foo ();
+  delete a;
+  return r;
+}
+
+int
+main ()
+{
+  constexpr auto a = foo ();
+  static_assert (a == 0);
+}
--- gcc/testsuite/g++.dg/cpp2a/constexpr-new12.C.jj 2020-02-08 
14:22:38.043739717 +0100
+++ gcc/testsuite/g++.dg/cpp2a/constexpr-new12.C2020-02-08 
14:24:04.119451025 +0100
@@ -0,0 +1,26 @@
+// PR c++/93633
+// { dg-do compile { target c++2a } }
+
+struct A {
+  constexpr A () : a (0) {}
+  constexpr virtual int foo () { return 1 + a * 4; }
+  int a;
+};
+
+struct B : A {
+  constexpr B () : b (0) {}
+  constexpr virtual int foo () { return 0 + b * 4; }
+  int b;
+};
+
+constexpr int
+foo ()
+{
+  A *a = new B ();
+  a->a = 4;
+  delete a;
+  int r = a->foo ();
+  return r;
+}
+
+constexpr auto a = foo (); // { dg-error "is not a constant expression" }
--- gcc/testsuite/g++.dg/cpp2a/constexpr-new13.C.jj 2020-02-08 
14:22:41.060694553 +0100
+++ gcc/testsuite/g++.dg/cpp2a/constexpr-new13.C2020-02-08 
14:25:38.783033750 +0100
@@ -0,0 +1,26 @@
+// PR c++/93633
+// { dg-do compile { target c++2a } }
+
+struct A {
+  constexpr A () : a (0) {}
+  virtual int foo () { return 1 + a * 4; }
+  int a;
+};
+
+struct B : A {
+  constexpr B () : b (0) {}
+  virtual int foo () { return 0 + b * 4; } // { dg-message "declared here" }
+  int b;
+};
+
+constexpr int
+foo ()
+{
+  A *a = new B ();
+  a->a = 4;
+  int r = a->foo ();   // { dg-error "call to non-.constexpr. function" }
+  delete a;
+  return r;
+}
+
+constexpr auto a = foo ();

Jakub



[committed] openmp: Optimize DECL_IN_CONSTANT_POOL vars in target regions

2020-02-08 Thread Jakub Jelinek
Hi!

DECL_IN_CONSTANT_POOL vars are shared and thus don't really get emitted in the
BLOCK where they are used, so for OpenMP target regions that have initializers
gimplified into copying from them we actually map them at runtime from host to
offload devices.  This patch instead marks them as "omp declare target", so
that they are on the target device from the beginning and don't need to be
copied there.
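
As an aside, here is a minimal hypothetical illustration of the kind of code
affected (not the new testcase below; the function name and array size are
made up, and whether the initializer really ends up in the constant pool
depends on the target and on its size):

  int
  use_constant_pool_init (int i)
  {
    int r;
    #pragma omp target map(from: r)
    {
      /* Depending on target and size, this brace initializer may be emitted
         as a DECL_IN_CONSTANT_POOL variable that the gimplified copy reads
         from; before this change that variable had to be mapped from the
         host at runtime.  */
      int a[] = { 10, 11, 12, 13, 14, 15, 16, 17,
                  18, 19, 20, 21, 22, 23, 24, 25 };
      r = a[i & 15];
    }
    return r;
  }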

Bootstrapped/regtested on x86_64-linux and i686-linux, tested also with
x86_64-linux to nvptx-none offloading, committed to trunk.

2020-02-09  Jakub Jelinek  

* gimplify.c (gimplify_adjust_omp_clauses_1): Promote
DECL_IN_CONSTANT_POOL variables into "omp declare target" to avoid
copying them around between host and target.

* testsuite/libgomp.c/target-38.c: New test.

--- gcc/gimplify.c.jj   2020-01-15 11:05:19.248141349 +0100
+++ gcc/gimplify.c  2020-02-07 19:04:05.646917169 +0100
@@ -9902,6 +9902,22 @@ gimplify_adjust_omp_clauses_1 (splay_tre
  error ("%<_Atomic%> %qD in implicit % clause", decl);
  return 0;
}
+  if (VAR_P (decl)
+ && DECL_IN_CONSTANT_POOL (decl)
+  && !lookup_attribute ("omp declare target",
+   DECL_ATTRIBUTES (decl)))
+   {
+ tree id = get_identifier ("omp declare target");
+ DECL_ATTRIBUTES (decl)
+   = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (decl));
+ varpool_node *node = varpool_node::get (decl);
+ if (node)
+   {
+ node->offloadable = 1;
+ if (ENABLE_OFFLOADING)
+   g->have_offload = true;
+   }
+   }
 }
   else if (flags & GOVD_SHARED)
 {
--- libgomp/testsuite/libgomp.c/target-38.c.jj  2020-02-07 19:06:58.717373079 
+0100
+++ libgomp/testsuite/libgomp.c/target-38.c 2020-02-07 19:06:32.862753134 
+0100
@@ -0,0 +1,28 @@
+#define A(n) n##0, n##1, n##2, n##3, n##4, n##5, n##6, n##7, n##8, n##9
+#define B(n) A(n##0), A(n##1), A(n##2), A(n##3), A(n##4), A(n##5), A(n##6), A(n##7), A(n##8), A(n##9)
+
+int
+foo (int x)
+{
+  int b[] = { B(4), B(5), B(6) };
+  return b[x];
+}
+
+int v[] = { 1, 2, 3, 4, 5, 6 };
+#pragma omp declare target to (foo, v)
+
+int
+main ()
+{
+  int i = 5;
+  asm ("" : "+g" (i));
+  #pragma omp target map(tofrom:i)
+  {
+int a[] = { B(1), B(2), B(3) };
+asm ("" : : "m" (a) : "memory");
+i = a[i] + foo (i) + v[i & 63];
+  }
+  if (i != 105 + 405 + 6)
+__builtin_abort ();
+  return 0;
+}

Jakub



[COMMITTED] aarch64: fix strict alignment for vector load/stores (PR 91927)

2020-02-08 Thread apinski
From: Andrew Pinski 

Hi,
  The problem here is that the vector mode version of movmisalign
was conditional only on SIMD being enabled, instead of also being
conditional on !STRICT_ALIGNMENT.

Applied as pre-approved in the bug report by Richard Sandiford
after a bootstrap/test on aarch64-linux-gnu.

Thanks,
Andrew Pinski

ChangeLog:
PR target/91927
* config/aarch64/aarch64-simd.md (movmisalign<mode>): Check
STRICT_ALIGNMENT also.

testsuite/ChangeLog:
PR target/91927
* gcc.target/aarch64/pr91927.c: New testcase.
---
 gcc/config/aarch64/aarch64-simd.md |  2 +-
 gcc/testsuite/gcc.target/aarch64/pr91927.c | 38 ++
 2 files changed, 39 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr91927.c

diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index c8e1012bd7f..4c651f45d0c 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -41,7 +41,7 @@ (define_expand "mov<mode>"
 (define_expand "movmisalign<mode>"
   [(set (match_operand:VALL 0 "nonimmediate_operand")
 (match_operand:VALL 1 "general_operand"))]
-  "TARGET_SIMD"
+  "TARGET_SIMD && !STRICT_ALIGNMENT"
 {
   /* This pattern is not permitted to fail during expansion: if both arguments
  are non-registers (e.g. memory := constant, which can be created by the
diff --git a/gcc/testsuite/gcc.target/aarch64/pr91927.c 
b/gcc/testsuite/gcc.target/aarch64/pr91927.c
new file mode 100644
index 000..f5cde1a5336
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr91927.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-mstrict-align -O3" } */
+
+#define NULL 0
+
+typedef unsigned uint32_t;
+typedef struct __attribute__((__packed__))
+{
+   uint32_t nTagID;
+   uint32_t nValueBufferSize;
+   uint32_t nValueLength;
+   
+}  PropertyTags_t;
+
+typedef struct
+{
+   char *szName;
+   uint32_t nBufferSize;
+   uint32_t nLength;
+   
+}  Something_t;
+
+void SetTag(PropertyTags_t *pTag, uint32_t nBufferSize, uint32_t nLength);
+
+void TestCase(Something_t *pSome, uint32_t nBufferSize, uint32_t nLength)
+{
+   if (pSome != NULL)
+   {
+   PropertyTags_t sTag = { 0 };
+   
+   SetTag(&sTag, nBufferSize, nLength);
+   
+   pSome->nBufferSize = sTag.nValueBufferSize;
+   pSome->nLength = sTag.nValueLength;
+   }
+}
+
+/* { dg-final { scan-assembler-not "ldr\td" } } */
-- 
2.17.1



Re: [PATCH] rs6000: Fix PR93136, gcc.dg/vmx/ops.c and several other test break after r279772

2020-02-08 Thread Peter Bergner
On 2/8/20 11:13 AM, Segher Boessenkool wrote:
>> +/* { dg-final { scan-assembler-times {\mvperm[r]?\M} 1 } } */
> 
> You can write this without the square brackets, fwiw.
> 
> Okay for trunk.  Thank you!

Ok, I pushed this change with your suggestion to remove the square brackets
on the above regex.  Thanks.

Peter




Re: [PATCH] Use a non-empty test program to test ability to link

2020-02-08 Thread Sandra Loosemore

On 2/7/20 3:24 PM, Joseph Myers wrote:
> On Wed, 5 Feb 2020, Sandra Loosemore wrote:
>
>> This patch is for PR 79193 and 88999, problems where libstdc++ is
>> mis-configuring itself when building for a bare-metal target because it
>> thinks it can link programs without pulling in the BSP that provides
>> low-level I/O support.  (Specifically, this was observed on nios2-elf with
>> Newlib and GDB semihosting.)  It'll build just fine if it recognizes that
>> it can only compile programs and not link them, but it's confused because
>> using an empty program to test for ability to link succeeds.
>>
>> Is this configure change OK, and suitable for stage 4?
>>
>> BTW, I did run autoconf in every subdirectory that contains a configure.ac,
>> but it appears only libstdc++-v3 actually uses this test; all the other
>> regenerated configure scripts were unchanged.
>
> There's some problem with that regeneration, then; every directory where
> configure.ac mentions GCC_NO_EXECUTABLES should have a resulting change (I
> tested regenerating in libgcc/ with this patch applied and got such a
> change, for example).

Hmmm.  I just tried that again and got nothing.  Do I have to do
something other than just run "autoconf" in that directory?  I'm always
confused by anything involving configuration changes.  :-(


-Sandra


[committed] RISC-V: Improve caller-save code generation.

2020-02-08 Thread Jim Wilson
Avoid paradoxical subregs when caller save.  This reduces stack frame size
due to smaller loads and stores, and more frequent rematerialization.

Tested with cross riscv32-elf and riscv64-linux build and check, with no
regressions.

Committed.

Jim

PR target/93532
* config/riscv/riscv.h (HARD_REGNO_CALLER_SAVE_MODE): Define.
---
 gcc/config/riscv/riscv.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 19438e28fe8..567c23380fe 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -268,6 +268,13 @@ along with GCC; see the file COPYING3.  If not see
   1, 1 \
 }
 
+/* Select a register mode required for caller save of hard regno REGNO.
+   Contrary to what is documented, the default is not the smallest suitable
+   mode but the largest suitable mode for the given (REGNO, NREGS) pair and
+   it quickly creates paradoxical subregs that can be problematic.  */
+#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \
+  ((MODE) == VOIDmode ? choose_hard_reg_mode (REGNO, NREGS, NULL) : (MODE))
+
 /* Internal macros to classify an ISA register's type.  */
 
 #define GP_REG_FIRST 0
-- 
2.17.1



Re: [PATCH, rs6000]: mark clobber for registers changed by untyped_call

2020-02-08 Thread Segher Boessenkool
On Sat, Feb 08, 2020 at 10:17:42AM -0600, Segher Boessenkool wrote:
> And we do not know which of the registers will be used for the return, in
> untyped_call (only untyped_return knows).  But we can add clobbers of all
> registers that *might* be used for the return; we do know that here, see
> operands[2] of untyped_call.

Clobbers in parallel to the call, I mean, not as separate insns later.


Segher


Re: [PATCH] Improve splitX passes management

2020-02-08 Thread Segher Boessenkool
Hi again,

On Sat, Feb 08, 2020 at 11:54:48AM +0100, Uros Bizjak wrote:
> On Fri, Feb 7, 2020 at 5:41 PM Segher Boessenkool
>  wrote:
> > On Thu, Feb 06, 2020 at 12:13:35PM +0100, Uros Bizjak wrote:
> > > The names of split_before_sched2 ("split4") and split_before_regstack
> > > ("split3") do not reflect their insertion point in the sequence of passes,
> > > where split_before_regstack follows split_before_sched2. Reorder the code
> > > and rename the passes to reflect the reality.
> >
> > Renaming them to other splitN doesn't help much :-/  Having stable names
> > is more important (some archs actually use these names), I'd say.  But
> 
> True, this is why I took care to find and fix all references to the
> existing names.  It is a simple substitution of the old names with new ones.

Yeah, and reading the patch is a bit challenging ;-)  If we had real
names here, this would be simpler.  Something for the future.

> I'm not familiar with how the situation is handled for tree passes.
> If a new instance of the same pass is inserted before an existing pass,
> does the name of the existing pass get changed?

I don't know either.

> > > +bool
> > > +pass_split_before_regstack::gate (function *)
> > > +{
> > > +#if HAVE_ATTR_length && defined (STACK_REGS)
> > > +  /* If flow2 creates new instructions which need splitting
> > > + and scheduling after reload is not done, they might not be
> > > + split until final which doesn't allow splitting
> > > + if HAVE_ATTR_length.  */
> > > +  return !enable_split_before_sched2 ();
> > > +#else
> > > +  return false;
> > > +#endif
> > > +}
> >
> > flow.c was deleted in 2006...  Is this split still needed at all?  If
> > so, change the comment please?  :-)
> 
> Hm, I don't know in general, but x86 needs a late split at the point
> where instantiated registers won't change in the insn.

Right, but the explanation given in the comment is very out of date.

> As you suggested in the other mail, the issue with many late split
> passes (passes 3-5) should be reviewed, considering also other (e.g.
> non-stack regs) targets.

Yeah.  But that is not a stage 4 thing.  Your patch is safe for stage 4
as far as I can see.


Segher


Re: [PATCH] rs6000: Fix PR93136, gcc.dg/vmx/ops.c and several other test break after r279772

2020-02-08 Thread Segher Boessenkool
Hi!

On Fri, Feb 07, 2020 at 06:22:56PM -0600, Peter Bergner wrote:
> On 1/9/20 6:29 PM, Peter Bergner wrote:
> > On 1/9/20 4:51 PM, Segher Boessenkool wrote:
> >> Splitting out separate functions in the testcase shouldn't be so much
> >> work?  Or am I too optimistic :-)
> >>
> >> This should make the test a good deal less prone to random changes in
> >> output caused by the lunar cycle.
> > 
> > Ok, let me take a stab at rewriting the tests to be more similar to the
> > pr92923-[12].c and see how much work that is.  I do agree that it would
> > be nice not having the insn counts be so fragile.
> 
> Sorry for taking so long to get back to this.  I split the functions into
> smaller chunks, but didn't need to go all the way to one function per
> builtin call.  Does this look better?

>   PR target/93136
>   * gcc.dg/vmx/ops.c: Add -flax-vector-conversions to dg-options.

Eww.  Well, I guess we want to test that flag as well ;-)

>   * gcc.target/powerpc/vsx-vector-6.h: Split tests into smaller functions.
>   * gcc.target/powerpc/vsx-vector-6.p7.c: Adjust scan-assembler-times
>   regex directives.  Adjust expected instruction counts.
>   * gcc.target/powerpc/vsx-vector-6.p8.c: Likewise.
>   * gcc.target/powerpc/vsx-vector-6.p9.c: Likewise.

> --- a/gcc/testsuite/gcc.target/powerpc/vsx-vector-6.p7.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6.p7.c
> @@ -1,41 +1,43 @@
> -/* { dg-do compile { target { lp64 && be } } } */
> +/* { dg-do compile { target lp64 } } */

This worked for LE as well?  Nice :-)  That isn't supported of course,
but there is no reason to explicitly not test it (or break it more than
it is) :-)

> +/* { dg-final { scan-assembler-times {\mvperm[r]?\M} 1 } } */

You can write this without the square brackets, fwiw.

Okay for trunk.  Thank you!


Segher


Re: [RFA] [PR rtl-optimization/90275] Handle nop reg->reg copies in cse

2020-02-08 Thread Segher Boessenkool
On Fri, Feb 07, 2020 at 09:00:40AM -0700, Jeff Law wrote:
> On Thu, 2020-02-06 at 07:56 -0600, Segher Boessenkool wrote:
> > On Wed, Feb 05, 2020 at 11:48:23AM -0700, Jeff Law wrote:
> > > Yea, it's closely related.  In your case you need to effectively ignore
> > > the nop insn to get the optimization you want.  In mine that nop insn
> > > causes an ICE.
> > > 
> > > I think we could take your cse bits + adding a !CALL_P separately from
> > > the simplify-rtx stuff which Segher objected to.  THat'd likely solve
> > > the ARM ICEs and take you a tiny step forward on optimizing that SVE
> > > case.  Thoughts?
> > 
> > CSE should consistently keep track of what insns are no-op moves (in its
> > definition, all passes have a slightly different definition of this),
> > and use that everywhere consistently.
> So does that mean you object to the cse.c portion of Richard's patch?

It's more a "what we need to do in the future" thing, it is stage 4 now,
it is too big a change to do now.

What patch?  The "34" patch?  https://gcc.gnu.org/r278411 .

I don't think each stanza of code should use its own "noop-ness", no.

I don't know if this patch makes matters worse or not.  It doesn't seem
suitable for stage 4 though.  And Richard said the cse.c part breaks
rs6000, if that is true, yes I do object ;-)


Segher


[PATCH] configure: Re-disable building cross-gdbserver

2020-02-08 Thread Maciej W. Rozycki
Correct fallout from commit 919adfe84092 ("Move gdbserver to top level") 
and revert to not building `gdbserver' in a cross-configuration, that is 
where host != target, matching the documented behaviour.  We have no way 
to support non-native `gdbserver', and native `gdbserver' is usually of 
no use with cross-GDB of the chosen host.

* configure.ac: Do not build `gdbserver' if $is_cross_compiler.
* configure: Regenerate.
---
Hi,

 Verified with a native build, a crossed build of a native configuration 
and a build of a cross-debugger; Canadian Cross not checked.  OK to apply?

 NB I have noticed that the gdbserver(1) man page is still being built and 
installed (as ${target_alias}-gdbserver.1, if building a cross-debugger), 
even if `gdbserver' itself is not (and it is not built if only `gdbserver' 
is built while GDB is not).  Obviously this is due to the man page still living 
under gdb/doc/, and I presume it will be addressed sometime soon, by 
moving the man page somewhere under gdbserver/, right?

 Also there are currently a number of mismatches in configure.ac between 
the gcc and the binutils-gdb repositories; what is the plan to eliminate 
them?

  Maciej
---
 configure|5 +++--
 configure.ac |5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

gdbserver-no-cross.diff
Index: binutils-gdb/configure
===
--- binutils-gdb.orig/configure
+++ binutils-gdb/configure
@@ -3538,12 +3538,13 @@ case "${target}" in
 ;;
 esac
 
-# Only allow gdbserver on some systems.
+# Only allow native gdbserver and then only on some systems.
 if test -d ${srcdir}/gdbserver; then
 if test x$enable_gdbserver = x; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for gdbserver 
support" >&5
 $as_echo_n "checking for gdbserver support... " >&6; }
-   if (srcdir=${srcdir}/gdbserver; \
+   if test x${is_cross_compiler} = xyes \
+   || (srcdir=${srcdir}/gdbserver; \
. ${srcdir}/configure.srv; \
test -n "$UNSUPPORTED")
then
Index: binutils-gdb/configure.ac
===
--- binutils-gdb.orig/configure.ac
+++ binutils-gdb/configure.ac
@@ -782,11 +782,12 @@ case "${target}" in
 ;;
 esac
 
-# Only allow gdbserver on some systems.
+# Only allow native gdbserver and then only on some systems.
 if test -d ${srcdir}/gdbserver; then
 if test x$enable_gdbserver = x; then
AC_MSG_CHECKING([for gdbserver support])
-   if (srcdir=${srcdir}/gdbserver; \
+   if test x${is_cross_compiler} = xyes \
+   || (srcdir=${srcdir}/gdbserver; \
. ${srcdir}/configure.srv; \
test -n "$UNSUPPORTED")
then


Re: [PATCH, rs6000]: mark clobber for registers changed by untyped_call

2020-02-08 Thread Segher Boessenkool
Hi!

On Fri, Feb 07, 2020 at 03:10:05PM +0800, Jiufu Guo wrote:
> Segher Boessenkool  writes:
> > On Thu, Feb 06, 2020 at 10:49:36AM +0800, Jiufu Guo wrote:
> >> > ... and nothing in the rtl stream says that those return registers are
> >> > actually set by that call.  Maybe we should use gen_call_value?  Can we
> >> > ever be asked to return more than a single thing here?
> >> I was also thinking about using "gen_call_value" or "emit_clobber (r3)"
> >> which could generate rtl: "%3:DI=call [foo]" or "call [foo]; clobber
> >> r3".  This could tell optimizer that %3 is changed.
> >
> > The problem with "call ; clobber r3" is that some set+use of a pseudo can
> > be moved between these, and then rnreg can rename that to r3 again.  We
> > really need to show the call sets r3, in the general case (or that r3 is
> > live after the call, at least).
> Thanks!  On more careful thought, you are right: a set+use may be able to
> move between "call; clobber".  "%3=call" is OK, without this issue.

Yeah.

And we do not know which of the registers will be used for the return, in
untyped_call (only untyped_return knows).  But we can add clobbers of all
registers that *might* be used for the return; we do know that here, see
operands[2] of untyped_call.

This really should be done in generic code though?


Segher


[COMMITTED] c++: Use constexpr to avoid wrong -Wsign-compare

2020-02-08 Thread Jason Merrill
We would like to do constexpr evaluation to avoid false positives on 
warnings, but constexpr evaluation can involve function body copying 
that changes DECL_UID, which breaks -fcompare-debug.  In the PR Jakub 
suggested that one possibility would be to avoid operations that might 
affect DECL_UID; this patch implements that suggestion.
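
As an illustration only (this is not the new Wsign-compare-9.C testcase), the
kind of false positive being targeted is a signed/unsigned comparison whose
signed operand constant-evaluates to a non-negative value; the names here are
made up:

  // bound () constant-evaluates to 42, so once fold_for_warn calls
  // maybe_constant_value the comparison is against a known non-negative
  // constant and -Wsign-compare should stay quiet.
  constexpr int bound () { return 42; }

  bool in_range (unsigned n)
  {
    return n < bound ();
  }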


The other three patches are to avoid regressions from this change.
We recently started unsharing values we find in cv_cache, but we 
shouldn't do that if it's the same as the argument.  No longer unsharing 
regresses the diagnostic locations in decomp48.C.  And the digest_init 
patch avoids a regression on desig14.C.


All tested x86_64-pc-linux-gnu, applying to trunk.
From 00c458ede4df6ff37d709b8a8d3135e2aa4c9d14 Mon Sep 17 00:00:00 2001
From: Jason Merrill 
Date: Sun, 26 Jan 2020 22:58:32 -0500
Subject: [PATCH 4/4] c++: Use constexpr to avoid wrong -Wsign-compare
 (PR90691).
To: gcc-patches@gcc.gnu.org

We would like to do constexpr evaluation to avoid false positives on
warnings, but constexpr evaluation can involve function body copying that
changes DECL_UID, which breaks -fcompare-debug.  So let's remember
that we need to avoid that.

	PR c++/90691
	* expr.c (fold_for_warn): Call maybe_constant_value.
	* constexpr.c (struct constexpr_ctx): Add uid_sensitive field.
	(maybe_constant_value): Add uid_sensitive parm.
	(get_fundef_copy): Don't copy if it's true.
	(cxx_eval_call_expression): Don't instantiate if it's true.
	(cxx_eval_outermost_constant_expr): Likewise.
---
 gcc/cp/cp-tree.h|  2 +-
 gcc/cp/constexpr.c  | 42 +++--
 gcc/cp/expr.c   |  2 +
 gcc/testsuite/g++.dg/warn/Walways-true-3.C  |  4 +-
 gcc/testsuite/g++.dg/warn/Wsign-compare-9.C | 22 +++
 5 files changed, 58 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/warn/Wsign-compare-9.C

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index c46d1e92b3b..037d3b64538 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7930,7 +7930,7 @@ extern bool require_potential_rvalue_constant_expression (tree);
 extern tree cxx_constant_value			(tree, tree = NULL_TREE);
 extern void cxx_constant_dtor			(tree, tree);
 extern tree cxx_constant_init			(tree, tree = NULL_TREE);
-extern tree maybe_constant_value		(tree, tree = NULL_TREE, bool = false);
+extern tree maybe_constant_value		(tree, tree = NULL_TREE, bool = false, bool = false);
 extern tree maybe_constant_init			(tree, tree = NULL_TREE, bool = false);
 extern tree fold_non_dependent_expr		(tree,
 		 tsubst_flags_t = tf_warning_or_error,
diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 152142f0b31..03ab9ae2045 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -1074,13 +1074,18 @@ struct constexpr_ctx {
   /* If inside SWITCH_EXPR.  */
   constexpr_switch_state *css_state;
 
-  /* Whether we should error on a non-constant expression or fail quietly.  */
+  /* Whether we should error on a non-constant expression or fail quietly.
+ This flag needs to be here, but some of the others could move to global
+ if they get larger than a word.  */
   bool quiet;
   /* Whether we are strictly conforming to constant expression rules or
  trying harder to get a constant value.  */
   bool strict;
   /* Whether __builtin_is_constant_evaluated () should be true.  */
   bool manifestly_const_eval;
+  /* Whether we want to avoid doing anything that will cause extra DECL_UID
+ generation.  */
+  bool uid_sensitive;
 };
 
 /* A table of all constexpr calls that have been evaluated by the
@@ -1145,7 +1150,7 @@ static GTY(()) hash_map *fundef_copies_table;
is parms, TYPE is result.  */
 
 static tree
-get_fundef_copy (constexpr_fundef *fundef)
+get_fundef_copy (const constexpr_ctx *ctx, constexpr_fundef *fundef)
 {
   tree copy;
   bool existed;
@@ -1162,6 +1167,9 @@ get_fundef_copy (constexpr_fundef *fundef)
 }
   else if (*slot == NULL_TREE)
 {
+  if (ctx->uid_sensitive)
+	return NULL_TREE;
+
   /* We've already used the function itself, so make a copy.  */
   copy = build_tree_list (NULL, NULL);
   tree saved_body = DECL_SAVED_TREE (fundef->decl);
@@ -2232,6 +2240,7 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t,
 
   /* We can't defer instantiating the function any longer.  */
   if (!DECL_INITIAL (fun)
+  && !ctx->uid_sensitive
   && DECL_TEMPLOID_INSTANTIATION (fun))
 {
   location_t save_loc = input_location;
@@ -2378,13 +2387,12 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t,
 	  gcc_assert (at_eof >= 2 && ctx->quiet);
 	  *non_constant_p = true;
 	}
-  else
+  else if (tree copy = get_fundef_copy (ctx, new_call.fundef))
 	{
 	  tree body, parms, res;
 	  releasing_vec ctors;
 
 	  /* Reuse or create a new unshared copy of this function's body.  */
-	  tree copy = get_fundef_copy (new_call.fundef);
 	  body = TREE_PURPOSE (copy);
 	

Re: [PATCH v2] c++: Handle CONSTRUCTORs without indexes in find_array_ctor_elt [PR93549]

2020-02-08 Thread Jason Merrill

On 2/8/20 5:14 AM, Jakub Jelinek wrote:

Hi!

On Thu, Feb 06, 2020 at 05:04:49PM +0100, Jakub Jelinek wrote:

On Thu, Feb 06, 2020 at 10:38:25AM -0500, Jason Merrill wrote:

I don't know, can try to add some instrumentation and do bootstrap/regtest
with it.  The handling of the CONSTRUCTORs with missing or present or mixed
indexes is what I found in various middle-end routines.
The only thing I see in our verifiers is that in GIMPLE function bodies,
we don't allow non-VECTOR_TYPE CONSTRUCTORs with any elements, and for
VECTOR_TYPE CONSTRUCTORs we require that indexes are NULL for elements with
VECTOR_TYPE and for others require that it is either NULL or INTEGER_CST
matching the position (so effectively for those direct access is still
possible).



Where are these verifiers?  I'm not finding them.


tree-cfg.c (verify_gimple_assign_single).
Though, we don't really have verifiers for initializers of global variables,
guess it would need to be called somewhere from varpool_node::assemble_decl
or so (or other varpool method or multiple of them).


Here is a new version, which assumes CONSTRUCTORs with all or none indexes
and for CONSTRUCTORs without indexes performs the verification for
flag_checking directly in find_array_ctor_elt.  For CONSTRUCTORs with
indexes, it doesn't do the verification of all elts, because some CONSTRUCTORs
can be large, and it "verifies" only what it really needs - if all elts
touched during the binary search have indexes, that is actually all we care
about because we are sure we found the right elt.  It is just if we see a
missing index we need assurance that all are missing to be able to directly
access it.

The assumption then simplifies the patch: for no-index CONSTRUCTORs we can
use direct access, like for CONSTRUCTORs where the last elt's index is equal to
the elt position.  If we append right after the last elt, we should just clear
the index so that we don't violate the assumption, and if we need a gap
between the elts and the elt to be added, we need to add indexes.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?


Looks good, thanks.


2020-02-08  Jakub Jelinek  

PR c++/93549
* constexpr.c (find_array_ctor_elt): If last element has no index,
for flag_checking verify all elts have no index.  If i is within the
elts, return it directly, if it is right after the last elt, append
if NULL index, otherwise force indexes on all elts.
(cxx_eval_store_expression): Allow cep->index to be NULL.

* g++.dg/ext/constexpr-pr93549.C: New test.

--- gcc/cp/constexpr.c.jj   2020-02-05 11:12:33.632383924 +0100
+++ gcc/cp/constexpr.c  2020-02-07 19:30:10.549836834 +0100
@@ -3028,8 +3028,32 @@ find_array_ctor_elt (tree ary, tree dind
if (end > 0)
  {
tree cindex = (*elts)[end - 1].index;
-  if (TREE_CODE (cindex) == INTEGER_CST
- && compare_tree_int (cindex, end - 1) == 0)
+  if (cindex == NULL_TREE)
+   {
+ /* Verify that if the last index is missing, all indexes
+are missing.  */
+ if (flag_checking)
+   for (unsigned int j = 0; j < len - 1; ++j)
+ gcc_assert ((*elts)[j].index == NULL_TREE);
+ if (i < end)
+   return i;
+ else
+   {
+ begin = end;
+ if (i == end)
+   /* If the element is to be added right at the end,
+  make sure it is added with cleared index too.  */
+   dindex = NULL_TREE;
+ else if (insert)
+   /* Otherwise, in order not to break the assumption
+  that CONSTRUCTOR either has all indexes or none,
+  we need to add indexes to all elements.  */
+   for (unsigned int j = 0; j < len; ++j)
+ (*elts)[j].index = build_int_cst (TREE_TYPE (dindex), j);
+   }
+   }
+  else if (TREE_CODE (cindex) == INTEGER_CST
+  && compare_tree_int (cindex, end - 1) == 0)
{
  if (i < end)
return i;
@@ -4551,7 +4575,8 @@ cxx_eval_store_expression (const constex
= find_array_ctor_elt (*valp, index, /*insert*/true);
  gcc_assert (i >= 0);
  cep = CONSTRUCTOR_ELT (*valp, i);
- gcc_assert (TREE_CODE (cep->index) != RANGE_EXPR);
+ gcc_assert (cep->index == NULL_TREE
+ || TREE_CODE (cep->index) != RANGE_EXPR);
}
else
{
--- gcc/testsuite/g++.dg/ext/constexpr-pr93549.C.jj 2020-02-07 
19:14:24.163815456 +0100
+++ gcc/testsuite/g++.dg/ext/constexpr-pr93549.C2020-02-07 
19:14:24.163815456 +0100
@@ -0,0 +1,26 @@
+// PR c++/93549
+// { dg-do compile { target c++17 } }
+// { dg-options "-O2 -Wno-psabi -w" }
+
+struct simd {
+  using shortx8 [[gnu::vector_size(16)]] = short;
+  shortx8 data;
+  constexpr simd (short x) : data{x, x, x, x, x, x, x, x} {}
+  constexpr friend unsigned operator== (simd lhs, simd rhs)
+  {
+

[PATCH] i386: Properly pop restore token in signal frame

2020-02-08 Thread H.J. Lu
Linux CET kernel places a restore token on shadow stack for signal
handler to enhance security.  The restore token is 8 byte and aligned
to 8 bytes.  It is usually transparent to user programs since kernel
will pop the restore token when signal handler returns.  But when an
exception is thrown from a signal handler, now we need to pop the
restore token from shadow stack.  For x86-64, we just need to treat
the signal frame as normal frame.  For i386, we need to search for
the restore token to check if the original shadow stack is 8 byte
aligned.  If the original shadow stack is 8 byte aligned, we just
need to pop 2 slots, one restore token, from shadow stack.  Otherwise,
we need to pop 3 slots, one restore token + 4 byte pading, from
shadow stack.

This patch also includes 2 tests, one has a restore token with 4 byte
padding and one without.

Tested on Linux/x86-64 CET machine with and without -m32.  OK for master
and backport to GCC 8/9 branches?

Thanks.

H.J.
---
libgcc/

PR libgcc/85334
* config/i386/shadow-stack-unwind.h (_Unwind_Frames_Increment):
New.

gcc/testsuite/

PR libgcc/85334
* g++.target/i386/pr85334-1.C: New test.
* g++.target/i386/pr85334-2.C: Likewise.
---
 gcc/testsuite/g++.target/i386/pr85334-1.C | 55 +++
 gcc/testsuite/g++.target/i386/pr85334-2.C | 48 
 libgcc/config/i386/shadow-stack-unwind.h  | 43 ++
 3 files changed, 146 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/i386/pr85334-1.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr85334-2.C

diff --git a/gcc/testsuite/g++.target/i386/pr85334-1.C 
b/gcc/testsuite/g++.target/i386/pr85334-1.C
new file mode 100644
index 000..3c5ccad1714
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr85334-1.C
@@ -0,0 +1,55 @@
+// { dg-do run }
+// { dg-require-effective-target cet }
+// { dg-additional-options "-fexceptions -fnon-call-exceptions -fcf-protection" }
+
+// Delta between numbers of call stacks of pr85334-1.C and pr85334-2.C is 1.
+
+#include 
+#include 
+
+void sighandler (int signo, siginfo_t * si, void * uc)
+{
+  throw (5);
+}
+
+char *
+__attribute ((noinline, noclone))
+dosegv ()
+{
+  * ((volatile int *)0) = 12;
+  return 0;
+}
+
+int
+__attribute ((noinline, noclone))
+func2 ()
+{
+  try {
+dosegv ();
+  }
+  catch (int x) {
+return (x != 5);
+  }
+  return 1;
+}
+
+int
+__attribute ((noinline, noclone))
+func1 ()
+{
+  return func2 ();
+}
+
+int main ()
+{
+  struct sigaction sa;
+  int status;
+
+  sa.sa_sigaction = sighandler;
+  sa.sa_flags = SA_SIGINFO;
+
+  status = sigaction (SIGSEGV, & sa, NULL);
+  status = sigaction (SIGBUS, & sa, NULL);
+
+  return func1 ();
+}
diff --git a/gcc/testsuite/g++.target/i386/pr85334-2.C 
b/gcc/testsuite/g++.target/i386/pr85334-2.C
new file mode 100644
index 000..e2b5afe78cb
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr85334-2.C
@@ -0,0 +1,48 @@
+// { dg-do run }
+// { dg-require-effective-target cet }
+// { dg-additional-options "-fexceptions -fnon-call-exceptions -fcf-protection" }
+
+// Delta between numbers of call stacks of pr85334-1.C and pr85334-2.C is 1.
+
+#include 
+#include 
+
+void sighandler (int signo, siginfo_t * si, void * uc)
+{
+  throw (5);
+}
+
+char *
+__attribute ((noinline, noclone))
+dosegv ()
+{
+  * ((volatile int *)0) = 12;
+  return 0;
+}
+
+int
+__attribute ((noinline, noclone))
+func1 ()
+{
+  try {
+dosegv ();
+  }
+  catch (int x) {
+return (x != 5);
+  }
+  return 1;
+}
+
+int main ()
+{
+  struct sigaction sa;
+  int status;
+
+  sa.sa_sigaction = sighandler;
+  sa.sa_flags = SA_SIGINFO;
+
+  status = sigaction (SIGSEGV, & sa, NULL);
+  status = sigaction (SIGBUS, & sa, NULL);
+
+  return func1 ();
+}
diff --git a/libgcc/config/i386/shadow-stack-unwind.h 
b/libgcc/config/i386/shadow-stack-unwind.h
index a0244d2ee70..a5f9235b146 100644
--- a/libgcc/config/i386/shadow-stack-unwind.h
+++ b/libgcc/config/i386/shadow-stack-unwind.h
@@ -49,3 +49,46 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
}   \
 }  \
 while (0)
+
+/* Linux CET kernel places a restore token on shadow stack for signal
+   handler to enhance security.  The restore token is 8 byte and aligned
+   to 8 bytes.  It is usually transparent to user programs since kernel
+   will pop the restore token when signal handler returns.  But when an
+   exception is thrown from a signal handler, now we need to pop the
+   restore token from shadow stack.  For x86-64, we just need to treat
+   the signal frame as normal frame.  For i386, we need to search for
+   the restore token to check if the original shadow stack is 8 byte
+   aligned.  If the original shadow stack is 8 byte aligned, we just
+   need to pop 2 slots, one restore token, from shadow stack.  Otherwise,
+   we need to pop 3 slots, one restore token + 

Re: [PATCH] i386: Make xmm16-xmm31 call used even in ms ABI

2020-02-08 Thread Uros Bizjak
On Sat, Feb 8, 2020 at 11:52 AM Jakub Jelinek  wrote:
>
> On Sat, Feb 08, 2020 at 11:32:40AM +0100, Uros Bizjak wrote:
> > I think that the patch should also be backported to gcc-9 branch. The
> > change is backward compatible, since the new code will save and
> > restore zmm16+ registers at the caller site, and the old code (e.g.
> > existing libraries) will then unnecessarily save and restore regs in
> > the callee.
>
> Actually, it isn't backward compatible.
> If both caller and callee touch xmm16+ (sure, unlikely), then if caller
> is compiled by new gcc and callee by old gcc it is the case you described,
> caller doesn't try to preserve values across the call and callee uselessly
> saves them anyway (unless it fails to assemble due to SEH).
> But if caller is compiled by old gcc and callee by new gcc, caller assumes
> it can preserve values across the call in those registers and callee doesn't
> save them and clobbers them.

I was considering only the most likely scenario, where the compiler is
upgraded without recompiling existing system libraries.  So, with the
new compiler installed, the newly compiled code calls either new or old
libraries.  *Assuming* that old libraries never call new code, the
situation is OK.

As you point out, when the old code calls new code, registers get
clobbered.  By not backporting the patch, we can consider gcc-9 and
gcc-10 ABI incompatible; otherwise the forward incompatibility moves
to between gcc-9.2 and gcc-9.3.  I agree that the former is somewhat
more convenient.

IMO, this issue should be pointed out in the compiler release notes in any case.

Uros.


Re: [PATCH] Improve splitX passes management

2020-02-08 Thread Uros Bizjak
On Fri, Feb 7, 2020 at 5:41 PM Segher Boessenkool
 wrote:
>
> On Thu, Feb 06, 2020 at 12:13:35PM +0100, Uros Bizjak wrote:
> > The names of split_before_sched2 ("split4") and split_before_regstack
> > ("split3") do not reflect their insertion point in the sequence of passes,
> > where split_before_regstack follows split_before_sched2. Reorder the code
> > and rename the passes to reflect the reality.
>
> Renaming them to other splitN doesn't help much :-/  Having stable names
> is more important (some archs actually use these names), I'd say.  But

True, this is why I took care to find and fix all references to the
existing names.  It is a simple substitution of the old names with new ones.

> it's hard to come up with shortish more meaningful names.
>
> There is no real need for the N in splitN to be in order, but sure it
> can be surprising otherwise.

I'm not familiar with how the situation is handled for tree passes.
If a new instance of the same pass is inserted before an existing pass,
does the name of the existing pass get changed?

> > +bool
> > +pass_split_before_regstack::gate (function *)
> > +{
> > +#if HAVE_ATTR_length && defined (STACK_REGS)
> > +  /* If flow2 creates new instructions which need splitting
> > + and scheduling after reload is not done, they might not be
> > + split until final which doesn't allow splitting
> > + if HAVE_ATTR_length.  */
> > +  return !enable_split_before_sched2 ();
> > +#else
> > +  return false;
> > +#endif
> > +}
>
> flow.c was deleted in 2006...  Is this split still needed at all?  If
> so, change the comment please?  :-)

Hm, I don't know in general, but x86 needs a late split at the point
where instantiated registers won't change in the insn.  Mainly an
optimization, not a correctness issue, but the split is needed to fix
some HW issues in the processor (please see the comments in i386.md around
insn conditions involving the epilogue_completed flag).

As you suggested in the other mail, the issue with many late split
passes (passes 3-5) should be reviewed, considering also other (e.g.
non-stack regs) targets.

Uros.


Re: [PATCH] i386: Make xmm16-xmm31 call used even in ms ABI

2020-02-08 Thread Jakub Jelinek
On Sat, Feb 08, 2020 at 11:32:40AM +0100, Uros Bizjak wrote:
> I think that the patch should also be backported to gcc-9 branch. The
> change is backward compatible, since the new code will save and
> restore zmm16+ registers at the caller site, and the old code (e.g.
> existing libraries) will then unnecessarily save and restore regs in
> the callee.

Actually, it isn't backward compatible.
If both caller and callee touch xmm16+ (sure, unlikely), then if caller
is compiled by new gcc and callee by old gcc it is the case you described,
caller doesn't try to preserve values across the call and callee uselessly
saves them anyway (unless it fails to assemble due to SEH).
But if caller is compiled by old gcc and callee by new gcc, caller assumes
it can preserve values across the call in those registers and callee doesn't
save them and clobbers them.
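
As a purely hypothetical illustration of that second direction (mirroring the
style of the pr65782.c test; this assumes AVX-512 is enabled in a 64-bit
ms-abi build so that xmm16 is usable):

  double callee (void);    /* compiled by a new GCC: may clobber xmm16-xmm31 */

  double caller (double d) /* compiled by an old GCC: treats xmm16 as saved */
  {
    register double x __asm ("xmm16") = d;
    asm ("" : "+v" (x));          /* keep x live in xmm16 */
    double y = callee ();         /* old code assumes xmm16 survives this */
    asm ("" : "+v" (x));
    return x + y;
  }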

Jakub



Re: [PATCH] i386: Make xmm16-xmm31 call used even in ms ABI

2020-02-08 Thread Jakub Jelinek
On Sat, Feb 08, 2020 at 11:32:40AM +0100, Uros Bizjak wrote:
> On Sat, Feb 8, 2020 at 11:05 AM Jakub Jelinek  wrote:
> >
> > On Sat, Feb 08, 2020 at 08:24:38AM +, JonY wrote:
> > > It does not, I just checked with the master branch of binutils.
> > ...
> > > I did a -c test build with an older toolchain, it fails to compile
> > > (invalid register for .seh_savexmm) while the latest gcc is passing,
> > > both are using the same binutils version.
> > >
> > > I guess that covers it?
> >
> > I went ahead and committed the patch (with the double config/i386/ in it
> > fixed obviously).
> 
> I think that the patch should also be backported to gcc-9 branch. The
> change is backward compatible, since the new code will save and
> restore zmm16+ registers at the caller site, and the old code (e.g.
> existing libraries) will then unnecessarily save and restore regs in
> the callee.

Agreed, will do together with other backports soon.

Jakub



Re: [PATCH] i386: Make xmm16-xmm31 call used even in ms ABI

2020-02-08 Thread Uros Bizjak
On Sat, Feb 8, 2020 at 11:05 AM Jakub Jelinek  wrote:
>
> On Sat, Feb 08, 2020 at 08:24:38AM +, JonY wrote:
> > It does not, I just checked with the master branch of binutils.
> ...
> > I did a -c test build with an older toolchain, it fails to compile
> > (invalid register for .seh_savexmm) while the latest gcc is passing,
> > both are using the same binutils version.
> >
> > I guess that covers it?
>
> I went ahead and committed the patch (with the double config/i386/ in it
> fixed obviously).

I think that the patch should also be backported to gcc-9 branch. The
change is backward compatible, since the new code will save and
restore zmm16+ registers at the caller site, and the old code (e.g.
existing libraries) will then unnecessarily save and restore regs in
the callee.

HJ fixed some other MSABI issue there.

Uros.

>
> > > 2020-02-07  Uroš Bizjak  
> > > Jakub Jelinek  
> > >
> > > PR target/65782
> > > * config/i386/config/i386/i386.h (CALL_USED_REGISTERS): Make
> > > xmm16-xmm31 call-used even in 64-bit ms-abi.
> > >
> > > * gcc.target/i386/pr65782.c: New test.
>
> Jakub
>


[PATCH v2] c++: Handle CONSTRUCTORs without indexes in find_array_ctor_elt [PR93549]

2020-02-08 Thread Jakub Jelinek
Hi!

On Thu, Feb 06, 2020 at 05:04:49PM +0100, Jakub Jelinek wrote:
> On Thu, Feb 06, 2020 at 10:38:25AM -0500, Jason Merrill wrote:
> > > I don't know, can try to add some instrumentation and do bootstrap/regtest
> > > with it.  The handling of the CONSTRUCTORs with missing or present or 
> > > mixed
> > > indexes is what I found in various middle-end routines.
> > > The only thing I see in our verifiers is that in GIMPLE function bodies,
> > > we don't allow non-VECTOR_TYPE CONSTRUCTORs with any elements, and for
> > > VECTOR_TYPE CONSTRUCTORs we require that indexes are NULL for elements 
> > > with
> > > VECTOR_TYPE and for others require that it is either NULL or INTEGER_CST
> > > matching the position (so effectively for those direct access is still
> > > possible).
> > >
> > 
> > Where are these verifiers?  I'm not finding them.
> 
> tree-cfg.c (verify_gimple_assign_single).
> Though, we don't really have verifiers for initializers of global variables,
> guess it would need to be called somewhere from varpool_node::assemble_decl
> or so (or other varpool method or multiple of them).

Here is a new version, which assumes CONSTRUCTORs with all or none indexes
and for CONSTRUCTORs without indexes performs the verification for
flag_checking directly in find_array_ctor_elt.  For CONSTRUCTORs with
indexes, it doesn't do the verification of all elts, because some CONSTRUCTORs
can be large, and it "verifies" only what it really needs - if all elts
touched during the binary search have indexes, that is actually all we care
about because we are sure we found the right elt.  It is just that if we see a
missing index, we need assurance that all are missing to be able to directly
access it.

The assumption then simplifies the patch: for no-index CONSTRUCTORs we can
use direct access, like for CONSTRUCTORs where the last elt's index is equal to
the elt position.  If we append right after the last elt, we should just clear
the index so that we don't violate the assumption, and if we need a gap
between the elts and the elt to be added, we need to add indexes.
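
To illustrate the append/gap rule with a toy model (illustration only, not GCC
code; the container and helper names are made up):

  #include <optional>
  #include <utility>
  #include <vector>

  using elt = std::pair<std::optional<unsigned>, int>;   // (index, value)

  // Insert VALUE at position POS into a constructor that currently has no
  // indexes, preserving the all-or-none invariant described above.
  void insert_at (std::vector<elt> &elts, unsigned pos, int value)
  {
    if (pos == elts.size ())
      elts.push_back ({std::nullopt, value});   // append: position == index
    else if (pos > elts.size ())
      {
        for (unsigned j = 0; j < elts.size (); ++j)
          elts[j].first = j;                    // force explicit indexes
        elts.push_back ({pos, value});          // so the gap is representable
      }
    // pos < elts.size (): direct access, elts[pos] is already the element.
  }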

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-02-08  Jakub Jelinek  

PR c++/93549
* constexpr.c (find_array_ctor_elt): If last element has no index,
for flag_checking verify all elts have no index.  If i is within the
elts, return it directly, if it is right after the last elt, append
if NULL index, otherwise force indexes on all elts.
(cxx_eval_store_expression): Allow cep->index to be NULL.

* g++.dg/ext/constexpr-pr93549.C: New test.

--- gcc/cp/constexpr.c.jj   2020-02-05 11:12:33.632383924 +0100
+++ gcc/cp/constexpr.c  2020-02-07 19:30:10.549836834 +0100
@@ -3028,8 +3028,32 @@ find_array_ctor_elt (tree ary, tree dind
   if (end > 0)
 {
   tree cindex = (*elts)[end - 1].index;
-  if (TREE_CODE (cindex) == INTEGER_CST
- && compare_tree_int (cindex, end - 1) == 0)
+  if (cindex == NULL_TREE)
+   {
+ /* Verify that if the last index is missing, all indexes
+are missing.  */
+ if (flag_checking)
+   for (unsigned int j = 0; j < len - 1; ++j)
+ gcc_assert ((*elts)[j].index == NULL_TREE);
+ if (i < end)
+   return i;
+ else
+   {
+ begin = end;
+ if (i == end)
+   /* If the element is to be added right at the end,
+  make sure it is added with cleared index too.  */
+   dindex = NULL_TREE;
+ else if (insert)
+   /* Otherwise, in order not to break the assumption
+  that CONSTRUCTOR either has all indexes or none,
+  we need to add indexes to all elements.  */
+   for (unsigned int j = 0; j < len; ++j)
+ (*elts)[j].index = build_int_cst (TREE_TYPE (dindex), j);
+   }
+   }
+  else if (TREE_CODE (cindex) == INTEGER_CST
+  && compare_tree_int (cindex, end - 1) == 0)
{
  if (i < end)
return i;
@@ -4551,7 +4575,8 @@ cxx_eval_store_expression (const constex
= find_array_ctor_elt (*valp, index, /*insert*/true);
  gcc_assert (i >= 0);
  cep = CONSTRUCTOR_ELT (*valp, i);
- gcc_assert (TREE_CODE (cep->index) != RANGE_EXPR);
+ gcc_assert (cep->index == NULL_TREE
+ || TREE_CODE (cep->index) != RANGE_EXPR);
}
   else
{
--- gcc/testsuite/g++.dg/ext/constexpr-pr93549.C.jj 2020-02-07 
19:14:24.163815456 +0100
+++ gcc/testsuite/g++.dg/ext/constexpr-pr93549.C2020-02-07 
19:14:24.163815456 +0100
@@ -0,0 +1,26 @@
+// PR c++/93549
+// { dg-do compile { target c++17 } }
+// { dg-options "-O2 -Wno-psabi -w" }
+
+struct simd {
+  using shortx8 [[gnu::vector_size(16)]] = short;
+  shortx8 data;
+  constexpr simd (short x) : data{x, x, x, x, x, x, x, x} {}
+  constexpr friend unsigned operator== (simd lhs, 

Re: [PATCH] i386: Make xmm16-xmm31 call used even in ms ABI

2020-02-08 Thread Jakub Jelinek
On Sat, Feb 08, 2020 at 08:24:38AM +, JonY wrote:
> It does not, I just checked with the master branch of binutils.
...
> I did a -c test build with an older toolchain, it fails to compile
> (invalid register for .seh_savexmm) while the latest gcc is passing,
> both are using the same binutils version.
> 
> I guess that covers it?

I went ahead and committed the patch (with the double config/i386/ in it
fixed obviously).

> > 2020-02-07  Uroš Bizjak  
> > Jakub Jelinek  
> > 
> > PR target/65782
> > * config/i386/config/i386/i386.h (CALL_USED_REGISTERS): Make
> > xmm16-xmm31 call-used even in 64-bit ms-abi.
> > 
> > * gcc.target/i386/pr65782.c: New test.

Jakub



[FYI] Patches that fix testing sanitizers with qemu user mode

2020-02-08 Thread Andrew Pinski
Hi,
  These two patches are what I use to fix testing of the santizers
with qemu.  The first one disables coloring always as for some reason
when running with qemu (but not normally), coloring is detected.  I
have not gone and debugged the reason why the sanitizers does not
detect coloring when run under dejagnu even though, it is designed to
be running under a psedu-TTY.

The second patch disables LSAN when clone with the arguments that are
needed to stop the world fails.  This second patch should in theory go
upstream, but I am not going to do that right now; plus maybe it should
only disable it when used with asan instead of in general.

With these two patches, I get clean test results on aarch64-linux-gnu.
I am mainly sending them here as I think they are useful for people
who are doing testing; especially cross testing.

Thanks,
Andrew Pinski
From 7666c4ec5db5e99530f8ff9411b782326ce96655 Mon Sep 17 00:00:00 2001
From: Andrew Pinski 
Date: Thu, 6 Feb 2020 02:06:27 +
Subject: [PATCH 1/2] Set default coloring to never.

Auto does not work always.  So just disable coloring.

Change-Id: I68564c6b4c35ed6d7f4e2938d765f428995900e7
Signed-off-by: Andrew Pinski 
---
 libsanitizer/sanitizer_common/sanitizer_flags.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libsanitizer/sanitizer_common/sanitizer_flags.inc 
b/libsanitizer/sanitizer_common/sanitizer_flags.inc
index 065258a5a6e..9caf442b2d1 100644
--- a/libsanitizer/sanitizer_common/sanitizer_flags.inc
+++ b/libsanitizer/sanitizer_common/sanitizer_flags.inc
@@ -108,7 +108,7 @@ COMMON_FLAG(
 uptr, clear_shadow_mmap_threshold, 64 * 1024,
 "Large shadow regions are zero-filled using mmap(NORESERVE) instead of "
 "memset(). This is the threshold size in bytes.")
-COMMON_FLAG(const char *, color, "auto",
+COMMON_FLAG(const char *, color, "never",
 "Colorize reports: (always|never|auto).")
 COMMON_FLAG(
 bool, legacy_pthread_cond, false,
-- 
2.17.1

From 1a931d339e6e89bdc7292f6c52ba5f89278bda6a Mon Sep 17 00:00:00 2001
From: Andrew Pinski 
Date: Thu, 6 Feb 2020 17:18:26 -0800
Subject: [PATCH 2/2] Have the ability to disable lsan if clone fails.

clone fails for some cases for qemu, lsan is not needed
for asan testing.  So we can get lsan disabled when the
clone fails.

Tested on aarch64-linux-gnu with qemu.  All asan.exp tests now pass.

Change-Id: I1e281a3701ef0a1a4325ce2fbf8ca263a930fbe5
Signed-off-by: Andrew Pinski 
---
 libsanitizer/asan/asan_rtl.cpp|  2 +-
 libsanitizer/lsan/lsan.cpp|  5 ++
 libsanitizer/lsan/lsan_common.cpp | 81 +++
 libsanitizer/lsan/lsan_common.h   |  1 +
 4 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/libsanitizer/asan/asan_rtl.cpp b/libsanitizer/asan/asan_rtl.cpp
index 594d7752eea..93ef79e111d 100644
--- a/libsanitizer/asan/asan_rtl.cpp
+++ b/libsanitizer/asan/asan_rtl.cpp
@@ -500,7 +500,7 @@ static void AsanInitInternal() {
 
   if (CAN_SANITIZE_LEAKS) {
 __lsan::InitCommonLsan();
-if (common_flags()->detect_leaks && common_flags()->leak_check_at_exit) {
+if (!__lsan::lsan_cannotwork && common_flags()->detect_leaks && 
common_flags()->leak_check_at_exit) {
   if (flags()->halt_on_error)
 Atexit(__lsan::DoLeakCheck);
   else
diff --git a/libsanitizer/lsan/lsan.cpp b/libsanitizer/lsan/lsan.cpp
index 4ce03046ffb..3488f0436a1 100644
--- a/libsanitizer/lsan/lsan.cpp
+++ b/libsanitizer/lsan/lsan.cpp
@@ -108,6 +108,11 @@ extern "C" void __lsan_init() {
   AvoidCVE_2016_2143();
   InitializeFlags();
   InitCommonLsan();
+  if (lsan_cannotwork) {
+lsan_init_is_running = false;
+lsan_inited = true;
+return;
+  }
   InitializeAllocator();
   ReplaceSystemMalloc();
   InitTlsSize();
diff --git a/libsanitizer/lsan/lsan_common.cpp 
b/libsanitizer/lsan/lsan_common.cpp
index 9ff9f4c5d1c..2023a9685dd 100644
--- a/libsanitizer/lsan/lsan_common.cpp
+++ b/libsanitizer/lsan/lsan_common.cpp
@@ -25,9 +25,17 @@
 #include "sanitizer_common/sanitizer_thread_registry.h"
 #include "sanitizer_common/sanitizer_tls_get_addr.h"
 
+#if SANITIZER_LINUX
+#include  // for CLONE_* definitions
+#include  // for signal-related stuff
+#include 
+#endif
+
 #if CAN_SANITIZE_LEAKS
 namespace __lsan {
 
+bool lsan_cannotwork;
+
 // This mutex is used to prevent races between DoLeakCheck and IgnoreObject, 
and
 // also to protect the global list of root regions.
 BlockingMutex global_mutex(LINKER_INITIALIZED);
@@ -111,7 +119,76 @@ const char *MaybeCallLsanDefaultOptions() {
   return (&__lsan_default_options) ? __lsan_default_options() : "";
 }
 
+#if SANITIZER_LINUX
+namespace {
+
+class ScopedStackSpaceWithGuard {
+ public:
+  explicit ScopedStackSpaceWithGuard(uptr stack_size) {
+stack_size_ = stack_size;
+guard_size_ = GetPageSizeCached();
+// FIXME: Omitting MAP_STACK here works in current kernels but might break
+// in the future.
+guard_start_ = (uptr)MmapOrDie(stack_size_ + guard_size_,
+

Re: [PATCH] i386: Make xmm16-xmm31 call used even in ms ABI

2020-02-08 Thread JonY
On 2/7/20 11:28 AM, Jakub Jelinek wrote:
> On Fri, Feb 07, 2020 at 10:57:22AM +, JonY wrote:
>>>> Is this patch testing still required? I just got back from traveling.
>>>
>>> Yes, our reading of the MS ABI docs show that xmm16-31 are to be call used
>>> (not preserved over calls), while in gcc they are currently handled as
>>> preserved across the calls.
> 
> The other parts are I guess mainly about SEH.  Consider e.g.
> void
> foo (void)
> {
>   register double x __asm ("xmm14");
>   register double y __asm ("xmm18");
>   asm ("" : "=x" (x));
>   asm ("" : "=v" (y));
>   x += y;
>   y += x;
>   asm ("" : : "x" (x));
>   asm ("" : : "v" (y));
> }
> looking at cross-compiler output, with -O2 -mavx512f this emits
>   .file   "abcdeq.c"
>   .text
>   .align 16
>   .globl  foo
>   .def    foo;    .scl    2;  .type   32; .endef
>   .seh_proc   foo
> foo:
>   subq    $40, %rsp
>   .seh_stackalloc 40
>   vmovaps %xmm14, (%rsp)
>   .seh_savexmm    %xmm14, 0
>   vmovaps %xmm18, 16(%rsp)
>   .seh_savexmm    %xmm18, 16
>   .seh_endprologue
>   vaddsd  %xmm18, %xmm14, %xmm14
>   vaddsd  %xmm18, %xmm14, %xmm18
>   vmovaps (%rsp), %xmm14
>   vmovaps 16(%rsp), %xmm18
>   addq    $40, %rsp
>   ret
>   .seh_endproc
>   .ident  "GCC: (GNU) 10.0.1 20200207 (experimental)"
> Does whatever assembler mingw64 uses even assemble this (I mean the
> .seh_savexmm %xmm16, 16 could be problematic)?

It does not, I just checked with the master branch of binutils.

> I can find e.g.
> https://stackoverflow.com/questions/43152633/invalid-register-for-seh-savexmm-in-cygwin/43210527
> which then links to
> https://gcc.gnu.org/PR65782
> 
> So, I'd say we want to add PR target/65782 to the ChangeLog entry in the
> patch, and likely add a testcase like the above, so like below?
> 
> Have you tested the earlier version of the patch on mingw64 or cygwin?
> If yes, can you just test the testcase from the following patch, without and
> with the i386.h part and verify it FAILs without and PASSes with it?
> Just make check-gcc RUNTESTFLAGS=i386.exp=pr65782.c should do?
> If not, can you please test the whole patch?
> 

I did a -c test build with an older toolchain, it fails to compile
(invalid register for .seh_savexmm) while the latest gcc is passing,
both are using the same binutils version.

I guess that covers it?


> 2020-02-07  Uroš Bizjak  
>   Jakub Jelinek  
> 
>   PR target/65782
>   * config/i386/config/i386/i386.h (CALL_USED_REGISTERS): Make
>   xmm16-xmm31 call-used even in 64-bit ms-abi.
> 
>   * gcc.target/i386/pr65782.c: New test.
> 
> --- gcc/config/i386/config/i386/i386.h.jj 2020-01-22 10:19:24.199221986 
> +0100
> +++ gcc/config/i386/config/i386/i386.h2020-02-04 12:09:12.338341003 
> +0100
> @@ -1128,9 +1128,9 @@ extern const char *host_detect_local_cpu
>  /*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/\
>   6,   6,6,6,6,6,6,6, \
>  /*xmm16,xmm17,xmm18,xmm19,xmm20,xmm21,xmm22,xmm23*/  \
> - 6,6, 6,6,6,6,6,6,   \
> + 1,1, 1,1,1,1,1,1,   \
>  /*xmm24,xmm25,xmm26,xmm27,xmm28,xmm29,xmm30,xmm31*/  \
> - 6,6, 6,6,6,6,6,6,   \
> + 1,1, 1,1,1,1,1,1,   \
>   /* k0,  k1,  k2,  k3,  k4,  k5,  k6,  k7*/  \
>   1,   1,   1,   1,   1,   1,   1,   1 }
>  
> --- gcc/testsuite/gcc.target/i386/pr65782.c.jj2020-02-07 
> 12:21:09.472819018 +0100
> +++ gcc/testsuite/gcc.target/i386/pr65782.c   2020-02-07 12:24:06.820154495 
> +0100
> @@ -0,0 +1,16 @@
> +/* PR target/65782 */
> +/* { dg-do assemble { target { avx512vl && { ! ia32 } } } } */
> +/* { dg-options "-O2 -mavx512vl" } */
> +
> +void
> +foo (void)
> +{
> +  register double x __asm ("xmm14");
> +  register double y __asm ("xmm18");
> +  asm ("" : "=x" (x));
> +  asm ("" : "=v" (y));
> +  x += y;
> +  y += x;
> +  asm ("" : : "x" (x));
> +  asm ("" : : "v" (y));
> +}
> 
> 
>   Jakub
> 



