Re: drop -aux{dir,base}, revamp -dump{dir,base}

2020-07-13 Thread Alexandre Oliva
FWIW, I spotted two bugs in the completely untested patch by running it
through the compiler.  s/dmp_filename/dump_filename/g will fix it.

-- 
Alexandre Oliva, freedom fighterhe/himhttps://FSFLA.org/blogs/lxo/
Free Software Evangelist  Stallman was right, but he's left :(
GNU Toolchain Engineer   Live long and free, and prosper ethically


Re: drop -aux{dir,base}, revamp -dump{dir,base}

2020-07-13 Thread Alexandre Oliva
On Jun 30, 2020, Thomas Schwinge  wrote:

>>> I looked for a mkoffload
>>> program for it in the GCC source tree and couldn't find one.

> See 'gcc/config/i386/intelmic-mkoffload.c'.  ;-)

Thanks!

> :-) Yes, I quickly tested, and found that similar changes are required
> there, too.

> Can you easily adjust that file as you did for the GCN and nvptx
> 'mkoffload's?  Due to other workload, resolving it myself would be very
> low priority for me, I'm afraid.  (But I can easily test anything anybody
> else comes up with.)

Thanks for the offer.  Here's what I've been able to come up with,
totally untested.  Ok to install after you give it a spin?


revamp intelmic-mkoffload aux dump names

From: Alexandre Oliva 

Rework intelmic-mkoffload into the new aux and dump file naming
semantics.  Obey -save-temps.


for  gcc/ChangeLog

* config/i386/intelmic-mkoffload.c
(generate_target_descr_file): Use dumppfx for save_temps
files.  Pass -dumpbase et al down to the compiler.
(generate_target_offloadend_file): Likewise.
(generate_host_descr_file): Likewise.
(prepare_target_image): Likewise.  Move out_obj_filename
setting...
(main): ... here.  Detect -dumpbase, set dumppfx too.
---
 gcc/config/i386/intelmic-mkoffload.c |   72 +-
 1 file changed, 62 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/intelmic-mkoffload.c 
b/gcc/config/i386/intelmic-mkoffload.c
index e108bc0..52c06b9 100644
--- a/gcc/config/i386/intelmic-mkoffload.c
+++ b/gcc/config/i386/intelmic-mkoffload.c
@@ -245,8 +245,13 @@ compile_for_target (struct obstack *argv_obstack)
 static const char *
 generate_target_descr_file (const char *target_compiler)
 {
-  const char *src_filename = make_temp_file ("_target_descr.c");
-  const char *obj_filename = make_temp_file ("_target_descr.o");
+  char *dump_filename = concat (dumppfx, "_target_descr.c", NULL);
+  const char *src_filename = save_temps
+? dump_filename
+: make_temp_file ("_target_descr.c");
+  const char *obj_filename = save_temps
+? concat (dumppfx, "_target_descr.o", NULL)
+: make_temp_file ("_target_descr.o");
   temp_files[num_temps++] = src_filename;
   temp_files[num_temps++] = obj_filename;
   FILE *src_file = fopen (src_filename, "w");
@@ -293,6 +298,12 @@ generate_target_descr_file (const char *target_compiler)
 obstack_ptr_grow (_obstack, "-save-temps");
   if (verbose)
 obstack_ptr_grow (_obstack, "-v");
+  obstack_ptr_grow (_obstack, "-dumpdir");
+  obstack_ptr_grow (_obstack, "");
+  obstack_ptr_grow (_obstack, "-dumpbase");
+  obstack_ptr_grow (_obstack, dump_filename);
+  obstack_ptr_grow (_obstack, "-dumpbase-ext");
+  obstack_ptr_grow (_obstack, ".c");
   obstack_ptr_grow (_obstack, "-c");
   obstack_ptr_grow (_obstack, "-shared");
   obstack_ptr_grow (_obstack, "-fPIC");
@@ -309,8 +320,13 @@ generate_target_descr_file (const char *target_compiler)
 static const char *
 generate_target_offloadend_file (const char *target_compiler)
 {
-  const char *src_filename = make_temp_file ("_target_offloadend.c");
-  const char *obj_filename = make_temp_file ("_target_offloadend.o");
+  char *dump_filename = concat (dumppfx, "_target_offloadend.c", NULL);
+  const char *src_filename = save_temps
+? dmp_filename
+: make_temp_file ("_target_offloadend.c");
+  const char *obj_filename = save_temps
+? concat (dumppfx, "_target_offloadend.o", NULL)
+: make_temp_file ("_target_offloadend.o");
   temp_files[num_temps++] = src_filename;
   temp_files[num_temps++] = obj_filename;
   FILE *src_file = fopen (src_filename, "w");
@@ -335,6 +351,12 @@ generate_target_offloadend_file (const char 
*target_compiler)
 obstack_ptr_grow (_obstack, "-save-temps");
   if (verbose)
 obstack_ptr_grow (_obstack, "-v");
+  obstack_ptr_grow (_obstack, "-dumpdir");
+  obstack_ptr_grow (_obstack, "");
+  obstack_ptr_grow (_obstack, "-dumpbase");
+  obstack_ptr_grow (_obstack, dump_filename);
+  obstack_ptr_grow (_obstack, "-dumpbase-ext");
+  obstack_ptr_grow (_obstack, ".c");
   obstack_ptr_grow (_obstack, "-c");
   obstack_ptr_grow (_obstack, "-shared");
   obstack_ptr_grow (_obstack, "-fPIC");
@@ -350,8 +372,13 @@ generate_target_offloadend_file (const char 
*target_compiler)
 static const char *
 generate_host_descr_file (const char *host_compiler)
 {
-  const char *src_filename = make_temp_file ("_host_descr.c");
-  const char *obj_filename = make_temp_file ("_host_descr.o");
+  char *dump_filename = concat (dumppfx, "_host_descr.c", NULL);
+  const char *src_filename = save_temps
+? dmp_filename
+: make_temp_file ("_host_descr.c");
+  const char *obj_filename = save_temps
+? concat (dumppfx, "_host_descr.o", NULL)
+: make_temp_file ("_host_descr.o");
   temp_files[num_temps++] = src_filename;
   temp_files[num_temps++] = obj_filename;
   FILE *src_file = fopen (src_filename, "w");
@@ -402,6 +429,12 @@ generate_host_descr_file (const char 

Re: drop -aux{dir,base}, revamp -dump{dir,base}

2020-07-13 Thread Alexandre Oliva
Hello, Thomas,

Sorry it took me so long to get back to you.

Thanks for the patches you posted a couple of weeks ago, I'm merging
them into my aux-dump-revamp branch, where I'm accumulating the patches
related with this change.


On Jun 30, 2020, Thomas Schwinge  wrote:

> For example, if there are two 'offload_targets' configured, and you do a:

> PASS: libgomp.c++/scan-offload-1.C scan-offload-tree-dump-times optimized 
> "(?n)^main\\._omp_fn\\.0 " 1
> PASS: libgomp.c++/scan-offload-1.C scan-offload-tree-dump-times optimized 
> "(?n)^main\\._omp_fn\\.0 " 1

> Can we easily get the respective offload target into that, or even full
> 'scoff-format' if that's easier?  I surely could hack up something, but
> can you see an elegant way?

I'm not sure this is elegant, but...  does it work?
Ok to install if it does?  (I haven't tested it at all)


add offload target to testname for pass/fail message

From: Alexandre Oliva 

Offload tests that scan dump files may run multiple times, once per
offload target, but the test result messages do not mention the
offload target, so we may seem to have repeated results.  Fixed by
modifying the test name so that it contains the offload target name.


for  gcc/testsuite/ChangeLog

* lib/scanoffload.exp (scoff-testname, scoff-adjust): New.
(scoff): Call them.
---
 gcc/testsuite/lib/scanoffload.exp |   17 +++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/lib/scanoffload.exp 
b/gcc/testsuite/lib/scanoffload.exp
index 1a26e44..397420a 100644
--- a/gcc/testsuite/lib/scanoffload.exp
+++ b/gcc/testsuite/lib/scanoffload.exp
@@ -22,6 +22,19 @@ proc scoff-format { offtgt suffix } {
 return ".x$offtgt.mkoffload$suffix"
 }
 
+# Adjust an offload dump TESTNAME for offload TARGET.
+proc scoff-testname { target testname } {
+return "$target-$testname"
+}
+
+# Adjust the arglist ARGS, so that argument IDX gets scoff-formatted,
+# and argument 0 (the test name) gets scoff-testnamed.
+proc scoff-adjust { args idx target } {
+set args [lreplace $args $idx $idx "[scoff-format $target [lindex $args 
$idx]"]
+set args [lreplace $args 0 0 "[scoff-testname $target [lindex $args 0]"]
+return $args
+}
+
 # Wrapper for scan procs.
 # Argument 0 is the index of the argument to replace when calling
 # argument 1 with the remaining arguments.  Use end-1 or end or so.
@@ -34,7 +47,7 @@ proc scoff { args } {
 if [info exists offload_target] {
set target $offload_target
if { "$target" != "disable" } {
-   eval $prc [lreplace $args $idx $idx "[scoff-format $target [lindex 
$args $idx]]"]
+   eval $prc [scoff-adjust $args $idx $target]
}
 } else {
global offload_targets
@@ -42,7 +55,7 @@ proc scoff { args } {
# HSA offloading is doing things differently, doesn't use 
'mkoffload'.
if { "$target" == "hsa" } continue
 
-   eval $prc [lreplace $args $idx $idx "[scoff-format $target [lindex 
$args $idx]]"]
+   eval $prc [scoff-adjust $args $idx $target]
}
 }
 }


-- 
Alexandre Oliva, freedom fighterhe/himhttps://FSFLA.org/blogs/lxo/
Free Software Evangelist  Stallman was right, but he's left :(
GNU Toolchain Engineer   Live long and free, and prosper ethically


Re: [PATCH] Fortran : ICE in gfc_check_reshape PR95585

2020-07-13 Thread Jerry DeLisle via Gcc-patches

OK to commit and Backport.

On 6/18/20 1:11 AM, Mark Eggleston wrote:

Please find attached fix for PR95585.

OK to commit and backport?

Commit message:

Fortran  : ICE in gfc_check_reshape PR95585

Issue an error where an array is used before its definition
instead of an ICE.

2020-06-18  Steven G. Kargl  

gcc/fortran/

    PR fortran/PR95585
    *check.c (gfc_check_reshape): Add check for a value when
    the symbol has an attribute flavor FL_PARAMETER.

2020-06-17  Mark Eggleston 

gcc/testsuite/

    PR fortran/95585
    * gfortran.dg/pr95585.f90: New test.






Re: [PATCH] Fortran : ICE in gfc_check_pointer_assign PR95612

2020-07-13 Thread Jerry DeLisle via Gcc-patches

OK to commit and backport.

On 6/30/20 11:12 PM, Mark Eggleston wrote:
Please find attached a patch which is a fix for PR95612.  The original 
patch is by Steve Kargl.


OK to commit and backport?

Commit message:

Fortran  : ICE in gfc_check_pointer_assign PR95612

Output an error if the right hand value is a zero sized array or
does not have a symbol tree otherwise continue checking.

2020-07-01  Steven G. Kargl  

gcc/fortran/

    PR fortran/95612
    * expr.c (gfc_check_pointer_assigb): Output an error if
    rvalue is a zero sized array or output an error if rvalue
    doesn't have a symbol tree.

2020-07-01  Mark Eggleston 

gcc/testsuite/

    PR fortran/95612
    * gfortran.dg/pr95612.f90: New test.





[PING #2][PATCH] separate reading past the end from -Wstringop-overflow

2020-07-13 Thread Martin Sebor via Gcc-patches

Ping: https://gcc.gnu.org/pipermail/gcc-patches/2020-June/548786.html

On 7/7/20 12:56 PM, Martin Sebor wrote:

Ping.  Despite its size, there isn't much new in the patch, it
pretty much just splits an existing warning into two, one for
buffer overflow and another for "overread."

https://gcc.gnu.org/pipermail/gcc-patches/2020-June/548786.html

On 6/23/20 8:05 PM, Martin Sebor wrote:

-Wstringop-overflow is issued for both writing and reading past
the end, even though the latter problem is often viewed as being
lower severity than the former, or at least sufficiently different
to triage separately.  In CWE, for example, each of the two kinds
of problems has its own classification (CWE-787: Out-of-bounds
Write, and CWE-125: Out-of-bounds Read).

To make this easier with GCC, the attached patch introduces
a new option called -Wstringop-overread and splits the current
indiscriminate warning into the respective two categories.  Other
than the new option the improvements also exposed a few instances
of reading past the end that GCC doesn't detect that the new code
does thanks to more consistent checking.

As usual, I tested the patch by building Binutils/GDB, Glibc, and
the Linux kernel with no unexpected (or any, in fact) instances
of the two warnings.

The work involved more churn that I expected, and maintaining
the expected precedence of warnings (missing terminating nul,
excessive bounds, etc.) turned out to be more delicate and time
consuming than I would have liked.  I think the result is cleaner
code, but I'm quite sure it can still stand to be made better.
That's also my plan for the next step when I'd like to move this
checking out of builtins.c and calls.c and into a file of its own.
Ultimately, Jeff and have would like to integrate it into the path
isolation pass.

Martin

PS There's a FIXME comment in expand_builtin_memory_chk as
a reminder of a future change.  It currently has no bearing
on anything.






[PING #2][PATCH] correct memcmp expansion of constant representations containing embedded nuls (PR 95189)

2020-07-13 Thread Martin Sebor via Gcc-patches

Ping: https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549225.html

On 7/7/20 2:02 PM, Martin Sebor wrote:

Ping: https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549225.html

On 6/30/20 6:23 PM, Martin Sebor wrote:

An enhancement to GCC 10 to improve the expansion of strncmp
calls with strings with embedded nuls introduced a regression
in similar calls to memcmp.  A review of the changes that led
up to the regression exposed a number of questionable choices
that likely conspired to cause the bug.

For example, the name of the function with both the strncmp
enhancement as well as the memcmp bug is
inline_expand_builtin_string_cmp().  It's easy to assume that
the function handles calls to strcmp and strncmp but not also
memcmp.

Another similar example is the name of the second c_getstr()
argument -- strlen -- that doesn't actually store the length
of the retrieved string but rather its size in bytes
(including any embedded nuls, but excluding those appended
implicitly to zero out the remainder of an array the string
is stored in, up to the array's size).

Yet another example of a dubious choice is string_constant()
returning the empty string (i.e., STRING_CST with size 1) for
zero initializers of constants of any type (as opposed to one
of the same size as the constant object).

Besides fixing the memcmp bug the attached patch (hopefully)
also rectifies some of the otherwise more or less benign
mistakes that precipitated it, mostly by clarifying comments
and changing misleading names of functions, their arguments,
or local variables.

A happy consequence of the fixes is that they improve codegen
for calls to memcpy with constants whose representation includes
embedded nuls.

Tested on x86_64-linux.

Martin







[GCC 10 PATCH] c++: Treat GNU and Advanced SIMD vectors as distinct [PR95726]

2020-07-13 Thread Richard Sandiford
Jakub Jelinek  writes:
> On Wed, Jul 08, 2020 at 03:10:14PM +0100, Richard Sandiford wrote:
>> gcc/
>>  PR target/95726
>>  * config/aarch64/aarch64.c (aarch64_attribute_table): Add
>>  "Advanced SIMD type".
>>  * config/aarch64/aarch64-builtins.c: Include stringpool.h and
>>  attribs.h.
>>  (aarch64_init_simd_builtin_types): Add an "Advanced SIMD type"
>>  attribute to each Advanced SIMD type.
>> 
>> gcc/cp/
>>  PR target/95726
>>  * typeck.c (structural_comptypes): When comparing template
>>  specializations, differentiate between vectors that have and
>>  do not have an "Advanced SIMD type" attribute.
>> 
>> gcc/testsuite/
>>  PR target/95726
>>  * g++.target/aarch64/pr95726.C: New test.
>> --- a/gcc/cp/typeck.c
>> +++ b/gcc/cp/typeck.c
>> @@ -1429,6 +1429,15 @@ structural_comptypes (tree t1, tree t2, int strict)
>>|| maybe_ne (TYPE_VECTOR_SUBPARTS (t1), TYPE_VECTOR_SUBPARTS (t2))
>>|| !same_type_p (TREE_TYPE (t1), TREE_TYPE (t2)))
>>  return false;
>
> I'd at least add an explaining comment that it is a hack for GCC 8-10 only,
> for aarch64 and arm targets, why, reference to the PR and that it is solved
> differently for GCC 11+.

OK, done below.  This version also includes arm support.

>> +  if (comparing_specializations)
>> +{
>> +  bool asimd1 = lookup_attribute ("Advanced SIMD type",
>> +  TYPE_ATTRIBUTES (t1));
>> +  bool asimd2 = lookup_attribute ("Advanced SIMD type",
>> +  TYPE_ATTRIBUTES (t2));
>> +  if (asimd1 != asimd2)
>> +return false;
>> +}
>
> Otherwise LGTM for release branches if it is acceptable that way to Jason.

Thanks.  Jason, does it look OK from your point of view?

Richard


This is a release branch version of
r11-1741-g:31427b974ed7b7dd54e28fec595e731bf6eea8ba and
r11-2022-g:efe99cca78215e339ba79f0a900a896b4c0a3d36.

The trunk versions of the patch made GNU and Advanced SIMD vectors
distinct (but inter-convertible) in all cases.  However, the
traditional behaviour is that the types are distinct in template
arguments but not otherwise.

Following a suggestion from Jason, this patch puts the check
for different vector types under comparing_specializations.
In order to keep the backport as simple as possible, the patch
hard-codes the name of the attribute in the frontend rather than
adding a new branch-only target hook.

I didn't find a test that tripped the assert on the branch,
even with the --param in the PR, so instead I tested this by
forcing the hash function to only hash the tree code.  That made
the static assertion in the test fail without the patch but pass
with it.

This means that the tests pass for unmodified sources even
without the patch (unless you're very unlucky).

gcc/
PR target/95726
* config/aarch64/aarch64.c (aarch64_attribute_table): Add
"Advanced SIMD type".
* config/aarch64/aarch64-builtins.c: Include stringpool.h and
attribs.h.
(aarch64_init_simd_builtin_types): Add an "Advanced SIMD type"
attribute to each Advanced SIMD type.
* config/arm/arm.c (arm_attribute_table): Add "Advanced SIMD type".
* config/arm/arm-builtins.c: Include stringpool.h and attribs.h.
(arm_init_simd_builtin_types): Add an "Advanced SIMD type"
attribute to each Advanced SIMD type.

gcc/cp/
PR target/95726
* typeck.c (structural_comptypes): When comparing template
specializations, differentiate between vectors that have and
do not have an "Advanced SIMD type" attribute.

gcc/testsuite/
PR target/95726
* g++.target/aarch64/pr95726.C: New test.
* g++.target/arm/pr95726.C: Likewise.
---
 gcc/config/aarch64/aarch64-builtins.c  | 14 +---
 gcc/config/aarch64/aarch64.c   |  1 +
 gcc/config/arm/arm-builtins.c  | 15 ++--
 gcc/config/arm/arm.c   |  1 +
 gcc/cp/typeck.c| 42 ++
 gcc/testsuite/g++.target/aarch64/pr95726.C | 28 +++
 gcc/testsuite/g++.target/arm/pr95726.C | 31 
 7 files changed, 125 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/aarch64/pr95726.C
 create mode 100644 gcc/testsuite/g++.target/arm/pr95726.C

diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
index 5f8f8290f0f..6ebf6319efd 100644
--- a/gcc/cp/typeck.c
+++ b/gcc/cp/typeck.c
@@ -1429,6 +1429,48 @@ structural_comptypes (tree t1, tree t2, int strict)
  || maybe_ne (TYPE_VECTOR_SUBPARTS (t1), TYPE_VECTOR_SUBPARTS (t2))
  || !same_type_p (TREE_TYPE (t1), TREE_TYPE (t2)))
return false;
+
+  /* This hack is specific to release branches and fixes PR95726 for
+arm and aarch64 targets.  The idea is to treat GNU and Advanced
+SIMD types as distinct types for template specialization, but to
+treat them as 

[PATCH] rs6000: clean up testsuite power10_hw check

2020-07-13 Thread Aaron Sawdey via Gcc-patches
Because the check for power10_hw is not called
check_effective_target_power10_hw, it needs to be looked
for by is-effective-target-keyword. Also reorder things
in is-effective-target to put power10_hw with the other
ppc stuff.

These little fixes for power10 dejagnu support were pre-approved
for trunk and 10 by Segher. Posting before pushing.

  Aaron

gcc/testsuite/

* lib/target-supports.exp (is-effective-target):
Reorder to put powerpc stuff together.
(is-effective-target-keyword): Add power10_hw.
---
 gcc/testsuite/lib/target-supports.exp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 2e4c696fdd1..57eed3012b9 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7851,6 +7851,7 @@ proc is-effective-target { arg } {
  "p8vector_hw"{ set selected [check_p8vector_hw_available] }
  "p9vector_hw"{ set selected [check_p9vector_hw_available] }
  "p9modulo_hw"{ set selected [check_p9modulo_hw_available] }
+ "power10_hw" { set selected [check_power10_hw_available] }
  "ppc_float128_sw" { set selected [check_ppc_float128_sw_available] }
  "ppc_float128_hw" { set selected [check_ppc_float128_hw_available] }
  "ppc_recip_hw"   { set selected [check_ppc_recip_hw_available] }
@@ -7861,7 +7862,6 @@ proc is-effective-target { arg } {
  "named_sections" { set selected [check_named_sections_available] }
  "gc_sections"{ set selected [check_gc_sections_available] }
  "cxa_atexit" { set selected [check_cxa_atexit_available] }
- "power10_hw" { set selected [check_power10_hw_available] }
  default  { error "unknown effective target keyword `$arg'" }
}
 }
@@ -7883,6 +7883,7 @@ proc is-effective-target-keyword { arg } {
  "p8vector_hw"{ return 1 }
  "p9vector_hw"{ return 1 }
  "p9modulo_hw"{ return 1 }
+ "power10_hw" { return 1 }
  "ppc_float128_sw" { return 1 }
  "ppc_float128_hw" { return 1 }
  "ppc_recip_hw"   { return 1 }
-- 
2.17.1



[PATCH] c++: constraints and explicit instantiation [PR96164]

2020-07-13 Thread Patrick Palka via Gcc-patches
When considering to instantiate a member of a class template as part of an
explicit instantiation of the class template, we need to first check the
member's constraints before proceeding with the instantiation of the member.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK to commit
to trunk (and perhaps to the 10 branch)?

gcc/cp/ChangeLog:

PR c++/96164
* pt.c (do_type_instantiation): Update a paragraph taken from
[temp.explicit] to reflect the latest specification.  Don't
instantiate a member with unsatisfied constraints.

gcc/testsuite/ChangeLog:

PR c++/96164
* g++.dg/cpp2a/concepts-explicit-inst5.C: New test.
---
 gcc/cp/pt.c   | 23 +--
 .../g++.dg/cpp2a/concepts-explicit-inst5.C| 14 +++
 2 files changed, 25 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-explicit-inst5.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 61f22733858..c518f221f2f 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -24902,23 +24902,22 @@ do_type_instantiation (tree t, tree storage, 
tsubst_flags_t complain)
 
 [temp.explicit]
 
-The explicit instantiation of a class template specialization
-implies the instantiation of all of its members not
-previously explicitly specialized in the translation unit
-containing the explicit instantiation.
-
- Of course, we can't instantiate member template classes, since we
- don't have any arguments for them.  Note that the standard is
- unclear on whether the instantiation of the members are
- *explicit* instantiations or not.  However, the most natural
- interpretation is that it should be an explicit
- instantiation.  */
+An explicit instantiation that names a class template
+specialization is also an explicit instantiation of the same
+kind (declaration or definition) of each of its members (not
+including members inherited from base classes and members
+that are templates) that has not been previously explicitly
+specialized in the translation unit containing the explicit
+instantiation, provided that the associated constraints, if
+any, of that member are satisfied by the template arguments
+of the explicit instantiation.  */
   for (tree fld = TYPE_FIELDS (t); fld; fld = DECL_CHAIN (fld))
 if ((VAR_P (fld)
 || (TREE_CODE (fld) == FUNCTION_DECL
 && !static_p
 && user_provided_p (fld)))
-   && DECL_TEMPLATE_INSTANTIATION (fld))
+   && DECL_TEMPLATE_INSTANTIATION (fld)
+   && (!flag_concepts || constraints_satisfied_p (fld)))
   {
mark_decl_instantiated (fld, extern_p);
if (! extern_p)
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-explicit-inst5.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-explicit-inst5.C
new file mode 100644
index 000..05959a8972c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-explicit-inst5.C
@@ -0,0 +1,14 @@
+// PR c++/96164
+// { dg-do compile { target concepts } }
+
+template 
+struct A {
+void f() requires (N == 3) { static_assert(N == 3); }
+};
+template struct A<2>;
+
+template 
+struct B {
+void f() requires (N == 2) { static_assert(N == 3); } // { dg-error 
"assert" }
+};
+template struct B<2>;
-- 
2.28.0.rc0



Re: [PATCH] diagnostics: Add options to control the column units [PR49973] [PR86904]

2020-07-13 Thread Lewis Hyatt via Gcc-patches
On Mon, Jul 13, 2020 at 03:04:20PM -0400, David Malcolm wrote:
> > +@item -fdiagnostics-column-unit=@var{UNIT}
> > +@opindex fdiagnostics-column-unit
> > +Select the units for the column number.  This affects traditional 
> > diagnostics
> > +(in the absence of @option{-fno-show-column}), as well as JSON format
> > +diagnostics if requested.
> > +
> > +The default @var{UNIT}, @samp{display}, considers the number of display
> > +columns occupied by each character.  This may be larger than the number
> > +of bytes required to encode the character, in the case of tab
> > +characters, or it may be smaller, in the case of multibyte characters.
> > +For example, the character ``@U{03C0}'' occupies one display column,
> > +and its UTF-8 encoding requires two bytes; the character ``@U{1F642}''
> > +occupies two display columns, and its UTF-8 encoding requires four
> > +bytes.
> 
> Thanks for reworking this.
> 
> I'm wary of those @U commands - does the generated HTML from "make
> html" work? (similarly for the man page).  Would it be safer to express
> them in the following form?
> 
>  For example, the character ``GREEK SMALL LETTER PI (U+03C0)'' occupies one
>  display column, and its UTF-8 encoding requires two bytes; the character
>  ``SLIGHTLY SMILING FACE' (U+1F642)'' occupies two display columns, and
>  its UTF-8 encoding requires four bytes.
> 
> or somesuch?

The HTML works, yes, but I hadn't thought to check the man page. Seems the
@U notation is carried through unmodified there, which isn't so clear. I
changed it to your suggestion, no need to overcomplicate it.

> 
> > +Setting @var{UNIT} to @samp{byte} changes the column number to the raw byte
> > +count in all cases, as was traditionally output by GCC prior to version 
> > 11.1.0.
> 
> [...snip...]
> 
> >  @item -fdiagnostics-format=@var{FORMAT}
> >  @opindex fdiagnostics-format
> >  Select a different format for printing diagnostics.
> > @@ -4764,11 +4791,15 @@ might be printed in JSON form (after formatting) 
> > like this:
> >  "locations": [
> >  @{
> >  "caret": @{
> > +   "display-column": 3,
> > +   "byte-column": 3,
> >  "column": 3,
> >  "file": "misleading-indentation.c",
> >  "line": 15
> >  @},
> >  "finish": @{
> > +   "display-column": 4,
> > +   "byte-column": 4,
> >  "column": 4,
> >  "file": "misleading-indentation.c",
> >  "line": 15
> 
> Nit: the new fields don't line up with the old ones.
> 
> > @@ -4784,6 +4815,8 @@ might be printed in JSON form (after formatting) like 
> > this:
> >  "locations": [
> >  @{
> >  "caret": @{
> > +   "display-column": 5,
> > +   "byte-column": 5,
> >  "column": 5,
> >  "file": "misleading-indentation.c",
> >  "line": 17
> 
> Likewise.

You are referring to the source code as opposed to the generated HTML,
correct? Both look fine to me, I think above effect is due to mixed
tabs+spaces convention in the git diff ironically :). I'll make sure it
looks right in any case.

> 
> [...snip...]
> 
> > diff --git a/gcc/opts.c b/gcc/opts.c
> > index 340d99434b3..525f44d079f 100644
> > --- a/gcc/opts.c
> > +++ b/gcc/opts.c
> > @@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.  If not see
> >  #include "opt-suggestions.h"
> >  #include "diagnostic-color.h"
> >  #include "selftest.h"
> > +#include "cpplib.h"
> 
> Is this new #include still needed?
> 
> [...snip...]
> 
> 
> OK for trunk with those nits fixed.
> 
> Dave
> 
>

Thanks again for your time! I will address the above and then push in a day or 
two.

-Lewis


Re: [PATCH v3] c++: Make convert_like complain about bad ck_ref_bind again [PR95789]

2020-07-13 Thread Marek Polacek via Gcc-patches
On Sat, Jul 11, 2020 at 10:32:55AM -0400, Jason Merrill via Gcc-patches wrote:
> On 7/8/20 4:35 PM, Marek Polacek wrote:
> > On Fri, Jul 03, 2020 at 05:24:34PM -0400, Jason Merrill via Gcc-patches 
> > wrote:
> > > On 6/22/20 10:09 PM, Marek Polacek wrote:
> > > > convert_like issues errors about bad_p conversions at the beginning
> > > > of the function, but in the ck_ref_bind case, it only issues them
> > > > after we've called convert_like on the next conversion.
> > > > 
> > > > This doesn't work as expected since r10-7096 because when we see
> > > > a conversion from/to class type in a template, we return early, thereby
> > > > missing the error, and a bad_p conversion goes by undetected.  That
> > > > made the attached test to compile even though it should not.
> > > 
> > > Hmm, why isn't there an error at instantiation time?
> > 
> > We threw away the result: we're called from
> > 
> > 12213   if (complain & tf_error)
> > 12214 {
> > 12215   if (conv)
> > 12216 convert_like (conv, expr, complain);
> > ...
> > 12228   return error_mark_node;
> > 
> > and convert_like never saw converting this->f to B& again when 
> > instantiating.
> > 
> > > Though giving an error at template parsing time is definitely preferable.
> > 
> > Yup.
> > 
> > > > I had thought that I could just move the ck_ref_bind/bad_p errors
> > > > above to the rest of them, but that regressed diagnostics because
> > > > expr then wasn't converted yet by the nested convert_like_real call.
> > > 
> > > Yeah, the early section is really just for scalar conversions.
> > > 
> > > It would probably be good to do normal processing for all other bad
> > > conversions and only afterward build the IMPLICIT_CONV_EXPR if we aren't
> > > returning error_mark_node.
> > 
> > Ok, so that if we add more bad_p errors, we won't run into this again.
> > 
> > Unfortunately it's a bit ugly.  I could introduce a RETURN macro to
> > use RETURN (expr); instead of what I have now, but it wouldn't be simply
> > "conv_expr ? conv_expr : expr", because if we have error_mark_node, we
> > want to return that, not conv_expr.  Does that seem worth it?
> > (I wish I could at least use the op0 ?: op1 GNU extension.)
> 
> Or introduce a wrapper function that handles IMPLICIT_CONV_EXPR?

Perhaps like this?  convert_like_real_1 is unsightly and I have 
plans to fix that (and get rid of the convert_like macro), but
that's a clean-up, not a bug fix so it's not included in this patch.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk/10?

-- >8 --
convert_like issues errors about bad_p conversions at the beginning
of the function, but in the ck_ref_bind case, it only issues them
after we've called convert_like on the next conversion.

This doesn't work as expected since r10-7096 because when we see
a conversion from/to class type in a template, we return early, thereby
missing the error, and a bad_p conversion goes by undetected.  That
made the attached test to compile even though it should not.

I had thought that I could just move the ck_ref_bind/bad_p errors
above to the rest of them, but that regressed diagnostics because
expr then wasn't converted yet by the nested convert_like_real call.

So, for bad_p conversions, do the normal processing, but still return
the IMPLICIT_CONV_EXPR to avoid introducing trees that the template
processing can't handle well.  This I achieved by adding a wrapper
function.

gcc/cp/ChangeLog:

PR c++/95789
PR c++/96104
PR c++/96179
* call.c (convert_like_real_1): Renamed from convert_like_real.
(convert_like_real): New wrapper for convert_like_real_1.

gcc/testsuite/ChangeLog:

PR c++/95789
PR c++/96104
PR c++/96179
* g++.dg/conversion/ref4.C: New test.
* g++.dg/conversion/ref5.C: New test.
* g++.dg/conversion/ref6.C: New test.
---
 gcc/cp/call.c  | 54 ++
 gcc/testsuite/g++.dg/conversion/ref4.C | 22 +++
 gcc/testsuite/g++.dg/conversion/ref5.C | 14 +++
 gcc/testsuite/g++.dg/conversion/ref6.C | 24 
 4 files changed, 98 insertions(+), 16 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/conversion/ref4.C
 create mode 100644 gcc/testsuite/g++.dg/conversion/ref5.C
 create mode 100644 gcc/testsuite/g++.dg/conversion/ref6.C

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 5341a572980..6d5d5e801a5 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -171,6 +171,8 @@ static tree build_over_call (struct z_candidate *, int, 
tsubst_flags_t);
 /*c_cast_p=*/false, (COMPLAIN))
 static tree convert_like_real (conversion *, tree, tree, int, bool,
   bool, tsubst_flags_t);
+static tree convert_like_real_1 (conversion *, tree, tree, int, bool,
+bool, tsubst_flags_t);
 static void op_error (const op_location_t &, enum tree_code, enum tree_code,
  tree, 

testsuite: scan-lang-dump-times & scan-lang-dump-not

2020-07-13 Thread Nathan Sidwell


Sigh, last week's success at not breaking things failed with an
incorrect 'fix' this morning.  Let's reduce my confusion by making
lib/scanlang.exp the same on trunk as modules.

gcc/testsuite/
* lib/scanlang.exp (scan-lang-dump): Fix breakage.
(scan-lang-dump-times, scan-lang-dump-not): New.

nathan
--
Nathan Sidwell
diff --git c/gcc/testsuite/lib/scanlang.exp w/gcc/testsuite/lib/scanlang.exp
index d9f8c1e6e83..b85ec681666 100644
--- c/gcc/testsuite/lib/scanlang.exp
+++ w/gcc/testsuite/lib/scanlang.exp
@@ -35,6 +35,56 @@ proc scan-lang-dump { args } {
 	error "scan-lang-dump: too many arguments"
 	return
 }
+if { [llength $args] >= 3 } {
+	scan-dump "lang" [lindex $args 0] \
+		  "\[0-9\]\[0-9\]\[0-9\]l.[lindex $args 1]" "" [lindex $args 2]
+} else {
+	scan-dump "lang" [lindex $args 0] \
+		  "\[0-9\]\[0-9\]\[0-9\]l.[lindex $args 1]" ""
+}
+}
+
+# Call pass if pattern is present given number of times, otherwise fail.
+# Argument 0 is the regexp to match
+# Argument 1 is number of times the regexp must be found
+# Argument 2 is the name of the dumped tree pass
+# Argument 3 handles expected failures and the like
+proc scan-lang-dump-times { args } {
+
+if { [llength $args] < 3 } {
+	error "scan-lang-dump-times: too few arguments"
+	return
+}
+if { [llength $args] > 4 } {
+	error "scan-lang-dump-times: too many arguments"
+	return
+}
+if { [llength $args] >= 4 } {
+	scan-dump-times "lang" [lindex $args 0] [lindex $args 1] \
+			"\[0-9\]\[0-9\]\[0-9\]l.[lindex $args 2]" "" \
+			[lindex $args 3]
+} else {
+	scan-dump-times "lang" [lindex $args 0] [lindex $args 1] \
+			"\[0-9\]\[0-9\]\[0-9\]l.[lindex $args 2]" ""
+}
+}
+
+# Utility for scanning compiler result, invoked via dg-final.
+# Call pass if pattern is not present, otherwise fail.
+#
+# Argument 0 is the regexp to match
+# Argument 1 is the name of the dumped lang pass
+# Argument 2 handles expected failures and the like
+proc scan-lang-dump-not { args } {
+
+if { [llength $args] < 2 } {
+	error "scan-lang-dump-not: too few arguments"
+	return
+}
+if { [llength $args] > 3 } {
+	error "scan-lang-dump-not: too many arguments"
+	return
+}
 if { [llength $args] >= 3 } {
 	scan-dump-not "lang" [lindex $args 0] \
 	"\[0-9\]\[0-9\]\[0-9\]l.[lindex $args 1]" "" \


makefile cleanup

2020-07-13 Thread Nathan Sidwell
I discovered we were deleting cxxmain.c, which we generated at some 
point in time.  Not anymore.


nathan
--
Nathan Sidwell
diff --git c/gcc/Makefile.in w/gcc/Makefile.in
index d5dcc03d59b..2ba76656dbf 100644
--- c/gcc/Makefile.in
+++ w/gcc/Makefile.in
@@ -3501,7 +3501,6 @@ distclean: clean lang.distclean
 	-cd testsuite && rm -f x *.x *.x? *.exe *.rpo *.o *.s *.S *.c
 	-cd testsuite && rm -f *.out *.gcov *$(coverageexts)
 	-rm -rf ${QMTEST_DIR} stamp-qmtest
-	-rm -f cxxmain.c
 	-rm -f .gdbinit configargs.h
 	-rm -f gcov.pod
 # Delete po/*.gmo only if we are not building in the source directory.
diff --git c/gcc/cp/Make-lang.in w/gcc/cp/Make-lang.in
index 7896591dd4b..6ee4e41266f 100644
--- c/gcc/cp/Make-lang.in
+++ w/gcc/cp/Make-lang.in
@@ -287,7 +287,6 @@ c++.mostlyclean:
 c++.clean:
 c++.distclean:
 	-rm -f cp/config.status cp/Makefile
-	-rm -f cxxmain.c
 c++.maintainer-clean:
 #
 # Stage hooks:


Re: [PATCH] diagnostics: Add options to control the column units [PR49973] [PR86904]

2020-07-13 Thread David Malcolm via Gcc-patches
> On Wed, Jun 10, 2020 at 12:11:00PM -0400, David Malcolm wrote:
> > Thanks for the patch; sorry about the delay in reviewing it.
> > 
> > Some high-level review points
> > 
> > - I like the patch overall
> > 
> > - This will deserve an item in the release notes
> > 
> > - I don't like adding "global_tabstop" (I don't like global
> > variables).  Is there nowhere else we can handle this? I believe
> > there's a cluster of functions in the callgraph that make use of
> > it; can we simply pass around the tabstop value instead?  "tabstop"
> > seems to have several meanings.  If I'm reading the patch correctly
> >   * "tabstop > 0" means to expand tabs so that column numbers are a
> > multiple of tabstop
> >   * "tabstop == 0" means "don't expand tabs"
> >   * "tabstop < 0" in some places means: use the global_tabstop
value
> > Is it possible to eliminate global_tabstop value?  Or is there some
> > deep reason I'm missing?
> > 
> > I'll do a more thorough review once that's addressed/resolved
(since
> > eliminating global_tabstop might touch a few places).
> >
> 
> Thanks for the feedback! The attached updated patch addresses these
> concerns. Regarding tabstop, I have removed the new static variable
> global_tabstop in charset.c. FWIW, the usage of "tabstop" arguments
in the
> various new APIs did previously work a bit more consistently than you
> described. In all cases "tabstop <= 0" meant to use the default
value,
> otherwise it specified the tabstop to use (with tabstop=1 naturally
> restoring the old behavior of changing tabs to a single space). In
order
> for libcpp to provide this feature (callers can pass tabstop <= 0 to
get a
> default, and the default can in turn by configured when processing
the
> -ftabstop option), it does need to remember the default, and this has
to
> be a file-level static variable because the routines need to work
> independent of any cpp_reader instance. (Some frontends don't use
> libcpp to read their input, for instance.) Anyway, I see the point
that
> this file-level static, being accessible with cpp_set_tabstop() and
> cpp_get_tabstop(), is effectively just a global variable, so I have
> removed this feature, which just means that all callers need to pass
the
> tabstop they want to use. I am now rather using the
diagnostic_context
> object to remember the value passed to -ftabstop. The only place this
> involves global variables is now in c-family/c-indentation.c, where
if I
> understood correctly, the only diagnostic_context available is
global_dc,
> so I am getting the tabstop value from there. Please let me know if
> there's a better way to handle that? Prior to my patch, the tabstop
was
> obtained from a different global variable (extern cpp_options
*cpp_opts),
> so at least conservation of total globals is maintained. :)

Thanks.  That sounds like a good approach.


> Compared to the previous version, this one is a bit longer, since 25
or
> so call sites had to be modified to know the value of -ftabstop. Most
of
> the churn is in diagnostic-show-locus.c, because there are a fair
number of
> static helper functions and helper classes there, which just needed
to
> receive the diagnostic_context object from their callers. I could
> have made this simpler by letting the tabstop argument default to
> something like 8 in all functions that require it... this would
remove the
> need to pass it in all the selftests that are indifferent to it. I
figured
> it would be better to force this argument to be passed, though, or
else in
> the future it may be easy to forget to pass it where it is needed. 

Thanks.

> > Thanks for adding docs; some nits on them:
> > 
> > > --- a/gcc/doc/invoke.texi
> > > +++ b/gcc/doc/invoke.texi
> > 
> > [...snip...]
> > 
> > > +@item -fdiagnostics-column-unit=@var{UNIT}
> > > +@opindex fdiagnostics-column-unit
> > > +Select the units for the column number.  This affects
traditional diagnostics
> > > +(in the absence of @option{-fno-show-column}), as well as JSON
format
> > > +diagnostics if requested.
> > > +
> > > +The default @var{UNIT}, @samp{display}, considers the number of
display columns
> > > +occupied by each character.  This may be larger than the number
of bytes
> > > +occupied, in the case of tab characters, or it may be smaller,
in the case of
> > > +multibyte characters.  For example, the UTF-8 character ``@U{03C
0}'' occupies
> > > +two bytes and one display column, while the character ``@U{1F642
}'' occupies
> > > +four bytes and two display columns.
> > 
> > This is imprecise.  A unicode code point occupies some number of
display columns,
> > and its *UTF-8 encoding* occupies some number of bytes.
> > 
> > [and my inner pedant is now thinking: what about combining
diacritics? 
> > But I don't think we can ever issue a diagnostic on a diacritic; I
> > *think* we only ever care about the per-glyph level]
> > 
> > > +Setting @var{UNIT} to @samp{byte} changes the column number to
the
> > raw byte
> > > +count in all cases, as was 

Re: [PATCH 4/4] doc: Clarify __builtin_return_address [PR94891]

2020-07-13 Thread Szabolcs Nagy
The 07/13/2020 12:24, Richard Sandiford wrote:
> Szabolcs Nagy  writes:
> > The expected semantics and valid usage of __builtin_return_address is
> > not clear since it exposes implementation internals that are normally
> > not meaningful to portable c code.
> >
> > This documentation change tries to clarify the semantics in case the
> > return address is stored in a mangled form. This affects AArch64 when
> > pointer authentication is used for the return address signing (i.e.
> > -mbranch-protection=pac-ret).
> >
> > gcc/ChangeLog:
> >
> > 2020-06-04  Szabolcs Nagy  
> >
> > * doc/extend.texi: Update the text for  __builtin_return_address.
> 
> LGTM apart from minor nits:
...
> OK for trunk and branches with those changes.

thanks, committed with those changes
and backported to gcc-10 now.

(i will backport to gcc-9 and gcc-8
a bit later.)


Re: [PATCH] c++: Fix tentative parsing of enum-specifier [PR96077]

2020-07-13 Thread Marek Polacek via Gcc-patches
On Mon, Jul 13, 2020 at 12:18:03PM -0400, Nathan Sidwell wrote:
> On 7/13/20 11:15 AM, Marek Polacek wrote:
> > On Mon, Jul 13, 2020 at 10:08:52AM -0400, Nathan Sidwell wrote:
> > > On 7/10/20 11:43 AM, Marek Polacek via Gcc-patches wrote:
> > > > Here's an interesting issue: in this code a ) is missing:
> > > > 
> > > > enum { E = (2 } e;
> > > > 
> > > > but we compile the code anyway, and E is set to 0 in build_enumerator,
> > > > which is sneaky.
> > > > 
> > > > The problem is that cp_parser_enum_specifier parses tentatively, because
> > > > when we see the enum keyword, we don't know yet if we'll find an
> > > > enum-specifier, opaque-enum-declaration, or elaborated-enum-specifier.
> > > > 
> > > > In this test when we call cp_parser_enumerator_list we're still parsing
> > > > tentatively, and as a consequence, parens.require_close (parser) in
> > > > cp_parser_primary_expression doesn't report any errors.  But we only go
> > > > on to parse the enumerator-list after we've seen a {, at which point we
> > > > might as well commit -- we know we're dealing with an enum-specifier.
> > > > 
> > > > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > > 
> > > ok, (with the typo Jakub noticed fixed :)
> > 
> > Thanks.
> > 
> > > does this also fix 95288?
> > 
> > Hmm, in a way: instead of
> > 
> > 95288.C:3:3: error: expected primary-expression before ‘enum’
> >  3 |   enum X
> >|   ^~~~
> > 
> > with this patch we say
> > 
> > 95288.C:5:8: error: expected ‘}’ before ‘.’ token
> >  5 |   a. // DOT
> >|^
> > 95288.C:4:5: note: to match this ‘{’
> >  4 | {
> >| ^
> > 
> > but the rest of the output is the same.
> > 
> > Whether that means that 95288 is fixed, I'm not too sure.  But since you 
> > opened
> > it, if you're happy with the diagnostic with this patch, I'll add the test 
> > and
> > close 95288.
> 
> Yes, that's the fix I'd expect.  I'm sorry the report didn't make it clear
> that the function-scope diagnostic is the poor one.

No worries.  I've added the test and closed the PR.

Marek



[committed] c++: Add test [PR95288]

2020-07-13 Thread Marek Polacek via Gcc-patches
Somewhat improved by r11-2064, though we still generate junk that seems
redundant.  But at least it says
error: expected ‘}’ before ‘.’ token

PR c++/95288
* g++.dg/diagnostic/enum2.C: New test.
---
 gcc/testsuite/g++.dg/diagnostic/enum2.C | 10 ++
 1 file changed, 10 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/diagnostic/enum2.C

diff --git a/gcc/testsuite/g++.dg/diagnostic/enum2.C 
b/gcc/testsuite/g++.dg/diagnostic/enum2.C
new file mode 100644
index 000..e6e8320e3b6
--- /dev/null
+++ b/gcc/testsuite/g++.dg/diagnostic/enum2.C
@@ -0,0 +1,10 @@
+// PR c++/95288
+
+void f()
+{
+  enum X 
+{ // { dg-message "to match this" }
+  a. // { dg-error "expected" }
+  b
+}; // { dg-error "extra" "" { target c++98_only } }
+} // { dg-error "expected" }

base-commit: 776e48e0931db69f158f40e5cb8e15463d879a42
-- 
2.26.2



Re: [PATCH 1/2] x86: Pass a copy of the string length to cmpstrnqi

2020-07-13 Thread Jeff Law via Gcc-patches
On Sun, 2020-05-31 at 16:10 -0700, H.J. Lu via Gcc-patches wrote:
> cmpstrnsi expander may pass the actual string length directly to cmpstrnqi
> patterns.  For cmpstrnsi, one of the strings must be a constant and
> expand_builtin_strncmp rewrites the length argument to be the minimum of
> the const string length and the actual string length.  But it is not the
> case for cmpmemsi.  Pass a copy of the string length to cmpstrnqi patterns
> to avoid changing the actual string length by cmpstrnqi patterns.
> 
> gcc/
> 
>   PR target/95443
>   * config/i386/i386.md (cmpstrnsi): Pass a copy of the string
>   length to cmpstrnqi patterns.
> 
> gcc/testsuite/
> 
>   PR target/95443
>   * gcc.target/i386/pr95443-1.c: New test.
>   * gcc.target/i386/pr95443-2.c: Likewise.
OK
jeff
> 



Re: [PATCH] c++: Fix tentative parsing of enum-specifier [PR96077]

2020-07-13 Thread Nathan Sidwell

On 7/13/20 11:15 AM, Marek Polacek wrote:

On Mon, Jul 13, 2020 at 10:08:52AM -0400, Nathan Sidwell wrote:

On 7/10/20 11:43 AM, Marek Polacek via Gcc-patches wrote:

Here's an interesting issue: in this code a ) is missing:

enum { E = (2 } e;

but we compile the code anyway, and E is set to 0 in build_enumerator,
which is sneaky.

The problem is that cp_parser_enum_specifier parses tentatively, because
when we see the enum keyword, we don't know yet if we'll find an
enum-specifier, opaque-enum-declaration, or elaborated-enum-specifier.

In this test when we call cp_parser_enumerator_list we're still parsing
tentatively, and as a consequence, parens.require_close (parser) in
cp_parser_primary_expression doesn't report any errors.  But we only go
on to parse the enumerator-list after we've seen a {, at which point we
might as well commit -- we know we're dealing with an enum-specifier.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?


ok, (with the typo Jakub noticed fixed :)


Thanks.


does this also fix 95288?


Hmm, in a way: instead of

95288.C:3:3: error: expected primary-expression before ‘enum’
 3 |   enum X
   |   ^~~~

with this patch we say

95288.C:5:8: error: expected ‘}’ before ‘.’ token
 5 |   a. // DOT
   |^
95288.C:4:5: note: to match this ‘{’
 4 | {
   | ^

but the rest of the output is the same.

Whether that means that 95288 is fixed, I'm not too sure.  But since you opened
it, if you're happy with the diagnostic with this patch, I'll add the test and
close 95288.


Yes, that's the fix I'd expect.  I'm sorry the report didn't make it 
clear that the function-scope diagnostic is the poor one.


nathan
--
Nathan Sidwell


[PATCH] x86: Rename VF_AVX512VL_VF1_128_256 to VF1_AVX512ER_128_256

2020-07-13 Thread H.J. Lu via Gcc-patches
On Thu, Jul 9, 2020 at 6:35 AM H.J. Lu  wrote:
>
> On Thu, Jul 9, 2020 at 5:04 AM Kirill Yukhin  wrote:
> >
> > On 07 июл 09:06, H.J. Lu wrote:
> > > On Tue, Jul 7, 2020 at 8:56 AM Kirill Yukhin  
> > > wrote:
> > > >
> > > > Hello HJ,
> > > >
> > > > On 28 июн 07:19, H.J. Lu via Gcc-patches wrote:
> > > > > Enable FMA in rsqrt2 expander and fold rsqrtv16sf2 expander into
> > > > > rsqrt2 expander which expands to UNSPEC_RSQRT28 for 
> > > > > TARGET_AVX512ER.
> > > > > Although it doesn't show performance change in our workloads, FMA can
> > > > > improve other workloads.
> > > > >
> > > > > gcc/
> > > > >
> > > > >   PR target/88713
> > > > >   * config/i386/i386-expand.c (ix86_emit_swsqrtsf): Enable FMA.
> > > > >   * config/i386/sse.md (VF_AVX512VL_VF1_128_256): New.
> > > > >   (rsqrt2): Replace VF1_128_256 with 
> > > > > VF_AVX512VL_VF1_128_256.
> > > > >   (rsqrtv16sf2): Removed.
> > > > >
> > > > > gcc/testsuite/
> > > > >
> > > > >   PR target/88713
> > > > >   * gcc.target/i386/pr88713-1.c: New test.
> > > > >   * gcc.target/i386/pr88713-2.c: Likewise.
> > > >
> > > > So, you've introduced new rsqrt expanders for DF vectors and relaxed
> > > > condition for V16SF. What I didn't get is why did you change unspec
> > > > type from RSQRT to RSQRT28 for V16SF expander?
> > > >
> > >
> > > UNSPEC in define_expand is meaningless when the pattern is fully
> > > expanded by ix86_emit_swsqrtsf.  I believe that UNSPEC in rsqrt2
> > > expander can be removed.
> >
> > Agree.
>
> I will leave UNSPEC alone here.
>
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr88713-1.c
> > @@ -0,0 +1,13 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -Ofast -mno-avx512f -mfma" } */
> >
> > I gues -O2 is useless here (and in -2.c test).
>
> Fixed.
>
> > Othwerwise LGTM.
> >
>
> This is the patch I am checking in.
>

Since ix86_emit_swsqrtsf shouldn't be called with DF vector modes, rename
VF_AVX512VL_VF1_128_256 to VF1_AVX512ER_128_256 and drop DF vector modes.

-- 
H.J.
From 9a984a4ecc96cfa53b203be065f365ca7b1f3bf0 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Mon, 13 Jul 2020 09:07:00 -0700
Subject: [PATCH] x86: Rename VF_AVX512VL_VF1_128_256 to VF1_AVX512ER_128_256

Since ix86_emit_swsqrtsf shouldn't be called with DF vector modes, rename
VF_AVX512VL_VF1_128_256 to VF1_AVX512ER_128_256 and drop DF vector modes.

gcc/

	PR target/96186
	PR target/88713
	* config/i386/sse.md (VF_AVX512VL_VF1_128_256): Renamed to ...
	(VF1_AVX512ER_128_256): This.  Drop DF vector modes.
	(rsqrt2): Replace VF_AVX512VL_VF1_128_256 with
	VF1_AVX512ER_128_256.

gcc/testsuite/

	PR target/96186
	PR target/88713
	* gcc.target/i386/pr88713-3.c: New test.
---
 gcc/config/i386/sse.md| 14 ++
 gcc/testsuite/gcc.target/i386/pr88713-3.c | 17 +
 2 files changed, 23 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr88713-3.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index d3ad5833e1f..b6348de67cb 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -326,11 +326,9 @@ (define_mode_iterator VF_AVX512VL
   [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
 
-;; AVX512VL SF/DF plus 128- and 256-bit SF vector modes
-(define_mode_iterator VF_AVX512VL_VF1_128_256
-  [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
-   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX512VL")
-   (V2DF "TARGET_AVX512VL")])
+;; AVX512ER SF plus 128- and 256-bit SF vector modes
+(define_mode_iterator VF1_AVX512ER_128_256
+  [(V16SF "TARGET_AVX512ER") (V8SF "TARGET_AVX") V4SF])
 
 (define_mode_iterator VF2_AVX512VL
   [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
@@ -2076,9 +2074,9 @@ (define_insn "*_vmsqrt2"
(set_attr "mode" "")])
 
 (define_expand "rsqrt2"
-  [(set (match_operand:VF_AVX512VL_VF1_128_256 0 "register_operand")
-	(unspec:VF_AVX512VL_VF1_128_256
-	  [(match_operand:VF_AVX512VL_VF1_128_256 1 "vector_operand")]
+  [(set (match_operand:VF1_AVX512ER_128_256 0 "register_operand")
+	(unspec:VF1_AVX512ER_128_256
+	  [(match_operand:VF1_AVX512ER_128_256 1 "vector_operand")]
 	  UNSPEC_RSQRT))]
   "TARGET_SSE && TARGET_SSE_MATH"
 {
diff --git a/gcc/testsuite/gcc.target/i386/pr88713-3.c b/gcc/testsuite/gcc.target/i386/pr88713-3.c
new file mode 100644
index 000..85b6cf87a02
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr88713-3.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mno-avx512er -march=skylake-avx512" } */
+
+#include 
+
+double square(double d[3], double rad)
+{
+  double res[3];
+
+  for (int i = 0; i < 3; i++)
+{
+  res[i] = d[i] * d[i];
+  res[i] *= rad/sqrt(res[i]);
+}
+
+  return res[0];
+}
-- 
2.26.2



Re: [PATCH][GCC][aarch64] Generation of adjusted ldp/stp for vector types

2020-07-13 Thread Richard Sandiford
Hi,

Sorry for the slow review.

Przemyslaw Wirkus  writes:
> Hi,
>
> Introduce simple peephole2 optimization which substitutes a sequence of
> four consecutive load or store (LDR, STR) instructions with two load or
> store pair (LDP, STP) instructions for 2 element supported vector modes
> (V2SI, V2SF, V2DI, and V2DF).
> Generated load / store pair instruction offset is adjusted accordingly.
>
> Bootstrapped and tested on aarch64-none-linux-gnu.
>
> Example:
> $ cat stp_vec_v2sf.c
> typedef float __attribute__((vector_size(8))) vec;
>
> void
> store_adjusted(vec *out, vec x, vec y)
> {
>   out[400] = x;
>   out[401] = y;
>   out[402] = y;
>   out[403] = x;
> }
>
> Example compiled with:
> $ ./aarch64-none-linux-gnu-gcc -S -O2 stp_vec_v2sf.c -dp
>
> Before the patch:
>
> store_adjusted:
> str d0, [x0, 3200]// 9[c=4 l=4]  *aarch64_simd_movv2si/2
> str d1, [x0, 3208]// 11   [c=4 l=4]  *aarch64_simd_movv2si/2
> str d1, [x0, 3216]// 13   [c=4 l=4]  *aarch64_simd_movv2si/2
> str d0, [x0, 3224]// 15   [c=4 l=4]  *aarch64_simd_movv2si/2
> ret   // 26   [c=0 l=4]  *do_return
>
> After the patch:
>
> store_adjusted:
> add x1, x0, 3200// 27   [c=4 l=4]  *adddi3_aarch64/0
> stp d0, d1, [x1]// 28   [c=0 l=4]  vec_store_pairv2siv2si
> stp d1, d0, [x1, 16]// 29   [c=0 l=4]  vec_store_pairv2siv2si
> ret // 22   [c=0 l=4]  *do_return
>
>
> OK for master ?
>
> kind regards,
> Przemyslaw

Thanks for doing this, looks good.

My only real comment is that I wonder if we really need the nunits
parameter, or if we should instead change the scalar_mode to a general
machine_mode and just pass the vector mode to that.

Passing nunits means that we don't need a to_constant here:

> @@ -21970,7 +21973,7 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, 
> bool load,
>for (int i = 0; i < num_insns; i++)
>  offvals[i] = INTVAL (offset[i]);
>  
> -  msize = GET_MODE_SIZE (mode);
> +  msize = GET_MODE_SIZE (mode) * nunits;
>  
>/* Check if the offsets can be put in the right order to do a ldp/stp.  */
>qsort (offvals, num_insns, sizeof (HOST_WIDE_INT),

but I think adding to_constant is fine in this context, since it's
inherently non-SVE code.

That would also avoid having to recalculate the mode:

> @@ -22114,8 +22118,11 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool 
> load,
>replace_equiv_address_nv (mem_4, plus_constant (Pmode, operands[8],
> new_off_3 + msize), true);
>  
> -  if (!aarch64_mem_pair_operand (mem_1, mode)
> -  || !aarch64_mem_pair_operand (mem_3, mode))
> +  /* If nunits > 1 we are adjusting for vector mode. In this case we should
> + generate mode for vector built from nunits and scalar_mode provided.  */
> +  mem_mode = (nunits == 1) ? mode : mode_for_vector(mode, 
> nunits).else_void();
> +  if (!aarch64_mem_pair_operand (mem_1, mem_mode)
> +  || !aarch64_mem_pair_operand (mem_3, mem_mode))
>  return false;
>  
>if (code == ZERO_EXTEND)

…here.

One other (very) minor thing is that some of the lines were over the
80 character limit, but removing the nunits parameter might fix that :-)

> diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_vec_v2sf.c 
> b/gcc/testsuite/gcc.target/aarch64/ldp_vec_v2sf.c
> new file mode 100644
> index 
> ..f46dea1f748a094509ecfa0292a7c54e94164c9a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ldp_vec_v2sf.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +typedef float __attribute__((vector_size(8))) vec;
> +
> +vec
> +load_long(vec *v) {
> +  return v[110] + v[111] + v[112] + v[113];
> +}
> +
> +/* { dg-final { scan-assembler "add\tx\[0-9\]+, x\[0-9\]+, 880" } } */
> +/* { dg-final { scan-assembler "ldp\td\[0-9\]+, d\[0-9\]+, 
> \\\[x\[0-9\]+\\\]" } } */
> +/* { dg-final { scan-assembler "ldp\td\[0-9\]+, d\[0-9\]+, \\\[x\[0-9\]+, 
> 16\\\]" } } */

FWIW, it's possible to avoid many of these backslashes by quoting the
regexp with {…} rather than "…".  E.g.:

/* { dg-final { scan-assembler {ldp\td[0-9]+, d[0-9]+, \[x[0-9]+, 16\]} } } */

The above is fine too though.

Thanks,
Richard


Re: [PATCH] ipa-fnsummary: Fix ICE with switch predicates [PR96130]

2020-07-13 Thread Martin Jambor
On Fri, Jul 10 2020, Jakub Jelinek wrote:
> Hi!
>
> The following testcase ICEs since r10-3199.
> There is a switch with default label, where the controlling expression has
> range just 0..7 and there are case labels for all those 8 values, but
> nothing has yet optimized away the default.
> Since r10-3199, set_switch_stmt_execution_predicate sets the switch to
> default label's edge's predicate to a false predicate and then
> compute_bb_predicates propagates the predicates through the cfg, but false
> predicates aren't really added.  The caller of compute_bb_predicates
> in one place handles NULL bb->aux as false predicate:
>   if (fbi.info)
> {
>   if (bb->aux)
> bb_predicate = *(predicate *) bb->aux;
>   else
> bb_predicate = false;
> }
>   else
> bb_predicate = true;
> but then in two further spots that the patch below is changing
> it assumes bb->aux must be non-NULL.  Those two spots are guarded by a
> condition that is only true if fbi.info is non-NULL, so I think the right
> fix is to treat NULL aux as false predicate in those spots too.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/10.2?
>

The above is correct and the patch is OK.  OTOH, simply putting continue
statement in both of the two places when the predicate has not been
created (or if it is false) would also work and might avoid some
unnecessary work - the code below always computes a predicate only to
and it with the BB predicate.  But hopefully we do not have too many
unreachable loops after early optimizations and I can do this change
afterwards as a follow-up.

Thanks a lot for looking into this.

Martin



> 2020-07-10  Jakub Jelinek  
>
>   PR ipa/96130
>   * ipa-fnsummary.c (analyze_function_body): Treat NULL bb->aux
>   as false predicate.
>
>   * gcc.dg/torture/pr96130.c: New test.
>
> --- gcc/ipa-fnsummary.c.jj2020-04-05 00:27:26.0 +0200
> +++ gcc/ipa-fnsummary.c   2020-07-10 16:12:59.155168850 +0200
> @@ -2766,7 +2766,10 @@ analyze_function_body (struct cgraph_nod
> edge ex;
> unsigned int j;
> class tree_niter_desc niter_desc;
> -   bb_predicate = *(predicate *) loop->header->aux;
> +   if (loop->header->aux)
> + bb_predicate = *(predicate *) loop->header->aux;
> +   else
> + bb_predicate = false;
>  
> exits = get_loop_exit_edges (loop);
> FOR_EACH_VEC_ELT (exits, j, ex)
> @@ -2799,7 +2802,10 @@ analyze_function_body (struct cgraph_nod
> for (unsigned i = 0; i < loop->num_nodes; i++)
>   {
> gimple_stmt_iterator gsi;
> -   bb_predicate = *(predicate *) body[i]->aux;
> +   if (body[i]->aux)
> + bb_predicate = *(predicate *) body[i]->aux;
> +   else
> + bb_predicate = false;
> for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
>  gsi_next ())
>   {
> --- gcc/testsuite/gcc.dg/torture/pr96130.c.jj 2020-07-10 16:15:28.794082127 
> +0200
> +++ gcc/testsuite/gcc.dg/torture/pr96130.c2020-07-10 16:14:49.273633241 
> +0200
> @@ -0,0 +1,26 @@
> +/* PR ipa/96130 */
> +/* { dg-do compile } */
> +
> +struct S { unsigned j : 3; };
> +int k, l, m;
> +
> +void
> +foo (struct S x)
> +{
> +  while (l != 5)
> +switch (x.j)
> +  {
> +  case 1:
> +  case 3:
> +  case 4:
> +  case 6:
> +  case 2:
> +  case 5:
> + l = m;
> +  case 7:
> +  case 0:
> + k = 0;
> +  default:
> + break;
> +  }
> +}
>
>   Jakub


[PATCH] libgomp: Fix hang when profiling OpenACC programs with CUDA 9.0 nvprof

2020-07-13 Thread Kwok Cheung Yeung

Hello

(This patch was previously posted for OG7 at: 
https://gcc.gnu.org/pipermail/gcc-patches/2018-February/494594.html).


When the version of nvprof in CUDA 9.0 is run on an OpenACC program, it sets up 
a callback that is called on device initialization. Inside the callback, it 
calls the acc_get_device_type() function in libgomp. Inside 
acc_get_device_type(), it attempts to acquire the acc_device_lock lock, but that 
has already been acquired by goacc_lazy_initialize() at this point, so the 
program deadlocks.


This is fixed by making acc_get_device_type() return acc_device_none without 
attempting to acequire the lock when initialization has not finished yet. This 
appears to be legal according to the OpenACC spec, since it states 'If the 
device type has not yet been selected, the value acc_device_none may be 
returned.' (in section 3.2.3 of the OpenACC 2.7 spec).


I have added a testcase that sets up the situation presented by nvprof. This 
testcase hangs without the patch (hence the short dg-timeout), and passes with 
it. Tested on a x86-64 host with Nvidia and AMD GCN offloading.


Okay for master, GCC 10 branch and OG10?

Thanks

Kwok
commit d20f269e8571a76d682a500e78654ddd260ffaf1
Author: Kwok Cheung Yeung 
Date:   Fri Jul 10 14:06:26 2020 -0700

libgomp: Fix hang when profiling OpenACC programs with CUDA 9.0 nvprof

The version of nvprof in CUDA 9.0 causes a hang when used to profile an
OpenACC program.  This is because it calls acc_get_device_type from
a callback called during device initialization, which then attempts
to acquire acc_device_lock while it is already taken, resulting in
deadlock.  This works around the issue by returning acc_device_none
from acc_get_device_type without attempting to acquire the lock when
initialization has not completed yet.

2020-07-13  Tom de Vries  
Cesar Philippidis  
Thomas Schwinge  
Kwok Cheung Yeung  

libgomp/
* oacc-init.c (acc_init_state_lock, acc_init_state, acc_init_thread):
New variable.
(acc_init_1): Set acc_init_thread to pthread_self ().  Set
acc_init_state to initializing at the start, and to initialized at the
end.
(self_initializing_p): New function.
(acc_get_device_type): Return acc_device_none if called by thread that
is currently executing acc_init_1.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-cb-call.c: New.

diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
index 5d786a5..1e7f934 100644
--- a/libgomp/oacc-init.c
+++ b/libgomp/oacc-init.c
@@ -40,6 +40,11 @@
 
 static gomp_mutex_t acc_device_lock;
 
+static gomp_mutex_t acc_init_state_lock;
+static enum { uninitialized, initializing, initialized } acc_init_state
+  = uninitialized;
+static pthread_t acc_init_thread;
+
 /* A cached version of the dispatcher for the global "current" accelerator 
type,
e.g. used as the default when creating new host threads.  This is the
device-type equivalent of goacc_device_num (which specifies which device to
@@ -228,6 +233,11 @@ acc_dev_num_out_of_range (acc_device_t d, int ord, int 
ndevs)
 static struct gomp_device_descr *
 acc_init_1 (acc_device_t d, acc_construct_t parent_construct, int implicit)
 {
+  gomp_mutex_lock (_init_state_lock);
+  acc_init_state = initializing;
+  acc_init_thread = pthread_self ();
+  gomp_mutex_unlock (_init_state_lock);
+
   bool check_not_nested_p;
   if (implicit)
 {
@@ -317,6 +327,14 @@ acc_init_1 (acc_device_t d, acc_construct_t 
parent_construct, int implicit)
_info);
 }
 
+  /* We're setting 'initialized' *after* 'goacc_profiling_dispatch', so that a
+ nested 'acc_get_device_type' called from a profiling callback still sees
+ 'initializing', so that we don't deadlock when it then again tries to lock
+ 'goacc_prof_lock'.  See also the discussion in 'acc_get_device_type'.  */
+  gomp_mutex_lock (_init_state_lock);
+  acc_init_state = initialized;
+  gomp_mutex_unlock (_init_state_lock);
+
   return base_dev;
 }
 
@@ -643,6 +661,17 @@ acc_set_device_type (acc_device_t d)
 
 ialias (acc_set_device_type)
 
+static bool
+self_initializing_p (void)
+{
+  bool res;
+  gomp_mutex_lock (_init_state_lock);
+  res = (acc_init_state == initializing
+&& pthread_equal (acc_init_thread, pthread_self ()));
+  gomp_mutex_unlock (_init_state_lock);
+  return res;
+}
+
 acc_device_t
 acc_get_device_type (void)
 {
@@ -652,6 +681,15 @@ acc_get_device_type (void)
 
   if (thr && thr->base_dev)
 res = acc_device_type (thr->base_dev->type);
+  else if (self_initializing_p ())
+/* The Cuda libaccinj64.so version 9.0+ calls acc_get_device_type during 
the
+   acc_ev_device_init_start event callback, which is dispatched during
+   acc_init_1.  Trying to lock acc_device_lock during such a call (as we do
+   in the else clause below), will result in deadlock, since the lock has
+  

Re: [PATCH] c++: Fix tentative parsing of enum-specifier [PR96077]

2020-07-13 Thread Marek Polacek via Gcc-patches
On Mon, Jul 13, 2020 at 10:08:52AM -0400, Nathan Sidwell wrote:
> On 7/10/20 11:43 AM, Marek Polacek via Gcc-patches wrote:
> > Here's an interesting issue: in this code a ) is missing:
> > 
> >enum { E = (2 } e;
> > 
> > but we compile the code anyway, and E is set to 0 in build_enumerator,
> > which is sneaky.
> > 
> > The problem is that cp_parser_enum_specifier parses tentatively, because
> > when we see the enum keyword, we don't know yet if we'll find an
> > enum-specifier, opaque-enum-declaration, or elaborated-enum-specifier.
> > 
> > In this test when we call cp_parser_enumerator_list we're still parsing
> > tentatively, and as a consequence, parens.require_close (parser) in
> > cp_parser_primary_expression doesn't report any errors.  But we only go
> > on to parse the enumerator-list after we've seen a {, at which point we
> > might as well commit -- we know we're dealing with an enum-specifier.
> > 
> > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> 
> ok, (with the typo Jakub noticed fixed :)

Thanks.

> does this also fix 95288?

Hmm, in a way: instead of

95288.C:3:3: error: expected primary-expression before ‘enum’
3 |   enum X
  |   ^~~~

with this patch we say

95288.C:5:8: error: expected ‘}’ before ‘.’ token
5 |   a. // DOT
  |^
95288.C:4:5: note: to match this ‘{’
4 | {
  | ^

but the rest of the output is the same.

Whether that means that 95288 is fixed, I'm not too sure.  But since you opened
it, if you're happy with the diagnostic with this patch, I'll add the test and
close 95288.

Marek



testsuite: Fix scan-lang-dump-not

2020-07-13 Thread Nathan Sidwell

pushed as obvious

turned out scan-lang-dump-not was broken in the 3 argument case -- I'd 
missed a necessary empty arg.  Fixed thusly.


gcc/testsuite/
* lib/scanlang.exp (scan-lang-dump-not): Fix 3-arg case.

--
Nathan Sidwell
diff --git c/gcc/testsuite/lib/scanlang.exp w/gcc/testsuite/lib/scanlang.exp
index 2f80f65f569..d9f8c1e6e83 100644
--- c/gcc/testsuite/lib/scanlang.exp
+++ w/gcc/testsuite/lib/scanlang.exp
@@ -36,10 +36,11 @@ proc scan-lang-dump { args } {
 	return
 }
 if { [llength $args] >= 3 } {
-	scan-dump "lang" [lindex $args 0] \
-		  "\[0-9\]\[0-9\]\[0-9\]l.[lindex $args 1]" "" [lindex $args 2]
+	scan-dump-not "lang" [lindex $args 0] \
+	"\[0-9\]\[0-9\]\[0-9\]l.[lindex $args 1]" "" \
+	[lindex $args 2]
 } else {
-	scan-dump "lang" [lindex $args 0] \
-		  "\[0-9\]\[0-9\]\[0-9\]l.[lindex $args 1]" ""
+	scan-dump-not "lang" [lindex $args 0] \
+	"\[0-9\]\[0-9\]\[0-9\]l.[lindex $args 1]"
 }
 }


Re: [PATCH][RFC] __builtin_shuffle sometimes should produce zip1 rather than TBL (PR82199)

2020-07-13 Thread Dmitrij Pochepko
Hi,

I please take a look at new version (attached).

Thanks,
Dmitrij

On Sat, Jul 11, 2020 at 09:39:13AM +0100, Richard Sandiford wrote:
...
> This should push “newelt” instead.
done

...
> This test is endian-agnostic and should work for all targets, but…
...
> 
> …the shuffles in these tests are specific to little-endian targets.
> For big-endian targets, architectural lane 0 is the last in memory
> rather than first, so e.g. the big-endian permute vector for vzip_4.c
> would be: { 0, 1, 5, 6 } instead.
> 
> I guess the options are:
> 
> (1) add __ARM_BIG_ENDIAN preprocessor tests to choose the right mask
> (2) restrict the tests to aarch64_little_endian.
> (3) have two scan-assembler lines with opposite zip1/zip2 choices, e.g:
> 
> /* { dg-final { scan-assembler-times {[ \t]*zip1[ \t]+v[0-9]+\.2d} 1 { target 
> aarch64_big_endian } } } */
> /* { dg-final { scan-assembler-times {[ \t]*zip2[ \t]+v[0-9]+\.2d} 1 { target 
> aarch64_little_endian } } } */
> 
> (untested).
> 
> I guess (3) is probably best, but (2) is fine too if you're not set
> up to test big-endian.
> 
> Sorry for not noticing last time.  I only realised when testing the
> patch locally.
> 
> Thanks,
> Richard

I restricted tests to aarch64_little_endian, because I don't have big-endian 
setup to check it.
>From 197a9bc05f96c3f100b3f4748c9dd12a60de86d1 Mon Sep 17 00:00:00 2001
From: Dmitrij Pochepko 
Date: Mon, 13 Jul 2020 17:44:08 +0300
Subject: [PATCH]  __builtin_shuffle sometimes should produce zip1 rather than
 TBL (PR82199)

The following patch enables vector permutations optimization by using another vector element size when applicable.
It allows usage of simpler instructions in applicable cases.

example:

vector float f(vector float a, vector float b)
{
  return __builtin_shuffle  (a, b, (vector int){0, 1, 4,5});
}

was compiled into:
...
	adrpx0, .LC0
	ldr q2, [x0, #:lo12:.LC0]
	tbl v0.16b, {v0.16b - v1.16b}, v2.16b
...

and after patch:
...
	zip1v0.2d, v0.2d, v1.2d
...

bootstrapped and tested on aarch64-linux-gnu with no regressions

gcc/ChangeLog:

2020-07-13 Andrew Pinski   

	PR gcc/82199

	* gcc/config/aarch64/aarch64.c (aarch64_evpc_reencode): New function

gcc/testsuite/ChangeLog:

2020-07-13  Andrew Pinski   

	PR gcc/82199

	* gcc.target/aarch64/vdup_n_3.c: New test
	* gcc.target/aarch64/vzip_1.c: New test
	* gcc.target/aarch64/vzip_2.c: New test
	* gcc.target/aarch64/vzip_3.c: New test
	* gcc.target/aarch64/vzip_4.c: New test

Co-Authored-By: Dmitrij Pochepko
---
 gcc/config/aarch64/aarch64.c| 57 +
 gcc/testsuite/gcc.target/aarch64/vdup_n_3.c | 16 
 gcc/testsuite/gcc.target/aarch64/vzip_1.c   | 12 ++
 gcc/testsuite/gcc.target/aarch64/vzip_2.c   | 13 +++
 gcc/testsuite/gcc.target/aarch64/vzip_3.c   | 13 +++
 gcc/testsuite/gcc.target/aarch64/vzip_4.c   | 13 +++
 6 files changed, 124 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vdup_n_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vzip_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vzip_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vzip_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vzip_4.c

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 17dbe67..e259d05 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -19991,6 +19991,8 @@ struct expand_vec_perm_d
   bool testing_p;
 };
 
+static bool aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
+
 /* Generate a variable permutation.  */
 
 static void
@@ -20176,6 +20178,59 @@ aarch64_evpc_trn (struct expand_vec_perm_d *d)
   return true;
 }
 
+/* Try to re-encode the PERM constant so it combines odd and even elements.
+   This rewrites constants such as {0, 1, 4, 5}/V4SF to {0, 2}/V2DI.
+   We retry with this new constant with the full suite of patterns.  */
+static bool
+aarch64_evpc_reencode (struct expand_vec_perm_d *d)
+{
+  expand_vec_perm_d newd;
+  unsigned HOST_WIDE_INT nelt;
+
+  if (d->vec_flags != VEC_ADVSIMD)
+return false;
+
+  /* Get the new mode.  Always twice the size of the inner
+ and half the elements.  */
+  poly_uint64 vec_bits = GET_MODE_BITSIZE (d->vmode);
+  unsigned int new_elt_bits = GET_MODE_UNIT_BITSIZE (d->vmode) * 2;
+  auto new_elt_mode = int_mode_for_size (new_elt_bits, false).require ();
+  machine_mode new_mode = aarch64_simd_container_mode (new_elt_mode, vec_bits);
+
+  if (new_mode == word_mode)
+return false;
+
+  /* to_constant is safe since this routine is specific to Advanced SIMD
+ vectors.  */
+  nelt = d->perm.length ().to_constant ();
+
+  vec_perm_builder newpermconst;
+  newpermconst.new_vector (nelt / 2, nelt / 2, 1);
+
+  /* Convert the perm constant if we can.  Require even, odd as the pairs.  */
+  for (unsigned int i = 0; i < nelt; i += 2)
+{
+  poly_int64 elt0 = d->perm[i];
+  poly_int64 elt1 = d->perm[i + 1];
+  

RE: [PATCH] x86: Provide expanders for truncdisi2 and friends.

2020-07-13 Thread Roger Sayle

Hi Richard,

> It seems to be improving TARGET_TRULY_NOOP_TRUNCATION documentation might be 
> useful here.

This is an excellent suggestion.  How about the following/attached:

2020-07-13  Roger Sayle  

gcc/ChangeLog:
* doc/tm.texi (TARGET_TRULY_NOOP_TRUNCATION): Clarify that targets
that (sometimes) return false, indicating SUBREGs shouldn't be
used, also need to provide a trunc?i?i2 optab that performs this
truncation.

> The only user (after your patch) of this hook is in function.c for the 
> function parameter setup btw.

The targetm.truly_noop_truncation in assign_parm_setup_block is the last place 
that calls this
hook directly (with sizes), but the majority of uses go via 
TRULY_NOOP_TRUNCATION_MODES_P
as defined in machmode.h.

I'll prepare a patch to switch function.c to use TRULY_NOOP_TRUNCATION_MODE_P 
so that we
are consistent throughout the compiler.  In theory, this hook could then be 
changed to take modes
instead of (poly_unit64) sizes, but that clean-up might be tricky without 
access to the affected
platforms.

The hard register semantics that you're referring to are related to 
TARGET_MODES_TIEABLE_P,
which is documented to have interesting interactions with 
TARGET_TRULY_NOOP_TRUNCATION.

Is the above documentation change Ok for mainline?

Thanks,
Roger
--

diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 6e7d9dc54a9..41b9e10c856 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11122,7 +11122,9 @@ This hook returns true if it is safe to ``convert'' a 
value of
 @var{inprec} bits to one of @var{outprec} bits (where @var{outprec} is
 smaller than @var{inprec}) by merely operating on it as if it had only
 @var{outprec} bits.  The default returns true unconditionally, which
-is correct for most machines.
+is correct for most machines.  When @code{TARGET_TRULY_NOOP_TRUNCATION}
+returns false, the machine description should provide a @code{trunc}
+optab to specify the RTL that performs the required truncation.
 
 If @code{TARGET_MODES_TIEABLE_P} returns false for a pair of modes,
 suboptimal code can result if this hook returns true for the corresponding


[PATCH] tree-optimization/96163 - fix placement issue with SLP and vectors

2020-07-13 Thread Richard Biener
This avoids placing stmts beyond the vectorizer region begin which
confuses vect_stmt_dominates_stmt_p.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2020-07-13  Richard Biener  

PR tree-optimization/96163
* tree-vect-slp.c (vect_schedule_slp_instance): Put new stmts
at least after region begin.

* g++.dg/vect/pr96163.cc: New testcase.
---
 gcc/testsuite/g++.dg/vect/pr96163.cc | 146 +++
 gcc/tree-vect-slp.c  |  32 +++---
 2 files changed, 166 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/vect/pr96163.cc

diff --git a/gcc/testsuite/g++.dg/vect/pr96163.cc 
b/gcc/testsuite/g++.dg/vect/pr96163.cc
new file mode 100644
index 000..3bd376f67f6
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr96163.cc
@@ -0,0 +1,146 @@
+// { dg-do compile }
+// { dg-require-effective-target c++17 }
+
+typedef double b __attribute__((__vector_size__(16)));
+b c;
+enum { d };
+namespace e {
+template  struct g;
+struct h {
+  enum { i, j };
+};
+template  struct aa;
+} // namespace e
+template  struct k;
+template  class l;
+template  class ab;
+template  class o;
+template  class ac;
+class p;
+namespace e {
+template  struct q { typedef ac> ae; };
+struct s {
+  template  void af(double *ak, t) { *(b *)ak = c; }
+};
+} // namespace e
+template  class ab : public k {};
+template  class ab : public ab {
+public:
+  typedef typename e::g::ag ag;
+  using ab::ah;
+  ag ai() {
+long aj = 0;
+return e::aa(ah()).ai(aj);
+  }
+  ag operator[](long) { return ai(); }
+};
+template  class ay : public ab {
+public:
+  enum { a, al };
+};
+template  class ac : public ay {
+public:
+  p am();
+};
+template  struct k {
+  o () { return *static_cast *>(this); }
+};
+namespace e {
+template  struct aa { aa(f); };
+template  struct aa> {
+  typedef ad ao;
+  typedef typename ao::ag ag;
+  aa(ao ) : ap(ak.aq()) {}
+  ag (long ak) { return ap[ak]; }
+  ag *ap;
+};
+template 
+struct aa> : aa>> {
+  typedef o aw;
+  aa(aw ) : aa>(ak) {}
+};
+template  struct u {
+  enum { az, ba, bb };
+  static void bc(ax ak) { ak.template be(az, ba); }
+};
+template  struct v {
+  static void bc(ax ak) { u::bc(ak); }
+};
+template  class w {
+  typedef bg bi;
+
+public:
+  typedef bg bj;
+  typedef bg bf;
+  w(bj ak, int, bh, bi x) : bk(ak), bl(x) {}
+  template  void be(long, long) {
+bn.template af((0), 0);
+  }
+  bj bk;
+  bh bn;
+  bi bl;
+};
+template  void bp(bi , bo, bh bq) {
+  typedef aa bj;
+  bo br;
+  bj bs(ak);
+  typedef w ax;
+  ax bd(bs, br, bq, ak);
+  v::bc(bd);
+}
+template  struct bt;
+template  void bx(bu , bv by, bw bq) 
{
+  bt::bc(ak, by, bq);
+}
+template  struct bt {
+  static void bc(o , int by, s bq) { bp(ak, by, bq); }
+};
+} // namespace e
+class bz {
+public:
+  template  void operator*(an);
+};
+namespace e {
+template  struct cb { double am[ca]; };
+} // namespace e
+template  class cc {
+  e::cb ap;
+
+public:
+  double *aq() { return ap.am; }
+};
+template  class l : public e::q::ae {
+public:
+  typedef typename e::q::ae cd;
+  typedef typename e::g::ag ag;
+  cc ce;
+  ag *aq() { return ce.aq(); }
+  l() {}
+  template  l(an ak) { bx(this->ah(), ak, e::s()); }
+  template  void ch(cf, cg by) {
+ag *z, *y;
+{ z = aq(); }
+y = z;
+y[0] = aq()[1] = by;
+  }
+};
+namespace e {
+template 
+struct g> {
+  typedef ci ag;
+};
+} // namespace e
+template 
+class o : public l> {
+public:
+  typedef l cd;
+  template  o(cf ak, cg by) { cd::ch(ak, by); }
+  template  o(an ak) : cd(ak) {}
+};
+class p : public bz {};
+double cq;
+void cm() {
+  o r = 0;
+  o cn = r;
+  cn.am() * o(0, r[0] / cq).am();
+}
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index b3645b0a820..72192b5a813 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -4404,18 +4404,26 @@ vect_schedule_slp_instance (vec_info *vinfo,
else
  {
/* For externals we have to look at all defs since their
-  insertion place is decided per vector.  */
-   unsigned j;
-   tree vdef;
-   FOR_EACH_VEC_ELT (SLP_TREE_VEC_DEFS (child), j, vdef)
- if (TREE_CODE (vdef) == SSA_NAME
- && !SSA_NAME_IS_DEFAULT_DEF (vdef))
-   {
- gimple *vstmt = SSA_NAME_DEF_STMT (vdef);
- if (!last_stmt
- || vect_stmt_dominates_stmt_p (last_stmt, vstmt))
-   last_stmt = vstmt;
-   }
+  insertion place is decided per vector.  But beware
+  of pre-existing vectors where we need to make sure
+  we do not insert before the region boundary.  */
+   if (SLP_TREE_SCALAR_OPS (child).is_empty ()
+   && !vinfo->lookup_def (SLP_TREE_VEC_DEFS (child)[0]))
+ last_stmt = gsi_stmt (as_a  (vinfo)->region_begin);
+   else
+ {
+   unsigned j;
+   tree vdef;
+

Re: [PATCH 0/6 ver 4] ] Permute Class Operations

2020-07-13 Thread Segher Boessenkool
Hi!

On Wed, Jul 08, 2020 at 12:59:12PM -0700, Carl Love wrote:
> [PATCH 3/6] rs6000, Add vector replace builtin support

This is okay for trunk.  Thanks!


Segher


Re: [PATCH] c++: Fix tentative parsing of enum-specifier [PR96077]

2020-07-13 Thread Nathan Sidwell

On 7/10/20 11:43 AM, Marek Polacek via Gcc-patches wrote:

Here's an interesting issue: in this code a ) is missing:

   enum { E = (2 } e;

but we compile the code anyway, and E is set to 0 in build_enumerator,
which is sneaky.

The problem is that cp_parser_enum_specifier parses tentatively, because
when we see the enum keyword, we don't know yet if we'll find an
enum-specifier, opaque-enum-declaration, or elaborated-enum-specifier.

In this test when we call cp_parser_enumerator_list we're still parsing
tentatively, and as a consequence, parens.require_close (parser) in
cp_parser_primary_expression doesn't report any errors.  But we only go
on to parse the enumerator-list after we've seen a {, at which point we
might as well commit -- we know we're dealing with an enum-specifier.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?


ok, (with the typo Jakub noticed fixed :)

does this also fix 95288?


gcc/cp/ChangeLog:

PR c++/96077
* parser.c (cp_parser_enum_specifier): Commit to tentative parse
after we've seen an opening brace.

gcc/testsuite/ChangeLog:

PR c++/96077
* g++.dg/parse/enum14.C: New test.



nathan


--
Nathan Sidwell


[PATCH] x86: Change CTZ_DEFINED_VALUE_AT_ZERO to return 0/2

2020-07-13 Thread H.J. Lu via Gcc-patches
Change CTZ_DEFINED_VALUE_AT_ZERO/CTZ_DEFINED_VALUE_AT_ZERO to return 0/2
to enable table-based clz/ctz optimization:

 -- Macro: CLZ_DEFINED_VALUE_AT_ZERO (MODE, VALUE)
 -- Macro: CTZ_DEFINED_VALUE_AT_ZERO (MODE, VALUE)
 A C expression that indicates whether the architecture defines a
 value for 'clz' or 'ctz' with a zero operand.  A result of '0'
 indicates the value is undefined.  If the value is defined for only
 the RTL expression, the macro should evaluate to '1'; if the value
 applies also to the corresponding optab entry (which is normally
 the case if it expands directly into the corresponding RTL), then
 the macro should evaluate to '2'.  In the cases where the value is
 defined, VALUE should be set to this value.

gcc/

PR target/95863
* config/i386/i386.h (CTZ_DEFINED_VALUE_AT_ZERO): Return 0/2.
(CLZ_DEFINED_VALUE_AT_ZERO): Likewise.

gcc/testsuite/

PR target/95863
* gcc.target/i386/pr95863-1.c: New test.
* gcc.target/i386/pr95863-2.c: Likewise.
---
 gcc/config/i386/i386.h|  4 +-
 gcc/testsuite/gcc.target/i386/pr95863-1.c | 47 +++
 gcc/testsuite/gcc.target/i386/pr95863-2.c | 27 +
 3 files changed, 76 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr95863-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr95863-2.c

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index f4a8f1391fa..1deb59f286f 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2946,9 +2946,9 @@ extern void debug_dispatch_window (int);
 /* The value at zero is only defined for the BMI instructions
LZCNT and TZCNT, not the BSR/BSF insns in the original isa.  */
 #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
-   ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_BMI ? 1 : 0)
+   ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_BMI ? 2 : 0)
 #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
-   ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_LZCNT ? 1 : 0)
+   ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_LZCNT ? 2 : 0)
 
 
 /* Flags returned by ix86_get_callcvt ().  */
diff --git a/gcc/testsuite/gcc.target/i386/pr95863-1.c 
b/gcc/testsuite/gcc.target/i386/pr95863-1.c
new file mode 100644
index 000..f3918a1a766
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95863-1.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-O -mbmi" } */
+
+int ctz1 (unsigned x)
+{
+  static const char table[32] =
+{
+  0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+  31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+};
+
+  return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
+}
+
+int ctz2 (unsigned x)
+{
+#define u 0
+  static short table[64] =
+{
+  32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
+  10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
+  31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
+  30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u
+};
+
+  x = (x & -x) * 0x0450FBAF;
+  return table[x >> 26];
+}
+
+int ctz3 (unsigned x)
+{
+  static int table[32] =
+{
+  0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26,
+  31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27
+};
+
+  if (x == 0) return 32;
+  x = (x & -x) * 0x04D7651F;
+  return table[x >> 27];
+}
+
+/* { dg-final { scan-assembler-times "tzcntl\t" 3 } } */
+/* { dg-final { scan-assembler-times "andl\t" 1 } } */
+/* { dg-final { scan-assembler-not "neg" } } */
+/* { dg-final { scan-assembler-not "imul" } } */
+/* { dg-final { scan-assembler-not "shr" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr95863-2.c 
b/gcc/testsuite/gcc.target/i386/pr95863-2.c
new file mode 100644
index 000..cb56dfc6d94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95863-2.c
@@ -0,0 +1,27 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O -mbmi" } */
+
+static const unsigned long long magic = 0x03f08c5392f756cdULL;
+
+static const char table[64] = {
+ 0,  1, 12,  2, 13, 22, 17,  3,
+14, 33, 23, 36, 18, 58, 28,  4,
+62, 15, 34, 26, 24, 48, 50, 37,
+19, 55, 59, 52, 29, 44, 39,  5,
+63, 11, 21, 16, 32, 35, 57, 27,
+61, 25, 47, 49, 54, 51, 43, 38,
+10, 20, 31, 56, 60, 46, 53, 42,
+ 9, 30, 45, 41,  8, 40,  7,  6,
+};
+
+int ctz4 (unsigned long long x)
+{
+  unsigned long long lsb = x & -x;
+  return table[(lsb * magic) >> 58];
+}
+
+/* { dg-final { scan-assembler-times "tzcntq\t" 1 } } */
+/* { dg-final { scan-assembler-times "andl\t" 1 } } */
+/* { dg-final { scan-assembler-not "negq" } } */
+/* { dg-final { scan-assembler-not "imulq" } } */
+/* { dg-final { scan-assembler-not "shrq" } } */
-- 
2.26.2



Re: [PATCH 0/6 ver 4] ] Permute Class Operations

2020-07-13 Thread Segher Boessenkool
On Thu, Jul 09, 2020 at 11:02:39AM -0500, will schmidt wrote:
> > * config/rs6000/rs6000-call.c (P10_BUILTIN_VEC_REPLACE_ELT,
> > P10_BUILTIN_VEC_REPLACE_UN): New.
> 
> New what?

Just "New." is fine :-)


Segher


[PATCH wwwdocs] Explicitly list powerpc64le-unknown-linux-gnu as a primary platform

2020-07-13 Thread Florian Weimer via Gcc-patches
The intent was that this was implied by powerpc64-unknown-linux-gnu,
but this not very obvious because of the ELFv1 vs ELFv2 ABI
differences.

---
 htdocs/gcc-10/criteria.html | 1 +
 htdocs/gcc-11/criteria.html | 1 +
 htdocs/gcc-9/criteria.html  | 1 +
 3 files changed, 3 insertions(+)

diff --git a/htdocs/gcc-10/criteria.html b/htdocs/gcc-10/criteria.html
index b870b829..10144710 100644
--- a/htdocs/gcc-10/criteria.html
+++ b/htdocs/gcc-10/criteria.html
@@ -111,6 +111,7 @@ application testing.
 i686-pc-linux-gnu
 mipsisa64-elf
 powerpc64-unknown-linux-gnu
+powerpc64le-unknown-linux-gnu
 sparc-sun-solaris2.11
 x86_64-pc-linux-gnu
 
diff --git a/htdocs/gcc-11/criteria.html b/htdocs/gcc-11/criteria.html
index cef92821..adf566e1 100644
--- a/htdocs/gcc-11/criteria.html
+++ b/htdocs/gcc-11/criteria.html
@@ -111,6 +111,7 @@ application testing.
 i686-pc-linux-gnu
 mipsisa64-elf
 powerpc64-unknown-linux-gnu
+powerpc64le-unknown-linux-gnu
 sparc-sun-solaris2.11
 x86_64-pc-linux-gnu
 
diff --git a/htdocs/gcc-9/criteria.html b/htdocs/gcc-9/criteria.html
index d434646c..f75bf990 100644
--- a/htdocs/gcc-9/criteria.html
+++ b/htdocs/gcc-9/criteria.html
@@ -111,6 +111,7 @@ application testing.
 i686-pc-linux-gnu
 mipsisa64-elf
 powerpc64-unknown-linux-gnu
+powerpc64le-unknown-linux-gnu
 sparc-sun-solaris2.11
 x86_64-pc-linux-gnu
 



Re: [PATCH 0/6 ver 4] ] Permute Class Operations

2020-07-13 Thread Segher Boessenkool
Hi!

On Wed, Jul 08, 2020 at 12:59:00PM -0700, Carl Love wrote:
> [PATCH 2/6] rs6000 Add vector insert builtin support

> +For little-endian,
> +the generated code will be semantically equivalent to vinsbrx, vinshrx,
> +or vinswrx instructions.  Similarly for big-endian it will be semantically
> +equivalent to vinsblx, vinshlx, vinswlx.

"This builtin generates vins[bhw]lx on BE" etc.?

>  Note that some
> +fairly anomalous results can be generated if the byte index is not aligned
> +on an element boundary for the sort of element being inserted. This is a
> +limitation of the bi-endian vector programming model.

Yeah, leave this out, like Will says?  (Also, two spaces after dot.)

> +For little-endian, the code generation will be semantically equivalent to
> +vins*lx, while for big-endian it will be semantically equivalent to vins*rx.

Similar / same as above.

This is okay for trunk (with that improved a bit, also the typos and
other doc things Will found).  Thanks!


Segher


[PATCH rs6000]: Refine RTL unroll hook for small loops

2020-07-13 Thread guojiufu via Gcc-patches
Hi,

For very small loops (< 6 insns), it would be fine to unroll 4
times to run fast with less latency and better cache usage.  Like
below loops:
 while (i) a[--i] = NULL;   while (p < e)  *d++ = *p++;

With this patch enhances, we could see some performance improvement
for some workloads(e.g. SPEC2017).

Bootstrap and regtest pass on powerpc64le. Ok for trunk?

BR,
Jiufu Guo

2020-07-13  Jiufu Guo   

* config/rs6000/rs6000.c (rs6000_loop_unroll_adjust): Refine hook.

---
 gcc/config/rs6000/rs6000.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 58f5d780603..06844fdba57 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5135,16 +5135,15 @@ rs6000_destroy_cost_data (void *data)
 static unsigned
 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
 {
-   if (unroll_only_small_loops)
+  if (unroll_only_small_loops)
 {
-  /* TODO: This is hardcoded to 10 right now.  It can be refined, for
-example we may want to unroll very small loops more times (4 perhaps).
-We also should use a PARAM for this.  */
+  /* TODO: Using hardcodes here, for tunable, PARAM(s) maybe refined.  */
+  if (loop->ninsns <= 6)
+   return MIN (4, nunroll);
   if (loop->ninsns <= 10)
return MIN (2, nunroll);
-  else
-   return 0;
+
+  return 0;
 }
 
   return nunroll;
-- 
2.25.1



Re: [patch] fix PR lto/95604, -flto and -fcf-protection

2020-07-13 Thread Matthias Klose
On 6/17/20 3:11 PM, Richard Biener wrote:
> On Wed, 17 Jun 2020, H.J. Lu wrote:
> 
>> On Wed, Jun 17, 2020 at 5:33 AM Richard Biener  wrote:
>>>
>>> On Wed, 17 Jun 2020, H.J. Lu wrote:
>>>
 On Wed, Jun 17, 2020 at 5:00 AM Richard Biener  wrote:
>
> On Wed, 17 Jun 2020, H.J. Lu wrote:
>
>> On Wed, Jun 17, 2020 at 1:59 AM Richard Biener
>>  wrote:
>>>
>>> On Mon, Jun 15, 2020 at 5:30 PM Matthias Klose  wrote:

 PR lto/95604 was seen when checking for binaries without having CET 
 support in a
 distro archive, for binaries built with LTO optimization.  The 
 hardening flag
 -fcf-protection=full is passed in CFLAGS, and maybe should be passed 
 in LDFLAGS
 as well.  However to make it work when not passed to the link step, it 
 should be
 extracted from the options found in the lto opts section.

 Richard suggested two solutions offline.  I checked that both fix the 
 test case.
 Which one to install?  Also ok for the 9 and 10 branches?
>>>
>>> I guess even though variant two is simpler it doesn't make much sense to
>>> have differing settings of -fcf-protection between different functions? 
>>>  HJ?
>>
>> -fcf-protection is applied to a file, not a function since CET marker
>> is per file.
>>
>>> So looking at variant one,
>>>
>>> @@ -287,6 +287,18 @@
>>>  foption->orig_option_with_args_text);
>>>   break;
>>>
>>> +   case OPT_fcf_protection_:
>>> + /* Append or check identical.  */
>>> + for (j = 0; j < *decoded_options_count; ++j)
>>> +   if ((*decoded_options)[j].opt_index == foption->opt_index)
>>> + break;
>>> + if (j == *decoded_options_count)
>>> +   append_option (decoded_options, decoded_options_count, 
>>> foption);
>>> + else if (strcmp ((*decoded_options)[j].arg, foption->arg))
>>> +   warning (input_location, "option %s with different values",
>>> +foption->orig_option_with_args_text);
>>> + break;
>>>
>>> you are always streaming a -fcf-protection option so the if (j ==
>>> *decoded_options_count)
>>> case shouldn't ever happen but I guess it's safe to leave the code
>>> as-is.  Can you
>>> amend the warning with the option that prevails?  Maybe
>>>
>>> + else if (strcmp ((*decoded_options)[j].arg, foption->arg))
>>>{
>>>   static bool noted;
>>> +   warning (input_location, "option %s with different values",
>>> +foption->orig_option_with_args_text);
>>>   if (!noted)
>>> {
>>>inform ("%s will be used instead",
>>> (*decoded_options)[j].orig_option_with_args_text);
>>>noted = true;
>>> }
>>>
>>> I guess input_location is simply zero so the diagnostic doesn't
>>> contain the actual file we're
>>> looking at.  Something to improve I guess (also applyign to other
>>> diagnostics we emit).
>>>
>>> Otherwise looks OK.
>>>
>>> Please wait for HJ in case he'd like to go with option two.
>>>
>>
>> I prefer option one.  But what happens if input files are compiled
>> with different -fcf-protection settings?
>
> You get a warning and the first option setting wins (and I requested
> to inform the user which that is).
>

 I think it should be an error and the user should explicitly specify the
 option with -lfto in the final link command.
>>>
>>> But AFAIK ld will not reject linking objects with mismatched settings?
>>
>> Linker will silently change CET run-time behavior.
>>
>>> Does -fcf-protection have effects "early" (up to IPA)?  That is, is it
>>> safe to turn it on only post-IPA?
>>
>> I think so.  We don't do anything with SHSTK.   For IBT, we just insert an
>> ENDBR when we output the indirect branch target.
>>
>>> So yeah, if the user provided an explicit -fcf-protection option at link
>>> we honor that (but with the current patch we'd still warn about conflicts
>>
>> I don't think we should warn about the explicit -fcf-protection option at 
>> link
>> time.
>>
>>> between the original TU setting - but not conflicts with the setting
>>> at link time).  If we want to error on mismatches but allow if at link
>>> time a setting is specified that needs extra code (it would be the
>>> first time we have this behavior).
>>>
>>
>> I think we should give an error if there are any mismatches in
>> inputs and let the explicit -fcf-protection option at link time override
>> -fcf-protection options in inputs .
> 
> Note the linker will then still silently change CET run-time behavior
> when there's a non-LTO object involved in the link that has

[committed] libstdc++: Fix istream::ignore exit conditions (PR 94749, PR 96161)

2020-07-13 Thread Jonathan Wakely via Gcc-patches
My previous fix for PR 94749 did fix the reported case, so that the next
character is not discarded if it happens to equal the delimiter when __n
characters have already been read. But it introduced a new bug, which is
that the delimiter character would *not* be discarded if the number of
characters discarded is numeric_limits::max() or more before
reaching the delimiter.

The new bug happens because I changed the code to check _M_gcount < __n.
But when __n == numeric_limits::max() that is false, and so
we don't discard the delimiter. It's not sufficient to check for the
delimiter when the __large_ignore condition is true, because there's an
edge case where the delimiter is reached when _M_gcount == __n and so
we break out of the loop without setting __large_ignore.

PR 96161 is a similar bug to the original PR 94749 report, where eofbit
is set after discarding __n characters if there happen to be no more
characters in the stream.

This patch fixes both cases (and the regression) by checking different
conditions for the __n == max case and the __n < max case. For the
former case, we know that we must have either reached the delimiter or
EOF, and the value of _M_gcount doesn't matter (except to avoid integer
overflow). For the latter case we need to check _M_gcount first and only
set eofbit or discard the delimiter if it didn't reach __n. For the
latter case overflow can't happen because _M_gcount <= __n < max.

libstdc++-v3/ChangeLog:

PR libstdc++/94749
PR libstdc++/96161
* include/bits/istream.tcc (basic_istream::ignore(streamsize))
[n == max]: Check overflow conditions on _M_gcount. Rely on
the fact that either EOF or the delimiter was reached.
[n < max]: Check _M_gcount < n before checking for EOF or
delimiter.
(basic_istream::ignore(streamsize, char_type): Likewise.
* src/c++98/compatibility.cc (istream::ignore(streamsize))
(wistream::ignore(streamsize)): Likewise.
* src/c++98/istream.cc (istream::ignore(streamsize, char_type))
(wistream::ignore(streamsize, char_type)): Likewise.
* testsuite/27_io/basic_istream/ignore/char/94749.cc: Check that
delimiter is discarded if the number of characters ignored
doesn't fit in streamsize.
* testsuite/27_io/basic_istream/ignore/wchar_t/94749.cc:
Likewise.
* testsuite/27_io/basic_istream/ignore/char/96161.cc: New test.
* testsuite/27_io/basic_istream/ignore/wchar_t/96161.cc: New test.

Tested powerpc64le-linux, i686-linux, committed to trunk.


commit ba8fe4b4832e30277f2e4a73b5d35b2e55074d07
Author: Jonathan Wakely 
Date:   Mon Jul 13 10:26:39 2020 +0100

libstdc++: Fix istream::ignore exit conditions (PR 94749, PR 96161)

My previous fix for PR 94749 did fix the reported case, so that the next
character is not discarded if it happens to equal the delimiter when __n
characters have already been read. But it introduced a new bug, which is
that the delimiter character would *not* be discarded if the number of
characters discarded is numeric_limits::max() or more before
reaching the delimiter.

The new bug happens because I changed the code to check _M_gcount < __n.
But when __n == numeric_limits::max() that is false, and so
we don't discard the delimiter. It's not sufficient to check for the
delimiter when the __large_ignore condition is true, because there's an
edge case where the delimiter is reached when _M_gcount == __n and so
we break out of the loop without setting __large_ignore.

PR 96161 is a similar bug to the original PR 94749 report, where eofbit
is set after discarding __n characters if there happen to be no more
characters in the stream.

This patch fixes both cases (and the regression) by checking different
conditions for the __n == max case and the __n < max case. For the
former case, we know that we must have either reached the delimiter or
EOF, and the value of _M_gcount doesn't matter (except to avoid integer
overflow). For the latter case we need to check _M_gcount first and only
set eofbit or discard the delimiter if it didn't reach __n. For the
latter case overflow can't happen because _M_gcount <= __n < max.

libstdc++-v3/ChangeLog:

PR libstdc++/94749
PR libstdc++/96161
* include/bits/istream.tcc (basic_istream::ignore(streamsize))
[n == max]: Check overflow conditions on _M_gcount. Rely on
the fact that either EOF or the delimiter was reached.
[n < max]: Check _M_gcount < n before checking for EOF or
delimiter.
(basic_istream::ignore(streamsize, char_type): Likewise.
* src/c++98/compatibility.cc (istream::ignore(streamsize))
(wistream::ignore(streamsize)): Likewise.
* src/c++98/istream.cc (istream::ignore(streamsize, char_type))

Re: [PATCH 4/4] doc: Clarify __builtin_return_address [PR94891]

2020-07-13 Thread Richard Sandiford
Szabolcs Nagy  writes:
> The expected semantics and valid usage of __builtin_return_address is
> not clear since it exposes implementation internals that are normally
> not meaningful to portable c code.
>
> This documentation change tries to clarify the semantics in case the
> return address is stored in a mangled form. This affects AArch64 when
> pointer authentication is used for the return address signing (i.e.
> -mbranch-protection=pac-ret).
>
> gcc/ChangeLog:
>
> 2020-06-04  Szabolcs Nagy  
>
>   * doc/extend.texi: Update the text for  __builtin_return_address.

LGTM apart from minor nits:

> ---
>  gcc/doc/extend.texi | 16 ++--
>  1 file changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index cced19d2018..0fd32a22599 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -11151,18 +11151,30 @@ The @var{level} argument must be a constant integer.
>  
>  On some machines it may be impossible to determine the return address of
>  any function other than the current one; in such cases, or when the top
> -of the stack has been reached, this function returns @code{0} or a
> -random value.  In addition, @code{__builtin_frame_address} may be used
> +of the stack has been reached, this function returns an unspecified
> +value.  In addition, @code{__builtin_frame_address} may be used
>  to determine if the top of the stack has been reached.
>  
>  Additional post-processing of the returned value may be needed, see
>  @code{__builtin_extract_return_addr}.
>  
> +The stored representation of the return address in memory may be different
> +from the address returned by @code{__builtin_return_address}.  For example

IMO reads better with a comma after “For example”.

> +on AArch64 the stored address may be mangled with return address signing.

Maybe add “whereas the address returned by @code{__builtin_return_address}
is not“.

> +
>  Calling this function with a nonzero argument can have unpredictable
>  effects, including crashing the calling program.  As a result, calls
>  that are considered unsafe are diagnosed when the @option{-Wframe-address}
>  option is in effect.  Such calls should only be made in debugging
>  situations.
> +
> +On targets where code addresses are representable as @code{void *},
> +@smallexample
> +void *addr = __builtin_extract_return_addr (__builtin_return_address (0))

Might as well add the ”;” too

> +@end smallexample
> +gives the code address where the current function would return.  For example

Same comment about the comma.

> +such address may be used with @code{dladdr} or other interfaces that work

“such an address”

> +with code addresses.
>  @end deftypefn
>  
>  @deftypefn {Built-in Function} {void *} __builtin_extract_return_addr (void 
> *@var{addr})

OK for trunk and branches with those changes.

Thanks,
Richard


Re: [PATCH 3/4] libgcc: fix the handling of return address mangling [PR94891]

2020-07-13 Thread Richard Sandiford
Szabolcs Nagy  writes:
> Mangling, currently only used on AArch64 for return address signing,
> is an internal representation that should not be exposed via
>
>   __builtin_return_address return value,
>   __builtin_eh_return handler argument,
>   _Unwind_DebugHook handler argument.
>
> Note that a mangled address might not even fit into a void *, e.g.
> with AArch64 ilp32 ABI the return address is stored as 64bit, so
> the mangled return address cannot be accessed via _Unwind_GetPtr.

Summarising what we talked about off-list wrt __builtin_eh_return
(in the context of this patch and 2/4):

* The unwinder routine is still a potential return-anywhere gadget with
  the current PAC-RET approach, since the caller of __builtin_eh_return
  has to sign a general address.  At best, the need to include the
  signing instruction in the gadget makes life harder for an attacker,
  if the signing instruction happens to be scheduled early.

* libgcc only provides an option to use PAC-RET + BTI together.

* With BTI, using a jump instead of a return gives better protection
  because it requires the EH handler address to be a valid jump target.
  The current PAC-RET approach is incompatible with doing that.

* BTI would make it harder for the jump itself to be used as a gadget.

* When BTI is enabled, it is safe to assume that all EH handlers already
  have a suitable BTI J.

* The current PAC-RET approach doesn't work for ILP32.

* There are no compatibility concerns with the patch, since this is
  essentially a private interface between libgcc and gcc.  (And in
  particular, we don't support building libgcc with any version of
  GCC except the one supplied with it.)

* Although glibc does have its own unwinder, it's only there to maintain
  compatiblity with ancient versions.  Those versions long predate aarch64
  support, so that unwinder isn't built for aarch64.  (And even if it were,
  it would expect the traditional interface instead of the current PAC one.)

The patch does have the potential to lower protection for processors
that implement PAC without BTI.  However, there's not much we can
reasonably do about that without creating a separate PAC-only mode
for libgcc.  And it isn't clear how useful such a mode would be.
A key feature of both PAC and BTI is that they use the NOP space,
and so it is safe to build PAC- and BTI-compatible binaries without
worrying about whether the target h/w supports them.  In particular,
even though PAC is a v8.3 feature and BTI is only a (backportable)
v8.5 feature, it is safe and useful to build v8.3 binaries with BTI
unconditionally.  Not many people are likely to want to compile-out
the extra BTI protection.

Given all the above, I agree that this is a good change to make.
And like you say, the affected interfaces are all specific to AArch64.

> This patch changes the unwinder hooks as follows:
>
> MD_POST_EXTRACT_ROOT_ADDR is removed: root address comes from
> __builtin_return_address which is not mangled.
>
> MD_POST_EXTRACT_FRAME_ADDR is renamed to MD_DEMANGLE_RETURN_ADDR,
> it now operates on _Unwind_Word instead of void *, so the hook
> should work when return address signing is enabled on AArch64 ilp32.
> (But for that __builtin_aarch64_autia1716 should be fixed to operate
> on 64bit input instead of a void *.)
>
> MD_POST_FROB_EH_HANDLER_ADDR is removed: it is the responsibility of
> __builtin_eh_return to do the mangling if necessary.
>
> libgcc/ChangeLog:
>
> 2020-06-04  Szabolcs Nagy  
>
>   * config/aarch64/aarch64-unwind.h (MD_POST_EXTRACT_ROOT_ADDR): Remove.
>   (MD_POST_FROB_EH_HANDLER_ADDR): Remove.
>   (MD_POST_EXTRACT_FRAME_ADDR): Rename to ...
>   (MD_DEMANGLE_RETURN_ADDR): This.
>   (aarch64_post_extract_frame_addr): Rename to ...
>   (aarch64_demangle_return_addr): This.
>   (aarch64_post_frob_eh_handler_addr): Remove.
>   * unwind-dw2.c (uw_update_context): Demangle return address.
>   (uw_frob_return_addr): Remove.

OK for trunk and branches, with a minor fix…

> @@ -57,9 +54,10 @@ aarch64_cie_signed_with_b_key (struct _Unwind_Context 
> *context)
> using CFA of current frame.  */
>  
>  static inline void *
> -aarch64_post_extract_frame_addr (struct _Unwind_Context *context,
> -  _Unwind_FrameState *fs, void *addr)
> +aarch64_demangle_return_addr (struct _Unwind_Context *context,
> +   _Unwind_FrameState *fs, _Unwind_Word addr_word)

The function comment should now refer to ADDR_WORD instead of ADDR.

Thanks,
Richard



[PATCH] fix global variable alignment for testcase gcc.dg/torture/pr96133.c

2020-07-13 Thread Richard Biener
The testcase was errorneously accessing the global variable via a
type that might require bigger alignment than provided.  Fix that
via an appropriate attribute.

2020-07-13  Richard Biener  

PR testsuite/96180
* gcc.dg/torture/pr96133.c: Align global variable.
---
 gcc/testsuite/gcc.dg/torture/pr96133.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr96133.c 
b/gcc/testsuite/gcc.dg/torture/pr96133.c
index 8d051ce2913..ac31a7141c4 100644
--- a/gcc/testsuite/gcc.dg/torture/pr96133.c
+++ b/gcc/testsuite/gcc.dg/torture/pr96133.c
@@ -1,7 +1,7 @@
 /* { dg-do run } */
 
 typedef int T;
-static const T a[2][3] = { { 1, 2, 3 }, { 4, 5, 6 } };
+static const T a[2][3] __attribute__((aligned(2*sizeof(T = { { 1, 2, 3 }, 
{ 4, 5, 6 } };
 typedef T v2 __attribute__((vector_size(2*sizeof(T;
 
 int
-- 
2.26.2


[PATCH 5/5] libstdc++: keep subtree sizes in pb_ds binary search trees (PR 81806)

2020-07-13 Thread Xi Ruoyao via Gcc-patches

> The fifth patch moves the functionality of tree_order_statistics_node_update
> into the implementation of binary search trees (they are now order-statistics
> trees), makes tree_order_statistics_node_update no-op, and deprecates it.

The patch is attached.

libstdc++-v3/ChangeLog:

* include/Makefile.am: Add
  include/ext/pb_ds/detail/bin_search_tree_/order_statistics_imps.hpp
  and
  include/ext/pb_ds/detail/ov_tree_map_/order_statistics_imps.hpp,
  remove
  include/ext/pb_ds/detail/tree_policy/order_statistics_imp.hpp.
* include/Makefile.in: Regenerate.
* include/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp
  (PB_DS_BIN_TREE_NAME::find_by_order): Declare new member function.
  (PB_DS_BIN_TREE_NAME::order_of_key): Likewise.
* include/ext/pb_ds/detail/ov_tree_map_/ov_tree_map_.hpp
  (PB_DS_OV_TREE_NAME::find_by_order): Likewise.
  (PB_DS_OV_TREE_NAME::order_of_key): Likewise.
* include/ext/pb_ds/detail/bin_search_tree_/order_statistics_imps.hpp:
  New file.
  (PB_DS_CLASS_C_DEC::find_by_order): Implement.
  (PB_DS_CLASS_C_DEC::order_of_key): Likewise.
* include/ext/pb_ds/detail/ov_tree_map_/order_statistics_imps.hpp:
  New file.
  (PB_DS_CLASS_C_DEC::find_by_order): Implement.
  (PB_DS_CLASS_C_DEC::order_of_key): Likewise.
* include/ext/pb_ds/detail/tree_policy/order_statistics_imp.hpp:
  Delete.
* include/ext/pb_ds/tree_policy.hpp
  (tree_order_statistics_node_update): Make it noop, and deprecate.
* testsuite/ext/pb_ds/example/tree_order_statistics.cc:
  Remove the usage of deprecated tree_order_statistics_node_update.
* testsuite/ext/pb_ds/example/tree_order_statistics_join.cc:
  Likewise.
-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University
From c0ee85f492872ba164ad3c1c9636aace1de02e96 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?X=E2=84=B9=20Ruoyao?= 
Date: Sun, 12 Jul 2020 16:12:18 +0800
Subject: [PATCH 5/5] libstdc++: implement order statistics function in pb_ds
 tree maps

libstdc++-v3/ChangeLog:

	* include/Makefile.am: Add
	  include/ext/pb_ds/detail/bin_search_tree_/order_statistics_imps.hpp
	  and
	  include/ext/pb_ds/detail/ov_tree_map_/order_statistics_imps.hpp,
	  remove
	  include/ext/pb_ds/detail/tree_policy/order_statistics_imp.hpp.
	* include/Makefile.in: Regenerate.
	* include/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp
	  (PB_DS_BIN_TREE_NAME::find_by_order): Declare new member function.
	  (PB_DS_BIN_TREE_NAME::order_of_key): Likewise.
	* include/ext/pb_ds/detail/ov_tree_map_/ov_tree_map_.hpp
	  (PB_DS_OV_TREE_NAME::find_by_order): Likewise.
	  (PB_DS_OV_TREE_NAME::order_of_key): Likewise.
	* include/ext/pb_ds/detail/bin_search_tree_/order_statistics_imps.hpp:
	  New file.
	  (PB_DS_CLASS_C_DEC::find_by_order): Implement.
	  (PB_DS_CLASS_C_DEC::order_of_key): Likewise.
	* include/ext/pb_ds/detail/ov_tree_map_/order_statistics_imps.hpp:
	  New file.
	  (PB_DS_CLASS_C_DEC::find_by_order): Implement.
	  (PB_DS_CLASS_C_DEC::order_of_key): Likewise.
	* include/ext/pb_ds/detail/tree_policy/order_statistics_imp.hpp:
	  Delete.
	* include/ext/pb_ds/tree_policy.hpp
	  (tree_order_statistics_node_update): Make it noop, and deprecate.
	* testsuite/ext/pb_ds/example/tree_order_statistics.cc:
	  Remove the usage of deprecated tree_order_statistics_node_update.
	* testsuite/ext/pb_ds/example/tree_order_statistics_join.cc:
	  Likewise.
---
 libstdc++-v3/include/Makefile.am  |  3 +-
 libstdc++-v3/include/Makefile.in  |  3 +-
 .../bin_search_tree_/bin_search_tree_.hpp | 10 +++
 .../order_statistics_imps.hpp}| 50 ---
 .../ov_tree_map_/order_statistics_imps.hpp| 77 
 .../detail/ov_tree_map_/ov_tree_map_.hpp  | 10 +++
 .../include/ext/pb_ds/tree_policy.hpp | 88 ++-
 .../pb_ds/example/tree_order_statistics.cc|  9 +-
 .../example/tree_order_statistics_join.cc |  4 +-
 9 files changed, 125 insertions(+), 129 deletions(-)
 rename libstdc++-v3/include/ext/pb_ds/detail/{tree_policy/order_statistics_imp.hpp => bin_search_tree_/order_statistics_imps.hpp} (71%)
 create mode 100644 libstdc++-v3/include/ext/pb_ds/detail/ov_tree_map_/order_statistics_imps.hpp

diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am
index e131ce04f8c..604821ecda2 100644
--- a/libstdc++-v3/include/Makefile.am
+++ b/libstdc++-v3/include/Makefile.am
@@ -365,6 +365,7 @@ pb_headers2 = \
 	${pb_srcdir}/detail/bin_search_tree_/r_erase_fn_imps.hpp \
 	${pb_srcdir}/detail/bin_search_tree_/rotate_fn_imps.hpp \
 	${pb_srcdir}/detail/bin_search_tree_/split_join_fn_imps.hpp \
+	${pb_srcdir}/detail/bin_search_tree_/order_statistics_imps.hpp \
 	${pb_srcdir}/detail/bin_search_tree_/traits.hpp \
 	

[PATCH 4/5] libstdc++: keep subtree sizes in pb_ds binary search trees (PR 81806)

2020-07-13 Thread Xi Ruoyao via Gcc-patches

> The fourth patch converts the point_iterator of rb_tree and splay_tree based
> maps to random access iterator.  With the subtree size kept we can implement
> the
> operators required by random access iterator in logarithm time.

The patch is attached.

libstdc++-v3/ChangeLog:

* include/ext/pb_ds/detail/bin_search_tree_/point_iterators.hpp
  (bin_search_tree_const_it_::size_type): New typedef.
  (bin_search_tree_it_::difference_type): Likewise.
  (bin_search_tree_const_it_::inc): New overloads.
  (bin_search_tree_const_it_::dec): Likewise.
  (bin_search_tree_const_it_::order): New member function.
  (bin_search_tree_const_it_::operator+=): New operator.
  (bin_search_tree_const_it_::operator-=): Likewise.
  (bin_search_tree_const_it_::operator+): Likewise.
  (bin_search_tree_const_it_::operator-): Likewise.
  (bin_search_tree_const_it_::operator<): Likewise.
  (bin_search_tree_const_it_::operator<=): Likewise.
  (bin_search_tree_const_it_::operator>): Likewise.
  (bin_search_tree_const_it_::operator>=): Likewise.
  (bin_search_tree_const_it_::operator[]): Likewise.
  (bin_search_tree_it_::operator+=): Likewise.
  (bin_search_tree_it_::operator-=): Likewise.
  (bin_search_tree_it_::operator+): Likewise.
  (bin_search_tree_it_::operator-): Likewise.
  (bin_search_tree_it_::operator[]): Likewise.
  (bin_search_tree_const_it_::iterator_category):
  Change to std::random_access_iterator_tag.
-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University
From 5149d3156b0b1ae5d8cf82c54d041bd47451c995 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?X=E2=84=B9=20Ruoyao?= 
Date: Sat, 11 Jul 2020 23:32:36 +0800
Subject: [PATCH 4/5] libstdc++: make pb_ds binary search tree point iterator
 random accessible

libstdc++-v3/ChangeLog:

	* include/ext/pb_ds/detail/bin_search_tree_/point_iterators.hpp
	  (bin_search_tree_const_it_::size_type): New typedef.
	  (bin_search_tree_it_::difference_type): Likewise.
	  (bin_search_tree_const_it_::inc): New overloads.
	  (bin_search_tree_const_it_::dec): Likewise.
	  (bin_search_tree_const_it_::order): New member function.
	  (bin_search_tree_const_it_::operator+=): New operator.
	  (bin_search_tree_const_it_::operator-=): Likewise.
	  (bin_search_tree_const_it_::operator+): Likewise.
	  (bin_search_tree_const_it_::operator-): Likewise.
	  (bin_search_tree_const_it_::operator<): Likewise.
	  (bin_search_tree_const_it_::operator<=): Likewise.
	  (bin_search_tree_const_it_::operator>): Likewise.
	  (bin_search_tree_const_it_::operator>=): Likewise.
	  (bin_search_tree_const_it_::operator[]): Likewise.
	  (bin_search_tree_it_::operator+=): Likewise.
	  (bin_search_tree_it_::operator-=): Likewise.
	  (bin_search_tree_it_::operator+): Likewise.
	  (bin_search_tree_it_::operator-): Likewise.
	  (bin_search_tree_it_::operator[]): Likewise.
	  (bin_search_tree_const_it_::iterator_category):
	  Change to std::random_access_iterator_tag.
---
 .../bin_search_tree_/point_iterators.hpp  | 277 +-
 1 file changed, 276 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/point_iterators.hpp b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/point_iterators.hpp
index eb2f4fdbfd2..f7efe79290a 100644
--- a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/point_iterators.hpp
+++ b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/point_iterators.hpp
@@ -105,8 +105,9 @@ namespace __gnu_pbds
 class bin_search_tree_const_it_
 {
 public:
-  typedef std::bidirectional_iterator_tag 		iterator_category;
+  typedef std::random_access_iterator_tag 		iterator_category;
   typedef typename _Alloc::difference_type 	difference_type;
+  typedef typename _Alloc::size_type		size_type;
   typedef Value_Type value_type;
   typedef Pointer 	pointer;
   typedef Const_Pointer const_pointer;
@@ -185,6 +186,28 @@ namespace __gnu_pbds
 	return ret_it;
   }
 
+  inline PB_DS_TREE_CONST_IT_C_DEC&
+  operator+=(difference_type dif)
+  {
+inc(dif, integral_constant());
+return *this;
+  }
+
+  inline PB_DS_TREE_CONST_IT_C_DEC
+  operator+(difference_type dif) const
+  {
+PB_DS_TREE_CONST_IT_C_DEC ret_it(m_p_nd);
+ret_it += dif;
+return ret_it;
+  }
+
+  inline PB_DS_TREE_CONST_IT_C_DEC&
+  operator-=(difference_type dif)
+  {
+dec(dif, integral_constant());
+return *this;
+  }
+
   inline PB_DS_TREE_CONST_IT_C_DEC& 
   operator--()
   {
@@ -200,6 +223,54 @@ namespace __gnu_pbds
 	return ret_it;
   }
 
+  inline PB_DS_TREE_CONST_IT_C_DEC
+  operator-(difference_type dif) const
+  {
+PB_DS_TREE_CONST_IT_C_DEC ret_it(m_p_nd);
+ret_it -= dif;
+return ret_it;
+   

[PATCH 3/5] libstdc++: keep subtree sizes in pb_ds binary search trees (PR 81806)

2020-07-13 Thread Xi Ruoyao via Gcc-patches

> The second and third patch together resolve PR 81806.

The attached patch modifies split_finish to use the subtree size we maintained
in the previous patch, resolving libstdc++/81806.

libstdc++-v3/ChangeLog:

PR libstdc++/81806
* include/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp
  (split_finish): Use maintained size, instead of calling
  std::distance.
-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University
From 4434da1b2b45797204f4fd978dcc4fbba4b17c6e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?X=E2=84=B9=20Ruoyao?= 
Date: Fri, 10 Jul 2020 21:38:09 +0800
Subject: [PATCH 3/5] libstdc++: use maintained size when split pb_ds binary
 search trees

libstdc++-v3/ChangeLog:

	PR libstdc++/81806
	* include/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp
	  (split_finish): Use maintained size, instead of calling
	  std::distance.
---
 .../ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp  | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp
index d08288f186d..fb924b4434b 100644
--- a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp
+++ b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp
@@ -133,7 +133,9 @@ PB_DS_CLASS_C_DEC::
 split_finish(PB_DS_CLASS_C_DEC& other)
 {
   other.initialize_min_max();
-  other.m_size = std::distance(other.begin(), other.end());
+  other.m_size = 0;
+  if (other.m_p_head->m_p_parent != 0)
+other.m_size = other.m_p_head->m_p_parent->m_subtree_size;
   m_size -= other.m_size;
   initialize_min_max();
   PB_DS_ASSERT_VALID((*this))
-- 
2.27.0



[PATCH 2/5] libstdc++: keep subtree sizes in pb_ds binary search trees (PR 81806)

2020-07-13 Thread Xi Ruoyao via Gcc-patches

> The second and third patch together resolve PR 81806.

The attached patch keeps the subtree size in binary search tree nodes.

libstdc++-v3/ChangeLog:

* include/ext/pb_ds/detail/rb_tree_map_/node.hpp
  (rb_tree_node_::size_type): New typedef.
  (rb_tree_node_::m_subtree_size): New field.
* include/ext/pb_ds/detail/splay_tree_map_/node.hpp
  (splay_tree_node_::size_type): New typedef.
  (splay_tree_node_::m_subtree_size): New field.
* include/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp
  (PB_DS_BIN_TREE_NAME::update_subtree_size): Declare new member
  function.
* include/ext/pb_ds/detail/bin_search_tree_/rotate_fn_imps.hpp
  (update_subtree_size): Define.
  (apply_update, update_to_top): Call update_subtree_size.
-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University
From 248ab526b766070d348d676ebf9f9c7b16c3a93e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?X=E2=84=B9=20Ruoyao?= 
Date: Fri, 10 Jul 2020 20:58:04 +0800
Subject: [PATCH 2/5] libstdc++: maintain subtree size in pb_ds binary search
 trees

libstdc++-v3/ChangeLog:

	* include/ext/pb_ds/detail/rb_tree_map_/node.hpp
	  (rb_tree_node_::size_type): New typedef.
	  (rb_tree_node_::m_subtree_size): New field.
	* include/ext/pb_ds/detail/splay_tree_map_/node.hpp
	  (splay_tree_node_::size_type): New typedef.
	  (splay_tree_node_::m_subtree_size): New field.
	* include/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp
	  (PB_DS_BIN_TREE_NAME::update_subtree_size): Declare new member
	  function.
	* include/ext/pb_ds/detail/bin_search_tree_/rotate_fn_imps.hpp
	  (update_subtree_size): Define.
	  (apply_update, update_to_top): Call update_subtree_size.
---
 .../bin_search_tree_/bin_search_tree_.hpp |  3 ++
 .../bin_search_tree_/rotate_fn_imps.hpp   | 31 ---
 .../ext/pb_ds/detail/rb_tree_map_/node.hpp|  8 +
 .../ext/pb_ds/detail/splay_tree_/node.hpp |  8 +
 4 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp
index 32dcb6ee020..38a3bab0444 100644
--- a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp
+++ b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp
@@ -304,6 +304,9 @@ namespace __gnu_pbds
   inline void
   rotate_parent(node_pointer);
 
+  inline void
+  update_subtree_size(node_pointer);
+
   inline void
   apply_update(node_pointer, null_node_update_pointer);
 
diff --git a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/rotate_fn_imps.hpp b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/rotate_fn_imps.hpp
index 1f18243a859..72ccfeb16a2 100644
--- a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/rotate_fn_imps.hpp
+++ b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/rotate_fn_imps.hpp
@@ -122,8 +122,23 @@ rotate_parent(node_pointer p_nd)
 PB_DS_CLASS_T_DEC
 inline void
 PB_DS_CLASS_C_DEC::
-apply_update(node_pointer /*p_nd*/, null_node_update_pointer /*p_update*/)
-{ }
+update_subtree_size(node_pointer p_nd)
+{
+  size_type size = 1;
+  if (p_nd->m_p_left)
+size += p_nd->m_p_left->m_subtree_size;
+  if (p_nd->m_p_right)
+size += p_nd->m_p_right->m_subtree_size;
+  p_nd->m_subtree_size = size;
+}
+
+PB_DS_CLASS_T_DEC
+inline void
+PB_DS_CLASS_C_DEC::
+apply_update(node_pointer p_nd, null_node_update_pointer /*p_update*/)
+{
+  update_subtree_size(p_nd);
+}
 
 PB_DS_CLASS_T_DEC
 template
@@ -131,6 +146,7 @@ inline void
 PB_DS_CLASS_C_DEC::
 apply_update(node_pointer p_nd, Node_Update_*  /*p_update*/)
 {
+  update_subtree_size(p_nd);
   node_update::operator()(node_iterator(p_nd),
 			  node_const_iterator(static_cast(0)));
 }
@@ -152,7 +168,14 @@ update_to_top(node_pointer p_nd, Node_Update_* p_update)
 PB_DS_CLASS_T_DEC
 inline void
 PB_DS_CLASS_C_DEC::
-update_to_top(node_pointer /*p_nd*/, null_node_update_pointer /*p_update*/)
-{ }
+update_to_top(node_pointer p_nd, null_node_update_pointer /*p_update */)
+{
+  while (p_nd != m_p_head)
+{
+  update_subtree_size(p_nd);
+
+  p_nd = p_nd->m_p_parent;
+}
+}
 
 #endif
diff --git a/libstdc++-v3/include/ext/pb_ds/detail/rb_tree_map_/node.hpp b/libstdc++-v3/include/ext/pb_ds/detail/rb_tree_map_/node.hpp
index de2d92a59b6..802562ad4f2 100644
--- a/libstdc++-v3/include/ext/pb_ds/detail/rb_tree_map_/node.hpp
+++ b/libstdc++-v3/include/ext/pb_ds/detail/rb_tree_map_/node.hpp
@@ -58,6 +58,9 @@ namespace __gnu_pbds
   typedef typename rebind_traits<_Alloc, rb_tree_node_>::pointer
 	node_pointer;
 
+  typedef typename rebind_traits<_Alloc, rb_tree_node_>::size_type
+	size_type;
+
   typedef typename rebind_traits<_Alloc, metadata_type>::reference
 	metadata_reference;
 
@@ -88,6 +91,7 @@ namespace __gnu_pbds
   node_pointer 	m_p_left;
   

[PATCH 1/5] libstdc++: keep subtree sizes in pb_ds binary search trees (PR 81806)

2020-07-13 Thread Xi Ruoyao via Gcc-patches
> The first patch removes two redundant statements which are confusing.  It
> should
> be applied anyway, disregarding other patches.

The patch is attached, to prevent my mail client from destroying it :(.

Please ignore a previous duplication of this mail with wrong title :(.

libstdc++-v3/ChangeLog:

* include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp
  (insert_leaf_new, insert_imp_empty): remove redundant statements.
-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University
From 4eea45261ebf974ddf02f6154166c5cb6aa180da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?X=E2=84=B9=20Ruoyao?= 
Date: Fri, 10 Jul 2020 20:10:52 +0800
Subject: [PATCH 1/5] libstdc++: remove two redundant statements in pb_ds
 binary tree

libstdc++-v3/ChangeLog:

	* include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp
	  (insert_leaf_new, insert_imp_empty): remove redundant statements.
---
 .../ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp| 2 --
 1 file changed, 2 deletions(-)

diff --git a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp
index 3942da05600..bdc10379af6 100644
--- a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp
+++ b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp
@@ -122,7 +122,6 @@ insert_leaf_new(const_reference r_value, node_pointer p_nd, bool left_nd)
 }
 
   p_new_nd->m_p_parent = p_nd;
-  p_new_nd->m_p_left = p_new_nd->m_p_right = 0;
   PB_DS_ASSERT_NODE_CONSISTENT(p_nd)
 
   update_to_top(p_new_nd, (node_update* )this);
@@ -142,7 +141,6 @@ insert_imp_empty(const_reference r_value)
 m_p_head->m_p_parent = p_new_node;
 
   p_new_node->m_p_parent = m_p_head;
-  p_new_node->m_p_left = p_new_node->m_p_right = 0;
   _GLIBCXX_DEBUG_ONLY(debug_base::insert_new(PB_DS_V2F(r_value));)
 
   update_to_top(m_p_head->m_p_parent, (node_update*)this);
-- 
2.27.0



*PING* Re: [Patch] [OpenMP, Fortran] Add structure/derived-type element mapping

2020-07-13 Thread Tobias Burnus

*PING*

On 6/24/20 7:32 PM, Tobias Burnus wrote:

(OpenMP 5 extends this a lot, but this is about OpenMP 4.5.
It touches code which is also used by OpenACC's attach/detach.)

@OpenACC/Julian: I think the character attach/detach for
deferred-length strings does not work properly with OpenACC;
I did not touch this code – but I think it needs some love.

This code adds support for
  map(dt%comp, dt%comp2)
where "comp" can be either a nonpointer, nonallocatable element
scalar, array or array section. Or it can be a pointer - where
character strings are one complication as for deferred-length
ones, the length is stored in an extra DT component.

While testing, I encountered two bugs, one relating to kind=4
character string (patch pending review; PR95837)
not part of testcase) and one related to deferred-length
character strings (commented in the test case; larger issue;
PR95868).

Like always, some more tests/testcase probably would not harm.

Regarding the patch:

(a) openmp.c:
This enabled component matching for 'map(' and
piggybacks on the OpenACC code for the checks. I think that
some additional checks might be useful – and I hope that no
check is too strict.
The "depend" clause was excluded as one otherwise gets a
testsuite fails due to the is-contiguous check.

(b) trans-openmp.c:
- gfc_trans_omp_clauses now has a "bool openacc".
- GOMP_MAP_ATTACH_DETACH is replaced by GOMP_MAP_ALWAYS_POINTER
- For arrays, the mapping of the descriptor is squeezed before
  "node" which contains the data transfer (var.desc.data mapping
  followed by the always_pointer for the mapping).
  In this array case, the latter gets a pointless cast in order
  to prevent that for both var.desc and var.desc.data memory gets
  allocated in the struct.
  → That's also the reason the big switch table is moved up.
- For deferred-length strings, the string-length is in an extra
  struct element (derived-type component) and will be mapped in
  addition.
- Bugs in the previous version:
  * gfc_trans_omp_array_section for "element == true", the size
of a pointer instead of the size of the element was mapped.
  * For string variables (with constant length) the kind=4 was
not properly handled.
  * Allocatable scalars were not handled – missing second clause
for the always_pointer (and attach_detach, I assume)

Comments, remarks, suggestions?
Otherwise: OK for the trunk?

Tobias


-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter


Re: [PATCH 0/5] libstdc++: keep subtree sizes in pb_ds binary search trees (PR 81806)

2020-07-13 Thread Xi Ruoyao via Gcc-patches

> The first patch removes two redundant statements which are confusing.  It
> should
> be applied anyway, disregarding other patches.

The patch is attached, to prevent my mail client from destroying it :(.

libstdc++-v3/ChangeLog:

* include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp
  (insert_leaf_new, insert_imp_empty): remove redundant statements.
-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University
From 4eea45261ebf974ddf02f6154166c5cb6aa180da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?X=E2=84=B9=20Ruoyao?= 
Date: Fri, 10 Jul 2020 20:10:52 +0800
Subject: [PATCH 1/5] libstdc++: remove two redundant statements in pb_ds
 binary tree

libstdc++-v3/ChangeLog:

	* include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp
	  (insert_leaf_new, insert_imp_empty): remove redundant statements.
---
 .../ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp| 2 --
 1 file changed, 2 deletions(-)

diff --git a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp
index 3942da05600..bdc10379af6 100644
--- a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp
+++ b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp
@@ -122,7 +122,6 @@ insert_leaf_new(const_reference r_value, node_pointer p_nd, bool left_nd)
 }
 
   p_new_nd->m_p_parent = p_nd;
-  p_new_nd->m_p_left = p_new_nd->m_p_right = 0;
   PB_DS_ASSERT_NODE_CONSISTENT(p_nd)
 
   update_to_top(p_new_nd, (node_update* )this);
@@ -142,7 +141,6 @@ insert_imp_empty(const_reference r_value)
 m_p_head->m_p_parent = p_new_node;
 
   p_new_node->m_p_parent = m_p_head;
-  p_new_node->m_p_left = p_new_node->m_p_right = 0;
   _GLIBCXX_DEBUG_ONLY(debug_base::insert_new(PB_DS_V2F(r_value));)
 
   update_to_top(m_p_head->m_p_parent, (node_update*)this);
-- 
2.27.0



[cris 4/4] cris: Add new pass eliminating compares after delay-slot-filling

2020-07-13 Thread Hans-Peter Nilsson via Gcc-patches
(All patches are committed.)

Delayed-branch-slot-filling a.k.a. reorg or dbr, often causes
opportunities for more compare-elimination than were visible for
the cmpelim pass.  With cc0, these were caught by the
elimination pass run in "final", thus the missed opportunities
is a regression.  A simple reorg-aware pass run just after reorg
handles most of them, if not all.  I chose to keep the "mach2"
pass identifier string I copy-pasted from the SPARC port instead
of inventing one like "postdbr_cmpelim".  Note the gap in numbers
in the test-case file names.

gcc:
PR target/93372
* config/cris/cris-passes.def: New file.
* config/cris/t-cris (PASSES_EXTRA): Add cris-passes.def.
* config/cris/cris.c: Add infrastructure bits and pass execute
function cris_postdbr_cmpelim.
* config/cris/cris-protos.h (make_pass_cris_postdbr_cmpelim): Declare.

gcc/testsuite:
* gcc.target/cris/pr93372-44.c, gcc.target/cris/pr93372-46.c: New.
---
 gcc/config/cris/cris-passes.def|  20 +++
 gcc/config/cris/cris-protos.h  |   2 +
 gcc/config/cris/cris.c | 202 +
 gcc/config/cris/t-cris |   2 +
 gcc/testsuite/gcc.target/cris/pr93372-44.c |  13 ++
 gcc/testsuite/gcc.target/cris/pr93372-46.c |  16 +++
 6 files changed, 255 insertions(+)
 create mode 100644 gcc/config/cris/cris-passes.def
 create mode 100644 gcc/testsuite/gcc.target/cris/pr93372-44.c
 create mode 100644 gcc/testsuite/gcc.target/cris/pr93372-46.c

diff --git a/gcc/config/cris/cris-passes.def b/gcc/config/cris/cris-passes.def
new file mode 100644
index 000..db3c74d4978
--- /dev/null
+++ b/gcc/config/cris/cris-passes.def
@@ -0,0 +1,20 @@
+/* Description of target passes for Visium.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+.  */
+
+INSERT_PASS_AFTER (pass_delay_slots, 1, pass_cris_postdbr_cmpelim);
diff --git a/gcc/config/cris/cris-protos.h b/gcc/config/cris/cris-protos.h
index 2db1ea1b8bc..053bba90df4 100644
--- a/gcc/config/cris/cris-protos.h
+++ b/gcc/config/cris/cris-protos.h
@@ -60,3 +60,5 @@ extern int cris_fatal (char *);
 extern int cris_initial_elimination_offset (int, int);
 
 extern void cris_init_expanders (void);
+
+extern rtl_opt_pass *make_pass_cris_postdbr_cmpelim (gcc::context *);
diff --git a/gcc/config/cris/cris.c b/gcc/config/cris/cris.c
index b26b9f2e883..59cbceef7a4 100644
--- a/gcc/config/cris/cris.c
+++ b/gcc/config/cris/cris.c
@@ -51,6 +51,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "output.h"
 #include "tm-constrs.h"
 #include "builtins.h"
+#include "cfgrtl.h"
+#include "tree-pass.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -129,6 +131,8 @@ static void cris_asm_output_mi_thunk
 static void cris_file_start (void);
 static void cris_init_libfuncs (void);
 
+static unsigned int cris_postdbr_cmpelim (void);
+
 static reg_class_t cris_preferred_reload_class (rtx, reg_class_t);
 
 static int cris_register_move_cost (machine_mode, reg_class_t, reg_class_t);
@@ -298,6 +302,204 @@ int cris_cpu_version = CRIS_DEFAULT_CPU_VERSION;
 
 struct gcc_target targetm = TARGET_INITIALIZER;
 
+namespace {
+
+const pass_data pass_data_cris_postdbr_cmpelim =
+{
+  RTL_PASS, /* type */
+  "mach2", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_MACH_DEP, /* tv_id */
+  0, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_cris_postdbr_cmpelim : public rtl_opt_pass
+{
+public:
+  pass_cris_postdbr_cmpelim (gcc::context *ctxt)
+: rtl_opt_pass (pass_data_cris_postdbr_cmpelim, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual unsigned int execute (function *)
+{
+  return cris_postdbr_cmpelim ();
+}
+
+  /* No use running this if reorg and cmpelim aren't both run.  */
+  virtual bool gate (function *)
+{
+  return
+   optimize > 0
+   && flag_delayed_branch
+   && flag_compare_elim_after_reload;
+}
+};
+
+} // anon namespace
+
+rtl_opt_pass *
+make_pass_cris_postdbr_cmpelim (gcc::context *ctxt)
+{
+  return new pass_cris_postdbr_cmpelim (ctxt);
+}
+
+/* "Cheap version" of cmpelim, making use of the opportunities opened up
+   by reorg.
+
+   Go through the insns of a 

[PATCH 0/5] libstdc++: keep subtree sizes in pb_ds binary search trees (PR 81806)

2020-07-13 Thread Xi Ruoyao via Gcc-patches
This is a series of patches essentially adding order statistics directly into
pb_ds binary search trees.  The main purpose is to resolve PR 81806 (spliting a
splay tree needs linear time).

The first patch removes two redundant statements which are confusing.  It should
be applied anyway, disregarding other patches.

The second and third patch together resolve PR 81806.

The fourth patch converts the point_iterator of rb_tree and splay_tree based
maps to random access iterator.  With the subtree size kept we can implement the
operators required by random access iterator in logarithm time.

The fifth patch moves the functionality of tree_order_statistics_node_update
into the implementation of binary search trees (they are now order-statistics
trees), makes tree_order_statistics_node_update no-op, and deprecates it.

Tested with `make check-target-libstdc++` in a non-bootstrap GCC build.  GCC
does not use pb_ds so it should be unnecessary to run a bootstrap.
-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University



[cris 2/4] cris: Use addi.b for additions where flags aren't inspected

2020-07-13 Thread Hans-Peter Nilsson via Gcc-patches
Comparing to the cc0 version of the CRIS port, I ran a few
microbenchmarks, for example gcc.c-torture/execute/arith-rand.c,
where there's sometimes an addition between an operation of
interest and the test on the result.

Unfortunately this patch doesn't remedy all the performance
regression for that program.  But, this patch by itself helps
and makes sense to commit separately: lots of addi.b in
previously empty delay-slots, with functions shortened by one or
a few insns, in libgcc.  I had an experience with the
reload-related caveat of % on constraints, which is "fixed"
documentationwise since long (soon 15 years ago;
be3914df4cc8/r105517).  I removed an even older related FIXME.

gcc:
PR target/93372
* config/cris/cris.md ("*add3_addi"): New splitter.
("*addi_b_"): New pattern.
("*addsi3"): Remove stale %-related comment.

gcc/testsuite:
PR target/93372
* gcc.target/cris/pr93372-45.c: New test.
---
 gcc/config/cris/cris.md| 40 +-
 gcc/testsuite/gcc.target/cris/pr93372-45.c | 13 ++
 2 files changed, 52 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/cris/pr93372-45.c

diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md
index 074f5234402..efafb5b1be1 100644
--- a/gcc/config/cris/cris.md
+++ b/gcc/config/cris/cris.md
@@ -973,7 +973,6 @@ (define_insn "*addsi3"
 ;; The last constraint is due to that after reload, the '%' is not
 ;; honored, and canonicalization doesn't care about keeping the same
 ;; register as in destination.  This will happen after insn splitting.
-;; gcc <= 2.7.2.  FIXME: Check for gcc-2.9x
 
  ""
 {
@@ -1291,6 +1290,45 @@ (define_insn "*addi"
   [(set_attr "slottable" "yes")
(set_attr "cc" "none")])
 
+;; This pattern is usually generated after reload, so a '%' is
+;; ineffective; use explicit combinations.
+(define_insn "*addi_b_"
+  [(set (match_operand:BWD 0 "register_operand" "=r,r")
+   (plus:BWD
+(match_operand:BWD 1 "register_operand" "0,r")
+(match_operand:BWD 2 "register_operand" "r,0")))]
+  ""
+  "@
+   addi %2.b,%0
+   addi %1.b,%0"
+  [(set_attr "slottable" "yes")])
+
+;; Strip the dccr clobber from addM3 with register operands, if the
+;; next instruction isn't using it.
+;; Not clobbering dccr may let cmpelim match a later compare with a
+;; previous operation of interest.  This has to run before cmpelim so it
+;; can't be a peephole2.  See gcc.target/cris/pr93372-45.c for a
+;; test-case.
+(define_split ;; "*add3_addi"
+  [(parallel
+[(set (match_operand:BWD 0 "register_operand")
+ (plus:BWD
+  (match_operand:BWD 1 "register_operand")
+  (match_operand:BWD 2 "register_operand")))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])]
+  "reload_completed"
+  [(set (match_dup 0) (plus:BWD (match_dup 1) (match_dup 2)))]
+{
+  rtx reg = operands[0];
+  rtx_insn *i = next_nonnote_nondebug_insn_bb (curr_insn);
+
+  while (i != NULL_RTX && (!INSN_P (i) || DEBUG_INSN_P (i)))
+i = next_nonnote_nondebug_insn_bb (i);
+
+  if (i == NULL_RTX || reg_mentioned_p (reg, i) || BARRIER_P (i))
+FAIL;
+})
+
 (define_insn "mul3"
   [(set (match_operand:WD 0 "register_operand" "=r")
(mult:WD
diff --git a/gcc/testsuite/gcc.target/cris/pr93372-45.c 
b/gcc/testsuite/gcc.target/cris/pr93372-45.c
new file mode 100644
index 000..4fb7a9ac6fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/cris/pr93372-45.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not {\tcmp|\ttest} } } */
+
+extern void foo(void);
+unsigned int x (unsigned int b, unsigned int a, unsigned int *c)
+{
+  unsigned int y = a & 15;
+  unsigned int z = y + b;
+  if (y == 0)
+*c = z;
+  return z;
+}
-- 
2.11.0



[cris 3/4] cris: Remove config/cris/t-cris gt-cris.h cargo

2020-07-13 Thread Hans-Peter Nilsson via Gcc-patches
Getting tired of:

make[1]: Entering directory 'x/gccobj/gcc'
Makefile:2682: warning: overriding recipe for target 'gt-cris.h'
xx/gcc/gcc/config/cris/t-cris:29: warning: ignoring old recipe for target 
'gt-cris.h'

I'm just going to assume it is just stale cruft no longer (if
ever) needed since nothing else but sh/t-sh has it, and the
commit log shows just (x prepended to avoid commit-log parsing
confusion):
xMerge from pch-branch up to tag pch-commit-20020603.
x
xFrom-SVN: r54232

Building "works better"; the related warning is gone.

This effectively empties the t-cris file, but stuff will be
added soon enough that it's kept around.

gcc:
* config/cris/t-cris: Remove gt-cris.h-related excessive cargo.
---
 gcc/config/cris/t-cris | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/gcc/config/cris/t-cris b/gcc/config/cris/t-cris
index af5535bde41..97d6efa534b 100644
--- a/gcc/config/cris/t-cris
+++ b/gcc/config/cris/t-cris
@@ -24,6 +24,3 @@
 # The makefile macros etc. are included in the order found in the
 # section "Target Fragment" in the gcc info-files (or the paper copy) of
 # "Using and Porting GCC"
-
-$(out_object_file): gt-cris.h
-gt-cris.h : s-gtype ; @true
-- 
2.11.0



[cris 1/4] cris: Correct output templates in define_subst patterns.

2020-07-13 Thread Hans-Peter Nilsson via Gcc-patches
Whoops.  This little gem had the effect of making the output
operand (0) constraints disappear but not the input operand (1)
constraints for define_subst:ed patterns, probably because
there's another (match_dup 1) in the output template (not
investigated).

That went surprisingly unnoticed until I added a pass leaning
just a little bit harder on the define_subst:ed patterns and
then only by the libgfortran library generating assembly with
nominally incorrect syntax.  (There was a move to a special
register from a general register, and it incorrectly matched a
pattern affecting condition codes.)

gcc:
* config/cris/cris.md ("setnz_subst", "setnz_subst", "setcc_subst"):
Use match_dup in output template, not match_operand.
---
 gcc/config/cris/cris.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md
index c36a5402be3..074f5234402 100644
--- a/gcc/config/cris/cris.md
+++ b/gcc/config/cris/cris.md
@@ -275,7 +275,7 @@ (define_subst "setnz_subst"
   "reload_completed"
   [(set (reg:CC_NZ CRIS_CC0_REGNUM)
(compare:CC_NZ (match_dup 1) (const_int 0)))
-   (set (match_operand 0) (match_operand 1))])
+   (set (match_dup 0) (match_dup 1))])
 
 (define_subst_attr "setnzvc" "setnzvc_subst" "" "_setnzvc")
 (define_subst_attr "ccnzvc" "setnzvc_subst" "" "_enabled")
@@ -288,7 +288,7 @@ (define_subst "setnzvc_subst"
   "reload_completed"
   [(set (reg:CC_NZVC CRIS_CC0_REGNUM)
(compare:CC_NZVC (match_dup 1) (const_int 0)))
-   (set (match_operand 0) (match_operand 1))])
+   (set (match_dup 0) (match_dup 1))])
 
 (define_subst_attr "setcc" "setcc_subst" "" "_setcc")
 (define_subst_attr "" "setcc_subst" "" "_enabled")
@@ -301,7 +301,7 @@ (define_subst "setcc_subst"
   "reload_completed"
   [(set (reg:CC CRIS_CC0_REGNUM)
(compare:CC (match_dup 1) (const_int 0)))
-   (set (match_operand 0) (match_operand 1))])
+   (set (match_dup 0) (match_dup 1))])
 
 ;; Operand and operator predicates.
 
-- 
2.11.0



[PATCH] Fix _mm512_{,mask_}cmp*_p[ds]_mask at -O0 [PR96174]

2020-07-13 Thread Jakub Jelinek via Gcc-patches
Hi!

The _mm512_{,mask_}cmp_p[ds]_mask and also _mm_{,mask_}cmp_s[ds]_mask
intrinsics have an argument which must have a constant passed to it
and so use an inline version only for ifdef __OPTIMIZE__ and have
a #define for -O0.  But the _mm512_{,mask_}cmp*_p[ds]_mask intrinsics
don't need a constant argument, they are essentially the first
set with the constant added to them implicitly based on the comparison
name, and so there is no #define version for them (correctly).
But their inline versions are defined in between the first and s[ds]
set and so inside of ifdef __OPTIMIZE__, which means that with -O0
they aren't defined at all.

This patch fixes that by moving those after the #ifdef __OPTIMIZE #else
use #define #endif block.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/10.2?

2020-07-13  Jakub Jelinek  

PR target/96174
* config/i386/avx512fintrin.h (_mm512_cmpeq_pd_mask,
_mm512_mask_cmpeq_pd_mask, _mm512_cmplt_pd_mask,
_mm512_mask_cmplt_pd_mask, _mm512_cmple_pd_mask,
_mm512_mask_cmple_pd_mask, _mm512_cmpunord_pd_mask,
_mm512_mask_cmpunord_pd_mask, _mm512_cmpneq_pd_mask,
_mm512_mask_cmpneq_pd_mask, _mm512_cmpnlt_pd_mask,
_mm512_mask_cmpnlt_pd_mask, _mm512_cmpnle_pd_mask,
_mm512_mask_cmpnle_pd_mask, _mm512_cmpord_pd_mask,
_mm512_mask_cmpord_pd_mask, _mm512_cmpeq_ps_mask,
_mm512_mask_cmpeq_ps_mask, _mm512_cmplt_ps_mask,
_mm512_mask_cmplt_ps_mask, _mm512_cmple_ps_mask,
_mm512_mask_cmple_ps_mask, _mm512_cmpunord_ps_mask,
_mm512_mask_cmpunord_ps_mask, _mm512_cmpneq_ps_mask,
_mm512_mask_cmpneq_ps_mask, _mm512_cmpnlt_ps_mask,
_mm512_mask_cmpnlt_ps_mask, _mm512_cmpnle_ps_mask,
_mm512_mask_cmpnle_ps_mask, _mm512_cmpord_ps_mask,
_mm512_mask_cmpord_ps_mask): Move outside of __OPTIMIZE__ guarded
section.

* gcc.target/i386/avx512f-vcmppd-3.c: New test.
* gcc.target/i386/avx512f-vcmpps-3.c: New test.

--- gcc/config/i386/avx512fintrin.h.jj  2020-05-28 16:25:00.053715690 +0200
+++ gcc/config/i386/avx512fintrin.h 2020-07-12 23:18:41.042696025 +0200
@@ -15127,6 +15127,88 @@ _mm512_mask_cmp_pd_mask (__mmask8 __U, _
 
 extern __inline __mmask8
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
+{
+  return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
+  (__v2df) __Y, __P,
+  (__mmask8) -1,
+  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
+{
+  return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
+  (__v2df) __Y, __P,
+  (__mmask8) __M,
+  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
+{
+  return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
+  (__v4sf) __Y, __P,
+  (__mmask8) -1,
+  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
+{
+  return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
+  (__v4sf) __Y, __P,
+  (__mmask8) __M,
+  _MM_FROUND_CUR_DIRECTION);
+}
+
+#else
+#define _mm512_cmp_pd_mask(X, Y, P)\
+  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),  \
+   (__v8df)(__m512d)(Y), (int)(P),\
+   
(__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_cmp_ps_mask(X, Y, P)\
+  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
+(__v16sf)(__m512)(Y), (int)(P),\
+
(__mmask16)-1,_MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_cmp_pd_mask(M, X, Y, P)
\
+  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),  \
+   (__v8df)(__m512d)(Y), (int)(P),\
+   (__mmask8)(M), 
_MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_cmp_ps_mask(M, X, Y, P)  

[cris 0/4] various cc0-related (and not) regression fixes

2020-07-13 Thread Hans-Peter Nilsson via Gcc-patches
Together with the combine.c patch posted (but remaining a WIP),
all coremark performance regressions are gone for CRIS, compared
to cc0.  Unfortunately, I looked further, and found some issues
when running gcc.c-torture/execute/arith-rand.c and
arith-rand-ll.c, in those functions and the target-specific
library division code (umod, udiv, div).

What remains after this series is a fix for reorg.c, for (IIRC)
fill_slots_from_thread, to do a "filter_flags" approach like in
33c2207d3fda for fill_simple_delay_slots.  Apparently cc0
targets still get better treatment there: there's a delay-slot
not filled with TARGET_FLAGS_REGNUM, seen in e.g.
random_bitstring.  Heads-up to Eric, who did that in 2015.

brgds, H-P


[PATCH v2] [RISC-V] Add support for TLS stack protector canary access

2020-07-13 Thread cooper via Gcc-patches
gcc/
* config/riscv/riscv-opts.h (stack_protector_guard): New enum.
* config/riscv/riscv.c (riscv_option_override): Handle
the new options.
* config/riscv/riscv.md (stack_protect_set): New pattern to handle
flexible stack protector guard settings.
(stack_protect_set_): Ditto.
(stack_protect_test): Ditto.
(stack_protect_test_): Ditto.
* config/riscv/riscv.opt (mstack-protector-guard=,
mstack-protector-guard-reg=, mstack-protector-guard-offset=): New
options.
* doc/invoke.texi (Option Summary) [RISC-V Options]:
Add -mstack-protector-guard=, -mstack-protector-guard-reg=, and
-mstack-protector-guard-offset=.
(RISC-V Options): Ditto.

Signed-off-by: cooper 
Signed-off-by: Guo Ren 
---
 gcc/config/riscv/riscv-opts.h |  6 +++
 gcc/config/riscv/riscv.c  | 47 
 gcc/config/riscv/riscv.md | 80 +++
 gcc/config/riscv/riscv.opt| 28 
 gcc/doc/invoke.texi   | 22 +-
 5 files changed, 182 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 8f12e50b9f1..2a3f9d9eef5 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -51,4 +51,10 @@ enum riscv_align_data {
   riscv_align_data_type_natural
 };
 
+/* Where to get the canary for the stack protector.  */
+enum stack_protector_guard {
+  SSP_TLS, /* per-thread canary in TLS block */
+  SSP_GLOBAL   /* global canary */
+};
+
 #endif /* ! GCC_RISCV_OPTS_H */
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index bfb3885ed08..63b0c3877b0 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -4775,6 +4775,53 @@ riscv_option_override (void)
   " [%<-mriscv-attribute%>]");
 #endif
 
+  if (riscv_stack_protector_guard == SSP_GLOBAL
+  && global_options_set.x_riscv_stack_protector_guard_offset_str)
+{
+  error ("incompatible options %<-mstack-protector-guard=global%> and "
+"%<-mstack-protector-guard-offset=%s%>",
+riscv_stack_protector_guard_offset_str);
+}
+
+  if (riscv_stack_protector_guard == SSP_TLS
+  && !(global_options_set.x_riscv_stack_protector_guard_offset_str
+  && global_options_set.x_riscv_stack_protector_guard_reg_str))
+{
+  error ("both %<-mstack-protector-guard-offset%> and "
+"%<-mstack-protector-guard-reg%> must be used "
+"with %<-mstack-protector-guard=sysreg%>");
+}
+
+  if (global_options_set.x_riscv_stack_protector_guard_reg_str)
+{
+  const char *str = riscv_stack_protector_guard_reg_str;
+  int reg = decode_reg_name (str);
+
+  if (!IN_RANGE (reg, GP_REG_FIRST + 1, GP_REG_LAST))
+   error ("%qs is not a valid base register in %qs", str,
+  "-mstack-protector-guard-reg=");
+
+  riscv_stack_protector_guard_reg = reg;
+}
+
+  if (global_options_set.x_riscv_stack_protector_guard_offset_str)
+{
+  char *end;
+  const char *str = riscv_stack_protector_guard_offset_str;
+  errno = 0;
+  long offs = strtol (riscv_stack_protector_guard_offset_str, , 0);
+
+  if (!*str || *end || errno)
+   error ("%qs is not a valid number in %qs", str,
+  "-mstack-protector-guard-offset=");
+
+  if (!SMALL_OPERAND (offs))
+   error ("%qs is not a valid offset in %qs", str,
+  "-mstack-protector-guard-offset=");
+
+  riscv_stack_protector_guard_offset = offs;
+}
+
 }
 
 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 95a02ecaa34..f15bad3b29e 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -65,6 +65,10 @@
   UNSPECV_BLOCKAGE
   UNSPECV_FENCE
   UNSPECV_FENCE_I
+
+  ;; Stack Smash Protector
+  UNSPEC_SSP_SET
+  UNSPEC_SSP_TEST
 ])
 
 (define_constants
@@ -2523,6 +2527,82 @@
   ""
 {})
 
+;; Named patterns for stack smashing protection.
+
+(define_expand "stack_protect_set"
+  [(match_operand 0 "memory_operand")
+   (match_operand 1 "memory_operand")]
+  ""
+{
+  machine_mode mode = GET_MODE (operands[0]);
+  if (riscv_stack_protector_guard == SSP_TLS)
+  {
+rtx reg = gen_rtx_REG (Pmode, riscv_stack_protector_guard_reg);
+rtx offset = GEN_INT (riscv_stack_protector_guard_offset);
+rtx addr = gen_rtx_PLUS (Pmode, reg, offset);
+operands[1] = gen_rtx_MEM (Pmode, addr);
+  }
+
+  emit_insn ((mode == DImode
+ ? gen_stack_protect_set_di
+ : gen_stack_protect_set_si) (operands[0], operands[1]));
+  DONE;
+})
+
+;; DO NOT SPLIT THIS PATTERN.  It is important for security reasons that the
+;; canary value does not live beyond the life of this sequence.
+(define_insn "stack_protect_set_"
+  [(set (match_operand:GPR 0 "memory_operand" "=m")
+   (unspec:GPR [(match_operand:GPR 1 

[PATCH] make var-tracking iteration consistent

2020-07-13 Thread Richard Biener
This eliminates the visited bitmap and makes whether a to be processed
block goes to the next or the current iteration only depend on its
position in RPO order rather than on whether it was visited in the
current iteration.  As optimization single-BB iteration is processed
immediately.

Bootstrapped / tested on x86_64-unknown-linux-gnu, pushed.

2020-07-10  Richard Biener  

* var-tracking.c (bb_heap_node_t): Remove unused typedef.
(vt_find_locations): Eliminate visited bitmap in favor of
RPO order check.  Dump statistics about the number of
local BB dataflow computes.
---
 gcc/var-tracking.c | 235 ++---
 1 file changed, 115 insertions(+), 120 deletions(-)

diff --git a/gcc/var-tracking.c b/gcc/var-tracking.c
index 899a5c0290d..cca75064c8b 100644
--- a/gcc/var-tracking.c
+++ b/gcc/var-tracking.c
@@ -118,7 +118,6 @@
 #include "function-abi.h"
 
 typedef fibonacci_heap  bb_heap_t;
-typedef fibonacci_node  bb_heap_node_t;
 
 /* var-tracking.c assumes that tree code with the same value as VALUE rtx code
has no chance to appear in REG_EXPR/MEM_EXPRs and isn't a decl.
@@ -7078,6 +7077,7 @@ vt_find_locations (void)
   int htabsz = 0;
   int htabmax = param_max_vartrack_size;
   bool success = true;
+  unsigned int n_blocks_processed = 0;
 
   timevar_push (TV_VAR_TRACKING_DATAFLOW);
   /* Compute reverse completion order of depth first search of the CFG
@@ -7089,7 +7089,6 @@ vt_find_locations (void)
 bb_order[rc_order[i]] = i;
   free (rc_order);
 
-  auto_sbitmap visited (last_basic_block_for_fn (cfun));
   in_worklist = sbitmap_alloc (last_basic_block_for_fn (cfun));
   in_pending = sbitmap_alloc (last_basic_block_for_fn (cfun));
   bitmap_clear (in_worklist);
@@ -7103,154 +7102,150 @@ vt_find_locations (void)
   std::swap (worklist, pending);
   std::swap (in_worklist, in_pending);
 
-  bitmap_clear (visited);
-
   while (!worklist->empty ())
{
+ bool changed;
+ edge_iterator ei;
+ int oldinsz, oldoutsz;
+
  bb = worklist->extract_min ();
  bitmap_clear_bit (in_worklist, bb->index);
- gcc_assert (!bitmap_bit_p (visited, bb->index));
- if (!bitmap_bit_p (visited, bb->index))
+
+ if (VTI (bb)->in.vars)
{
- bool changed;
- edge_iterator ei;
- int oldinsz, oldoutsz;
+ htabsz -= (shared_hash_htab (VTI (bb)->in.vars)->size ()
++ shared_hash_htab (VTI (bb)->out.vars)->size ());
+ oldinsz = shared_hash_htab (VTI (bb)->in.vars)->elements ();
+ oldoutsz = shared_hash_htab (VTI (bb)->out.vars)->elements ();
+   }
+ else
+   oldinsz = oldoutsz = 0;
 
- bitmap_set_bit (visited, bb->index);
+ if (MAY_HAVE_DEBUG_BIND_INSNS)
+   {
+ dataflow_set *in =  (bb)->in, *first_out = NULL;
+ bool first = true, adjust = false;
 
- if (VTI (bb)->in.vars)
-   {
- htabsz
-   -= shared_hash_htab (VTI (bb)->in.vars)->size ()
-   + shared_hash_htab (VTI (bb)->out.vars)->size ();
- oldinsz = shared_hash_htab (VTI (bb)->in.vars)->elements ();
- oldoutsz
-   = shared_hash_htab (VTI (bb)->out.vars)->elements ();
-   }
- else
-   oldinsz = oldoutsz = 0;
+ /* Calculate the IN set as the intersection of
+predecessor OUT sets.  */
 
- if (MAY_HAVE_DEBUG_BIND_INSNS)
-   {
- dataflow_set *in =  (bb)->in, *first_out = NULL;
- bool first = true, adjust = false;
+ dataflow_set_clear (in);
+ dst_can_be_shared = true;
 
- /* Calculate the IN set as the intersection of
-predecessor OUT sets.  */
+ FOR_EACH_EDGE (e, ei, bb->preds)
+   if (!VTI (e->src)->flooded)
+ gcc_assert (bb_order[bb->index]
+ <= bb_order[e->src->index]);
+   else if (first)
+ {
+   dataflow_set_copy (in,  (e->src)->out);
+   first_out =  (e->src)->out;
+   first = false;
+ }
+   else
+ {
+   dataflow_set_merge (in,  (e->src)->out);
+   adjust = true;
+ }
 
- dataflow_set_clear (in);
- dst_can_be_shared = true;
+ if (adjust)
+   {
+ dataflow_post_merge_adjust (in,  (bb)->permp);
 
- FOR_EACH_EDGE (e, ei, bb->preds)
-   if (!VTI (e->src)->flooded)
- gcc_assert (bb_order[bb->index]
- <= bb_order[e->src->index]);
-   else if (first)
- 

Re: [PATCH] [RISC-V] Add support for TLS stack protector canary access

2020-07-13 Thread cooper via Gcc-patches



On 2020/7/13 上午8:32, Jim Wilson wrote:

This looks like a security leak that the canary value is left in a4.
The i386 implementation operates directly on memory without loading
into registers.  The rs6000 implementation is careful to load 0 into
the other register in the stack_protector_test code after the xor.  I
think this is a bug in the aarch64 code that it isn't clearing the
other register.  And I think it is a bug in your code too.  If we
don't need to clear the canary from the two registers, then you should
eliminate the xor and just use "bne a5,a4,.L4".  But I think the way
you have it is right, you just need to clear the a4 register after the
xor.

If I use an offset outside the const immediate range of -2048 to 2047
then I get an ICE.  You either need to error on invalid offset, or do
some extra work to get valid addresses regardless of the offset.  And
that maybe also requires clearing the extra registers after the load
to avoid leaving the address of the canary in a register after the
sequence depending on how secure this needs to be.

Thanks Jim. I will clear the register after the xor and error on the 
offset outside the const immediate range


to fix these two bugs.


Best regards,

Cooper



Re: [PATCH 2/2] doc/implement-c.texi: About same-as-scalar-type volatile aggregate accesses, PR94600

2020-07-13 Thread Richard Biener via Gcc-patches
On Mon, Jul 13, 2020 at 6:46 AM Hans-Peter Nilsson  wrote:
>
> > From: Richard Biener 
> > Date: Tue, 7 Jul 2020 09:00:22 +0200
>
> > On Tue, Jul 7, 2020 at 6:03 AM Hans-Peter Nilsson via Gcc-patches
> >  wrote:
> > >
> > > We say very little about reads and writes to aggregate /
> > > compound objects, just scalar objects (i.e. assignments don't
> > > cause reads).  Let's lets say something safe about aggregate
> > > objects, but only for those that are the same size as a scalar
> > > type.
> > >
> > > There's an equal-sounding section (Volatiles) in extend.texi,
> > > but this seems a more appropriate place, as specifying the
> > > behavior of a standard qualifier.
> >
> > Hmm, might be true only up to word-mode size, not, say, __int128_t.
>
> I'm not saying a *single* read or write, I'm saying exactly as
> (many as) would happen for the integer type.
>
> > Also very likely only in case the object has the same alignment
> > as the naturally aligned integer type.
>
> Again, just as needed for the integer type.
>
> So WDYT about Martin Sebor's suggestion?

That sounds good.

Thanks,
Richard.

> brgds, H-P


Re: [PATCH] x86: Provide expanders for truncdisi2 and friends.

2020-07-13 Thread Richard Biener via Gcc-patches
On Sun, Jul 12, 2020 at 1:29 AM Roger Sayle  wrote:
>
>
> Even by my standards, this is an odd patch.  This adds expanders to
> i386.md requesting that integer truncations be represented in RTL
> using SUBREGs.  This exactly matches the (current) default behaviour
> when TARGET_TRULY_NOOP_TRUNCATION is undefined.  Hence this patch
> is mostly for documentation/teaching purposes, so that i386.md is a
> template or role model for the patterns that a backend should provide.
>
> As explained in my earlier post, defining TARGET_TRULY_NOOP_TRUNCATION
> to always return false in the i386/x86_64 backend, results in 603
> additional unexpected failures.  Adding these expanders avoids the
> majority (412) of those regressions.
>
> I'm not proposing that i386/x86_64 redefine TARGET_TRULY_NOOP_TRUNCATION
> but these placeholder expanders may provide the backend the flexibility
> of that option in the future, but it also helps to test some less frequently
> invoked corners of the middle-end.
>
> This patch has been tested on x86_64-pc-linux-gnu with a full "make
> bootstrap" and "make -k check" with no new failures, indeed there
> are (should be) absolutely no code changes with this patch.
>
> Might these changes be of interest?  If not, at least this patch
> is now archived on gcc-patches for future generations.

It seems to be improving TARGET_TRULY_NOOP_TRUNCATION documentation
might be useful here - from what it says it suggests to me it applies to
hardregs only and tells for example whether there are lower than word_mode
precision registers like on x86_64 %al?  Because for pseudos there's
always the requirement to use subregs and doing (reg:SI 42) and (reg:QI 42)
accesses to the same pseudo is invalid(?).

The only user (after your patch) of this hook is in function.c for the
function parameter setup btw. and I wonder if there's other hooks
for the RA for example that provide duplicate functionality.

Richard.

>
> 2020-07-12  Roger Sayle  
>
> gcc/ChangeLog
> * config/i386/i386.md (truncdi2, truncsi2,
> trunchiqi2): New expanders to make the intended representation
> of scalar integer truncations explicit.
>
> Thoughts?
> Roger
> --
> Roger Sayle
> NextMove Software
> Cambridge, UK
>
>


Re: [PATCH] middle-end: Remove truly_noop_truncation check from convert.c

2020-07-13 Thread Richard Biener via Gcc-patches
On Sat, Jul 11, 2020 at 8:29 PM Roger Sayle  wrote:
>
>
> This patch eliminates a check of targetm.truly_noop_truncation from
> the early middle-end, where the gimple/generic being generated by
> GCC's front-ends is being inappropriately influenced by the target's
> TRULY_NOOP_TRUNCATION.  The (recent) intention of TRULY_NOOP_TRUNCATION
> is to indicate that a backend requires explicit truncation instructions
> rather than using SUBREGs to perform truncations.  A long standing
> (and probably unintentional) side-effect has been that this setting
> also controls whether the middle-end narrows integer operations at
> the tree-level.  Understandably, GCC and its testsuite assume that
> GIMPLE and GENERIC behave consistently across platforms, and alas
> defining TRULY_NOOP_TRUNCATION away from the default triggers several
> regressions (including gcc.dg/fold-rotate-1.c).
>
> Hopefully, I can appeal to a design philosophy argument that the
> early middle-end (tree-ssa) passes should be as machine independent
> as possible, and target settings influence the late middle-end (RTL)
> passes.  Most of the backend hooks were eliminated from "fold" and
> the tree code with GCC 4.x, but perhaps TRULY_NOOP_TRUNCATION appears
> to have survived, probably because it is so rarely used.  It's only
> defined by three (current) backends: gcn, mips and tilegx.  Everywhere
> else TRULY_NOOP_TRUNCATION is unconditionally true, so that
> convert_to_integer_1 always calls do_narrow.
>
> This check of TRULY_NOOP_TRUNCATION (in convert_to_integer) dates back
> to (before) gcc 0.9 in 1987.  Mysteriously, at that point in time, all
> targets defined TRULY_NOOP_TRUNCATION to 1, so perhaps only Richard
> Stallman remembers why this code was ever added to the compiler.
> Even if truncations require explicit instructions, that shouldn't
> prevent the compiler from generating them (or using narrow integer
> operations).
>
> The following patch has been tested on x86_64-pc-linux-gnu with
> no regressions (where of course you wouldn't expect any changes),
> and on a x86_64-pc-linux-gnu that defines TRULY_NOOP_TRUNCATION
> to false, where it cures 17 of the regressions that are introduced.
> It also fixes all of the regressions introduced by defining
> TRULY_NOOP_TRUNCATION to false on nvptx-none.  I had hoped to
> also test on a MIPS target, but after over 10 days of bootstrap builds
> on the GCC compile farm, gcc23.fsffrance.org has started experiencing
> hardware issues (overheating?), so it may be a while before I can
> fully confirm this change causes no problems (and potentially fixes
> a few) on MIPS.
>
> Ok for mainline?

OK.  The other big early target dependence is of course
LOGICAL_OP_NON_SHORT_CIRCUIT ... (aka BRANCH_COST).

Thanks,
Richard.

> 2020-07-11  Roger Sayle  
>
> gcc/ChangeLog
> * convert.c (convert_to_integer_1): Narrow integer operations
> even on targets that require explicit truncation instructions.
>
> Thanks in advance,
> Roger
> --
> Roger Sayle
> NextMove Software
> Cambridge, UK
>


Re: [PATCH] rs6000: Define movsf_from_si2 to extract high part SF element from DImode[PR89310]

2020-07-13 Thread luoxhu via Gcc-patches
Hi,

On 2020/7/11 08:54, Segher Boessenkool wrote:
> Hi!
> 
> On Fri, Jul 10, 2020 at 09:39:40AM +0800, luoxhu wrote:
>> OK, seems the md file needs a format tool too...
> 
> Heh.  Just make sure it looks good (that is, does what it looks like),
> looks like the rest, etc.  It's hard to do anything nice with unspecs,
> [ ] lists do not format well.
> 
 +  "TARGET_NO_SF_SUBREG"
 +  "#"
 +  "&& vsx_reg_sfsubreg_ok (operands[0], SFmode)"
>>>
>>> Put this in the insn condition?  And since this is just a predicate,
>>> you can just use it instead of gpc_reg_operand.
>>>
>>> (The split condition becomes "&& 1" then, not "").
>>
>> OK, this seems a bit strange as movsi_from_sf, movsf_from_si and
>> movdi_from_sf_zero_ext all use it as condition...
> 
> Since in your case you *always* split, the split condition should be
> "always".  There are no alternatives that do not split here.
> 
>> And why vsx_reg_sfsubreg_ok allows "SF SUBREGS" and TARGET_NO_SF_SUBREG
>> "avoid (SUBREG:SF (REG:SI)", I guess they are not the same meaning? (The
>> TARGET_NO_SF_SUBREG is also copied from other similar defines.)  Thanks.
> 
> Good question.  I do not know.
> 
> Well...  Since this define_insn* requires p8 *anyway*, we do not need
> any of these sf_subreg things?  We always know for each one if it should
> be true or false.

Yes, removed the vsx_reg_sfsubreg_ok check.

> 
>> +  "TARGET_NO_SF_SUBREG"
> 
> But here we should require p8 some other way, then.

TARGET_NO_SF_SUBREG is defined to TARGET_DIRECT_MOVE_64BIT, and
TARGET_DIRECT_MOVE_64BIT is TARGET_DIRECT_MOVE && TARGET_P8_VECTOR && 
TARGET_POWERPC64
which means TARGET_P8_VECTOR must be true for TARGET_NO_SF_SUBREG.

> 
>> +  (set_attr "isa" "p8v")])
> 
> (This isn't enough, unfortunately).
> 


Updated patch to removed the vsx_reg_sfsubreg_ok and ICE fix:


For extracting high part element from DImode register like:

{%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

split it before reload with "and mask" to avoid generating shift right
32 bit then shift left 32 bit.  This pattern also exists in PR42475 and
PR67741, etc.

srdi 3,3,32
sldi 9,3,32
mtvsrd 1,9
xscvspdpn 1,1

=>

rldicr 3,3,0,31
mtvsrd 1,3
xscvspdpn 1,1

Bootstrap and regression tested pass on Power8-LE.

gcc/ChangeLog:

2020-07-13  Xionghu Luo  

PR rtl-optimization/89310
* config/rs6000/rs6000.md (movsf_from_si2): New
define_insn_and_split.

gcc/testsuite/ChangeLog:

2020-07-13  Xionghu Luo  

PR rtl-optimization/89310
* gcc.target/powerpc/pr89310.c: New test.
---
 gcc/config/rs6000/rs6000.md| 31 ++
 gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 
 2 files changed, 48 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr89310.c

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4fcd6a94022..480385ed4d2 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7593,6 +7593,37 @@ (define_insn_and_split "movsf_from_si"
"*,  *, p9v,   p8v,   *, *,
 p8v,p8v,   p8v,   *")])
 
+;; For extracting high part element from DImode register like:
+;; {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
+;; split it before reload with "and mask" to avoid generating shift right
+;; 32 bit then shift left 32 bit.
+(define_insn_and_split "movsf_from_si2"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
+   (unspec:SF
+[(subreg:SI
+  (ashiftrt:DI
+   (match_operand:DI 1 "input_operand" "r")
+   (const_int 32))
+  0)]
+UNSPEC_SF_FROM_SI))
+  (clobber (match_scratch:DI 2 "=r"))]
+  "TARGET_NO_SF_SUBREG"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (GET_CODE (operands[2]) == SCRATCH)
+operands[2] = gen_reg_rtx (DImode);
+
+  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
+  emit_insn (gen_anddi3 (operands[2], operands[1], mask));
+  emit_insn (gen_p8_mtvsrd_sf (operands[0], operands[2]));
+  emit_insn (gen_vsx_xscvspdpn_directmove (operands[0], operands[0]));
+  DONE;
+}
+  [(set_attr "length" "12")
+  (set_attr "type" "vecfloat")
+  (set_attr "isa" "p8v")])
 
 ;; Move 64-bit binary/decimal floating point
 (define_expand "mov"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr89310.c 
b/gcc/testsuite/gcc.target/powerpc/pr89310.c
new file mode 100644
index 000..15e78509246
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct s {
+  int i;
+  float f;
+};
+
+float
+foo (struct s arg)
+{
+  return arg.f;
+}
+
+/* { dg-final { scan-assembler-not {\msrdi\M} } } */
+/* { dg-final { scan-assembler-not {\msldi\M} } } */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */
-- 
2.21.0.777.g83232e3864




Re: [PATCH] ipa-devirt: Fix crash in obj_type_ref_class [PR95114]

2020-07-13 Thread Richard Biener via Gcc-patches
On Sat, Jul 11, 2020 at 10:21 AM Richard Sandiford
 wrote:
>
> Jan Hubicka  writes:
> >> Jan Hubicka  writes:
> >> >> The testcase has failed since r9-5035, because obj_type_ref_class
> >> >> tries to look up an ODR type when no ODR type information is
> >> >> available.  (The information was available earlier in the
> >> >> compilation, but was freed during pass_ipa_free_lang_data.)
> >> >> We then crash dereferencing the null get_odr_type result.
> >> >>
> >> >> The test passes with -O2.  However, it fails again if -fdump-tree-all
> >> >> is used, since obj_type_ref_class is called indirectly from the
> >> >> dump routines.
> >> >>
> >> >> Other code seems to create ODR type entries on the fly by passing
> >> >> "true" as the insert parameter.  But since obj_type_ref_class is
> >> >> used by dump routines, I guess it should have no side-effects and
> >> >> should simply punt in this case.
> >> > Thanks for looking into this!
> >> >
> >> > The ODR type hash is indeed supposed to be populated lazilly except for
> >> > LTO when we populate it at stream in time.  I think just making function
> >> > return NULL in case it should not is fragile, because we may hit missed
> >> > optimizations and other surprised elsewhere.
> >> >
> >> > What about adding new "for_dump" parameter set implicitly to false that
> >> > will make the function return ret in case get_odr_type is NULL?
> >> > For dumping it probably does not matter what ODR variant you see, since
> >> > we end up with a type name in the dump anyway.
> >>
> >> I think there are two problematic cases (at least as far as this
> >> testcase goes).  One is the direct call to obj_type_ref_class in
> >> tree-pretty-print.c.  The other is an indirect call via
> >> virtual_method_call_p, which is also called by tree-pretty-print.c.
> >> So I guess both of them would need a "for_dump" parameter.
> >>
> >> TBH doing it that way feels fragile too.  Even if we get it right now,
> >> it's going to be hard to ensure that all indirect calls to 
> >> obj_type_ref_class
> >> in future make the correct choice between lazy initialisation and remaining
> >> side-effect free.
> >
> > Yep, it is not very pretty :(.  In general the output of optimizations
> > does not depend much on what is inserted into the type inheritance graph
> > (because we must assume that types are derived in other units anyway)
> > except for anonymous types. However speculative devirt is an exception
> > here, so I guess we want to go woth for_dump markers (it is useful to
> > have obj type ref pretty printed in a meaningful way).
>
> OK, this patch (belatedly) does that.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK for trunk and
> GCC 9+?

OK.

Richard.

> Thanks,
> Richard
>
>
> gcc/
> PR middle-end/95114
> * tree.h (virtual_method_call_p): Add a default-false parameter
> that indicates whether the function is being called from dump
> routines.
> (obj_type_ref_class): Likewise.
> * tree.c (virtual_method_call_p): Likewise.
> * ipa-devirt.c (obj_type_ref_class): Likewise.  Lazily add ODR
> type information for the type when the parameter is false.
> * tree-pretty-print.c (dump_generic_node): Update calls to
> virtual_method_call_p and obj_type_ref_class accordingly.
>
> gcc/testsuite/
> PR middle-end/95114
> * g++.target/aarch64/pr95114.C: New test.
> ---
>  gcc/ipa-devirt.c   | 9 ++---
>  gcc/testsuite/g++.target/aarch64/pr95114.C | 3 +++
>  gcc/tree-pretty-print.c| 5 +++--
>  gcc/tree.c | 7 ---
>  gcc/tree.h | 4 ++--
>  5 files changed, 18 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/g++.target/aarch64/pr95114.C
>
> diff --git a/gcc/tree.h b/gcc/tree.h
> index cf546ed9491..866d9ba8fbc 100644
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -5241,8 +5241,8 @@ extern location_t *block_nonartificial_location (tree);
>  extern location_t tree_nonartificial_location (tree);
>  extern tree block_ultimate_origin (const_tree);
>  extern tree get_binfo_at_offset (tree, poly_int64, tree);
> -extern bool virtual_method_call_p (const_tree);
> -extern tree obj_type_ref_class (const_tree ref);
> +extern bool virtual_method_call_p (const_tree, bool = false);
> +extern tree obj_type_ref_class (const_tree ref, bool = false);
>  extern bool types_same_for_odr (const_tree type1, const_tree type2);
>  extern bool contains_bitfld_component_ref_p (const_tree);
>  extern bool block_may_fallthru (const_tree);
> diff --git a/gcc/tree.c b/gcc/tree.c
> index 342da55bba7..3d9968fd7a0 100644
> --- a/gcc/tree.c
> +++ b/gcc/tree.c
> @@ -12810,10 +12810,11 @@ lhd_gcc_personality (void)
> OBJ_TYPE_REF representing an virtual call of C++ method.
> (As opposed to OBJ_TYPE_REF representing objc calls
> through a cast where middle-end devirtualization machinery
> -   can't apply.) */