Re: [PATCH v2 2/3] doc: -falign-functions is ignored under -Os

2022-10-12 Thread Jan Hubicka via Gcc-patches
> This is implicitly mentioned in the docs, but there were some questions
> in a recent patch.  This makes it more explicit that -falign-functions is
> meant to be ignored under -Os.
> 
> gcc/doc/ChangeLog
> 
>   * invoke.texi (-falign-functions): Mention -Os
> ---
>  gcc/doc/invoke.texi | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 8326a60dcf1..a24798d5029 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -13164,7 +13164,8 @@ equivalent and mean that functions are not aligned.
>  If @var{n} is not specified or is zero, use a machine-dependent default.
>  The maximum allowed @var{n} option value is 65536.
>  
> -Enabled at levels @option{-O2}, @option{-O3}.
> +Enabled at levels @option{-O2}, @option{-O3}.  This has no behavior under
> +@option{-Os}.

Maybe we could instead say that the function alignment is ignored for
functions optimized for size.

This can happen by -Os, -Oz, cold attribute
or because we auto-detect function as cold (i.e. it unavoidably leads to
abort or is only called on such paths)

It would also be nice to mention the same for the other alignment options
(alignment of jumps, loops and labels is ignored when the given jump, loop or
label is optimized for size).

Honza
>  
>  @item -flimit-function-alignment
>  If this option is enabled, the compiler tries to avoid unnecessarily
> -- 
> 2.34.1
> 


[PATCH (pushed)] regenerate configure files

2022-10-12 Thread Martin Liška
Needed after a recent change.

gcc/ChangeLog:

* configure: Regenerate.

libatomic/ChangeLog:

* configure: Regenerate.

libbacktrace/ChangeLog:

* configure: Regenerate.

libcc1/ChangeLog:

* configure: Regenerate.

libffi/ChangeLog:

* configure: Regenerate.

libgfortran/ChangeLog:

* configure: Regenerate.

libgomp/ChangeLog:

* configure: Regenerate.

libitm/ChangeLog:

* configure: Regenerate.

libobjc/ChangeLog:

* configure: Regenerate.

liboffloadmic/ChangeLog:

* configure: Regenerate.
* plugin/configure: Regenerate.

libphobos/ChangeLog:

* configure: Regenerate.

libquadmath/ChangeLog:

* configure: Regenerate.

libsanitizer/ChangeLog:

* configure: Regenerate.

libssp/ChangeLog:

* configure: Regenerate.

libstdc++-v3/ChangeLog:

* configure: Regenerate.

libvtv/ChangeLog:

* configure: Regenerate.

lto-plugin/ChangeLog:

* configure: Regenerate.

zlib/ChangeLog:

* configure: Regenerate.
---
 gcc/configure  |  4 ++--
 libatomic/configure|  4 ++--
 libbacktrace/configure |  4 ++--
 libcc1/configure   |  4 ++--
 libffi/configure   |  4 ++--
 libgfortran/configure  |  4 ++--
 libgomp/configure  |  4 ++--
 libitm/configure   |  4 ++--
 libobjc/configure  |  4 ++--
 liboffloadmic/configure|  4 ++--
 liboffloadmic/plugin/configure |  4 ++--
 libphobos/configure|  4 ++--
 libquadmath/configure  |  4 ++--
 libsanitizer/configure |  4 ++--
 libssp/configure   |  4 ++--
 libstdc++-v3/configure | 16 
 libvtv/configure   |  4 ++--
 lto-plugin/configure   |  4 ++--
 zlib/configure |  4 ++--
 19 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/gcc/configure b/gcc/configure
index db366817cd0..99ba76522d6 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -19713,7 +19713,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 19692 "configure"
+#line 19716 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -19819,7 +19819,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 19798 "configure"
+#line 19822 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/libatomic/configure b/libatomic/configure
index 69e6b823ce5..e47d2d7fb35 100755
--- a/libatomic/configure
+++ b/libatomic/configure
@@ -11406,7 +11406,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11385 "configure"
+#line 11409 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11512,7 +11512,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11491 "configure"
+#line 11515 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/libbacktrace/configure b/libbacktrace/configure
index 4e0987258e8..a5bd133f4e4 100755
--- a/libbacktrace/configure
+++ b/libbacktrace/configure
@@ -11535,7 +11535,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11514 "configure"
+#line 11538 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11641,7 +11641,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11620 "configure"
+#line 11644 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/libcc1/configure b/libcc1/configure
index 2801e6c2ad0..bae3b8712b6 100755
--- a/libcc1/configure
+++ b/libcc1/configure
@@ -10801,7 +10801,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 10780 "configure"
+#line 10804 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -10907,7 +10907,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 10886 "configure"
+#line 10910 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/libffi/configure b/libffi/configure
index 523fa66c83e..2bb9f8d83d6 100755
--- a/libffi/configure
+++ b/libffi/configure
@@ -11596,7 +11596,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11575 "configure"
+#line 11599 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11702,7 +11702,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11681 "configure"
+#line 11705 "configure"
 #include

[PATCH] MAINTAINERS: Add myself for write after approval

2022-10-12 Thread Cui,Lili via Gcc-patches
Hi,

I want to add myself to MAINTAINERS for write after approval.

OK for master?

ChangeLog:
* MAINTAINERS (Write After Approval): Add myself.

---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 11fa8bc6dbd..e4e7349a6d9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -377,6 +377,7 @@ Andrea Corallo  

 Christian Cornelssen   
 Ludovic Courtès
 Lawrence Crowl 
+Lili Cui   
 Ian Dall   
 David Daney
 Robin Dapp 
-- 
2.17.1



RE: [PATCH] MAINTAINERS: Add myself for write after approval

2022-10-12 Thread Liu, Hongtao via Gcc-patches


> -Original Message-
> From: Cui, Lili 
> Sent: Wednesday, October 12, 2022 3:50 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Liu, Hongtao 
> Subject: [PATCH] MAINTAINERS: Add myself for write after approval
> 
> Hi,
> 
> I want to add myself to MAINTAINERS for write after approval.
> 
> OK for master?
Obvious fixes can be committed without prior approval
(https://gcc.gnu.org/gitwrite.html).
This can be considered an obvious fix (but you still need to send the patch
out like this).
> 
> ChangeLog:
>   * MAINTAINERS (Write After Approval): Add myself.
> 
> ---
>  MAINTAINERS | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 11fa8bc6dbd..e4e7349a6d9 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -377,6 +377,7 @@ Andrea Corallo
>   
>  Christian Cornelssen 
>  Ludovic Courtès  
>  Lawrence Crowl   
> +Lili Cui 
>  Ian Dall 
>  David Daney
>   
>  Robin Dapp   
> --
> 2.17.1



Re: [PATCH v2 00/10] [RISC-V] Atomics improvements [PR100265/PR100266]

2022-10-12 Thread Christoph Müllner via Gcc-patches
On Wed, Oct 12, 2022 at 2:15 AM Palmer Dabbelt  wrote:

> On Tue, 11 Oct 2022 16:31:25 PDT (-0700), Vineet Gupta wrote:
> >
> >
> > On 10/11/22 13:46, Christoph Müllner wrote:
> >> On Tue, Oct 11, 2022 at 9:31 PM Palmer Dabbelt 
> wrote:
> >>
> >> On Tue, 11 Oct 2022 12:06:27 PDT (-0700), Vineet Gupta wrote:
> >> > Hi Christoph, Kito,
> >> >
> >> > On 5/5/21 12:36, Christoph Muellner via Gcc-patches wrote:
> >> >> This series provides a cleanup of the current atomics
> >> implementation
> >> >> of RISC-V:
> >> >>
> >> >> * PR100265: Use proper fences for atomic load/store
> >> >> * PR100266: Provide programmatic implementation of CAS
> >> >>
> >> >> As both are very related, I merged the patches into one series.
> >> >>
> >> >> The first patch could be squashed into the following patches,
> >> >> but I found it easier to understand the changes with it in place.
> >> >>
> >> >> The series has been tested as follows:
> >> >> * Building and testing a multilib RV32/64 toolchain
> >> >>(bootstrapped with riscv-gnu-toolchain repo)
> >> >> * Manual review of generated sequences for GCC's atomic
> >> builtins API
> >> >>
> >> >> The programmatic re-implementation of CAS benefits from a REE
> >> improvement
> >> >> (see PR100264):
> >> >> https://gcc.gnu.org/pipermail/gcc-patches/2021-April/568680.html
> >> >> If this patch is not in place, then an additional extension
> >> instruction
> >> >> is emitted after the SC.W (in case of RV64 and CAS for uint32_t).
> >> >>
> >> >> Further, the new CAS code requires cbranch INSN helpers to be
> >> present:
> >> >> https://gcc.gnu.org/pipermail/gcc-patches/2021-May/569689.html
> >> >
> >> > I was wondering if this patchset is blocked on some technical
> >> grounds.
> >>
> >> There's a v3 (though I can't find all of it, so not quite sure what
> >> happened), but IIUC that still has the same fundamental problems
> that
> >> all these have had: changing over to the new fence model may be an
> >> ABI
> >> break and the split CAS implementation doesn't ensure eventual
> >> success
> >> (see Jim's comments).  Not sure if there's other comments floating
> >> around, though, that's just what I remember.
> >>
> >>
> >> v3 was sent on May 27, 2022, when I rebased this on an internal tree:
> >> https://gcc.gnu.org/pipermail/gcc-patches/2022-May/595712.html
> >> I dropped the CAS patch in v3 (issue: stack spilling under extreme
> >> register pressure instead of erroring out) as I thought that this was
> >> the blocker for the series.
> >> I just learned a few weeks ago, when I asked Palmer at the GNU
> >> Cauldron about this series, that the ABI break is the blocker.
> >
> > Yeah I was confused about the ABI aspect as I didn't see any mention of
> > that in the public reviews of v1 and v2.
>
> Sorry, I thought we'd talked about it somewhere but it must have just
> been in meetings and such.  Patrick was writing a similar patch set
> around the same time so it probably just got tied up in that, we ended
> up reducing it to just the strong CAS inline stuff because we couldn't
> sort out the correctness of the rest of it.
>
> >> My initial understanding was that fixing something broken cannot be an
> >> ABI break.
> >> And that the mismatch of the implementation in 2021 and the
> >> recommended mappings in the ratified specification from 2019 is
> >> something that is broken. I still don't know the background here, but
> >> I guess this assumption is incorrect from a historical point of view.
>
> We agreed that we wouldn't break binaries back when we submitted the
> port.  The ISA has changed many times since then, including adding the
> recommended mappings, but those binaries exist and we can't just
> silently break things for users.
>
> >> However, I'm sure that I am not the only one that assumes the mappings
> >> in the specification to be implemented in compilers and tools.
> >> Therefore I still consider the implementation of the RISC-V atomics in
> >> GCC as broken (at least w.r.t. user expectation from people that lack
> >> the historical background and just read the RISC-V specification).
>
> You can't just read one of those RISC-V PDFs and assume that
> implementations that match those words will function correctly.  Those
> words regularly change in ways where reasonable readers would end up
> with incompatible implementations due to those differences.  That's why
> we're so explicit about versions and such these days, we're just getting
> burned by these old mappings because they're from back when we thought
> the RISC-V definition of compatibility was going to match the more
> common one and we didn't build in fallbacks.
>

Indeed, read-and-assume might not be the best idea.
But read-and-ignore (in the hope everyone else does as well) won't help
either.

I think it is reasonable to expect this detail

[PATCH v2] rs6000: Rework option -mpowerpc64 handling [PR106680]

2022-10-12 Thread Kewen.Lin via Gcc-patches
Hi,

PR106680 shows that -m32 -mpowerpc64 is different from
-mpowerpc64 -m32, this is determined by the way how we
handle option powerpc64 in rs6000_handle_option.

Segher pointed out this difference should be taken as
a bug and we should ensure that option powerpc64 is
independent of -m32/-m64.  So this patch removes the
handlings in rs6000_handle_option and add some necessary
supports in rs6000_option_override_internal instead.

With this patch, if users specify -m{no-,}powerpc64, the
specified value is honoured, otherwise, for 64bit it
always enables OPTION_MASK_POWERPC64; while for 32bit
and TARGET_POWERPC64 and OS_MISSING_POWERPC64, it disables
OPTION_MASK_POWERPC64.

btw, following Segher's suggestion, I did some tries to warn
when OPTION_MASK_POWERPC64 is set for OS_MISSING_POWERPC64.
If warn for the case that powerpc64 is specified explicitly,
there are some TCs using -m32 -mpowerpc64 on ppc64-linux,
they need some updates, meanwhile the artificial run
with "--target_board=unix'{-m32/-mpowerpc64}'" will have
noisy warnings on ppc64-linux.  If warn for the case that
it's specified implicitly, they can just be initialized by
TARGET_DEFAULT (like -m32 on ppc64-linux) or set from the 
given cpu mask, we have to special case them and not to warn.
Following Segher's latest comment, I decided not to warn in these cases and
to keep the behavior consistent with before.

Bootstrapped and regress-tested on:
  - powerpc64-linux-gnu P7 and P8 {-m64,-m32}
  - powerpc64le-linux-gnu P9 and P10
  - powerpc-ibm-aix7.2.0.0 {-maix64,-maix32}

Hi Iain, could you help to test this new patch on darwin
again?  Thanks in advance!

Is it ok for trunk if darwin testing goes well?

BR,
Kewen
-
PR target/106680

gcc/ChangeLog:

* common/config/rs6000/rs6000-common.cc (rs6000_handle_option): Remove
the adjustment for option powerpc64 in -m64 handling, and remove the
whole -m32 handling.
* config/rs6000/rs6000.cc (rs6000_option_override_internal): When no
explicit powerpc64 option is provided, enable it for -m64.  For 32 bit
and OS_MISSING_POWERPC64, disable powerpc64 if it's enabled but not
specified explicitly.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/pr106680-1.c: New test.
* gcc.target/powerpc/pr106680-2.c: New test.
* gcc.target/powerpc/pr106680-3.c: New test.
* gcc.target/powerpc/pr106680-4.c: New test.

2022-10-12  Kewen Lin  
Iain Sandoe  
---
 gcc/common/config/rs6000/rs6000-common.cc | 11 --
 gcc/config/rs6000/rs6000.cc   | 37 ++-
 gcc/testsuite/gcc.target/powerpc/pr106680-1.c | 13 +++
 gcc/testsuite/gcc.target/powerpc/pr106680-2.c | 14 +++
 gcc/testsuite/gcc.target/powerpc/pr106680-3.c | 13 +++
 gcc/testsuite/gcc.target/powerpc/pr106680-4.c | 17 +
 6 files changed, 85 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106680-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106680-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106680-3.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106680-4.c

diff --git a/gcc/common/config/rs6000/rs6000-common.cc 
b/gcc/common/config/rs6000/rs6000-common.cc
index 8e393d08a23..c76b5c27bb6 100644
--- a/gcc/common/config/rs6000/rs6000-common.cc
+++ b/gcc/common/config/rs6000/rs6000-common.cc
@@ -119,19 +119,8 @@ rs6000_handle_option (struct gcc_options *opts, struct 
gcc_options *opts_set,
 #else
 case OPT_m64:
 #endif
-  opts->x_rs6000_isa_flags |= OPTION_MASK_POWERPC64;
   opts->x_rs6000_isa_flags |= (~opts_set->x_rs6000_isa_flags
   & OPTION_MASK_PPC_GFXOPT);
-  opts_set->x_rs6000_isa_flags |= OPTION_MASK_POWERPC64;
-  break;
-
-#ifdef TARGET_USES_AIX64_OPT
-case OPT_maix32:
-#else
-case OPT_m32:
-#endif
-  opts->x_rs6000_isa_flags &= ~OPTION_MASK_POWERPC64;
-  opts_set->x_rs6000_isa_flags |= OPTION_MASK_POWERPC64;
   break;

 case OPT_mminimal_toc:
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index e6fa3ad0eb7..e37d99deb61 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3648,17 +3648,12 @@ rs6000_option_override_internal (bool global_init_p)
   rs6000_pointer_size = 32;
 }

-  /* Some OSs don't support saving the high part of 64-bit registers on context
- switch.  Other OSs don't support saving Altivec registers.  On those OSs,
- we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
- if the user wants either, the user must explicitly specify them and we
- won't interfere with the user's specification.  */
+  /* Some OSs don't support saving Altivec registers.  On those OSs, we don't
+ touch the OPTION_MASK_ALTIVEC settings; if the user wants it, the user
+ must explicitly specify it and we won't interfere with the user's
+ specification.  */

   set_masks = POWERPC_MASKS;
-#ifdef OS_MIS

[PATCH] machmode: Introduce GET_MODE_NEXT_MODE with previous GET_MODE_WIDER_MODE meaning, add new GET_MODE_WIDER_MODE

2022-10-12 Thread Jakub Jelinek via Gcc-patches
On Wed, Oct 05, 2022 at 04:02:25PM -0400, Jason Merrill wrote:
> > > > @@ -5716,7 +5716,13 @@ emit_store_flag_1 (rtx target, enum rtx_
> > > >{
> > > > machine_mode optab_mode = mclass == MODE_CC ? CCmode : 
> > > > compare_mode;
> > > > icode = optab_handler (cstore_optab, optab_mode);
> > > > - if (icode != CODE_FOR_nothing)
> > > > + if (icode != CODE_FOR_nothing
> > > > +/* Don't consider [BH]Fmode as usable wider mode, as neither is
> > > > +   a subset or superset of the other.  */
> > > > +&& (compare_mode == mode
> > > > +|| !SCALAR_FLOAT_MODE_P (compare_mode)
> > > > +|| maybe_ne (GET_MODE_PRECISION (compare_mode),
> > > > + GET_MODE_PRECISION (mode
> > > 
> > > Why do you need to do this here (and in prepare_cmp_insn, and similarly in
> > > can_compare_p)?  Shouldn't get_wider skip over modes that are not actually
> > > wider?
> > 
> > I'm afraid too many places rely on all modes of a certain class to be
> > visible when walking from "narrowest" to "widest" mode, say
> > FOR_EACH_MODE_IN_CLASS/FOR_EACH_MODE/FOR_EACH_MODE_UNTIL/FOR_EACH_WIDER_MODE
> > etc. wouldn't work at all if GET_MODE_WIDER_MODE (BFmode) == SFmode
> > && GET_MODE_WIDER_MODE (HFmode) == SFmode.
> 
> Yes, it seems they need to change now that their assumptions have been
> violated.  I suppose FOR_EACH_MODE_IN_CLASS would need to change to not use
> get_wider, and users of FOR_EACH_MODE/FOR_EACH_MODE_UNTIL need to decide
> whether they want an iteration that uses get_wider (likely with a new name)
> or not.

Here is a patch which does that.
Passes bootstrap/regtest on x86_64-linux and i686-linux.

Though I admit I didn't go carefully through all 24 GET_MODE_WIDER_MODE
uses, 54 FOR_EACH_MODE_IN_CLASS uses, 3 FOR_EACH_MODE uses, 24
FOR_EACH_MODE_FROM, 6 FOR_EACH_MODE_UNTIL and 15 FOR_EACH_WIDER_MODE uses.
It is more important to go through the GET_MODE_WIDER_MODE and
FOR_EACH_WIDER_MODE uses because the patch changes behavior for those,
the rest keep their previous meaning and so can be changed incrementally
if the other meaning is desirable to them (I've of course changed the 3
spots I had to change in the previous BFmode patch and whatever triggered
during the bootstraps).

Thoughts on this?

2022-10-12  Jakub Jelinek  

* genmodes.cc (emit_mode_wider): Emit previous content of
mode_wider array into mode_next array and for mode_wider
emit always VOIDmode for !CLASS_HAS_WIDER_MODES_P classes,
otherwise skip through modes with the same precision.
* machmode.h (mode_next): Declare.
(GET_MODE_NEXT_MODE): New inline function.
(mode_iterator::get_next, mode_iterator::get_known_next): New
function templates.
(FOR_EACH_MODE_IN_CLASS): Use get_next instead of get_wider.
(FOR_EACH_MODE): Use get_known_next instead of get_known_wider.
(FOR_EACH_MODE_FROM): Use get_next instead of get_wider.
(FOR_EACH_WIDER_MODE_FROM): Define.
(FOR_EACH_NEXT_MODE): Define.
* expmed.cc (emit_store_flag_1): Use FOR_EACH_WIDER_MODE_FROM
instead of FOR_EACH_MODE_FROM.
* optabs.cc (prepare_cmp_insn): Likewise.  Remove redundant
!CLASS_HAS_WIDER_MODES_P check.
(prepare_float_lib_cmp): Use FOR_EACH_WIDER_MODE_FROM instead of
FOR_EACH_MODE_FROM.
* config/i386/i386-expand.cc (get_mode_wider_vector): Use
GET_MODE_NEXT_MODE instead of GET_MODE_WIDER_MODE.

--- gcc/genmodes.cc.jj  2022-05-23 21:44:48.080857253 +0200
+++ gcc/genmodes.cc 2022-10-11 22:35:39.680286764 +0200
@@ -1527,7 +1527,7 @@ emit_mode_wider (void)
   int c;
   struct mode_data *m;
 
-  print_decl ("unsigned char", "mode_wider", "NUM_MACHINE_MODES");
+  print_decl ("unsigned char", "mode_next", "NUM_MACHINE_MODES");
 
   for_all_modes (c, m)
 tagged_printf ("E_%smode",
@@ -1535,6 +1535,37 @@ emit_mode_wider (void)
   m->name);
 
   print_closer ();
+  print_decl ("unsigned char", "mode_wider", "NUM_MACHINE_MODES");
+
+  for_all_modes (c, m)
+{
+  struct mode_data *m2 = 0;
+
+  if (m->cl == MODE_INT
+ || m->cl == MODE_PARTIAL_INT
+ || m->cl == MODE_FLOAT
+ || m->cl == MODE_DECIMAL_FLOAT
+ || m->cl == MODE_COMPLEX_FLOAT
+ || m->cl == MODE_FRACT
+ || m->cl == MODE_UFRACT
+ || m->cl == MODE_ACCUM
+ || m->cl == MODE_UACCUM)
+   for (m2 = m->wider; m2 && m2 != void_mode; m2 = m2->wider)
+ {
+   if (m2->bytesize == m->bytesize
+   && m2->precision == m->precision)
+ continue;
+   break;
+ }
+
+  if (m2 == void_mode)
+   m2 = 0;
+  tagged_printf ("E_%smode",
+m2 ? m2->name : void_mode->name,
+m->name);
+}
+
+  print_closer ();
   print_decl ("unsigned char", "mode_2xwider", "NUM_MACHINE_MODES");
 
   for_all_modes (c, m)
--- gcc

Re: [PATCH] rs6000: Rework option -mpowerpc64 handling [PR106680]

2022-10-12 Thread Kewen.Lin via Gcc-patches
Hi Segher!

on 2022/10/10 21:58, Segher Boessenkool wrote:
> On Mon, Oct 10, 2022 at 10:15:58AM +0800, Kewen.Lin wrote:
>> on 2022/10/4 05:15, Segher Boessenkool wrote:
>>> Right.  If mpowerpc64 is enabled while OS_MISSING_POWERPC64, warn for
>>> that; 
>>
>> Currently if option powerpc64 is enabled explicitly while 
>> OS_MISSING_POWERPC64,
>> there is no warning.  One typical case is -m32 compilation on ppc64.  I made
>> a patch to warn for this case as you suggested (btw, this change can be taken
>> separately from this rework), it caused some test cases to fail as below:
> 
> "Explicitly" means the user says "-m32 -mpowerpc64".
> 
> I wonder what "on powerpc64" means in what you say, and why that would
> matter?

I guess you meant to ask "on ppc64"?  I meant to say "ppc64-linux", sorry
for the confusion.  On ppc64-linux, OS_MISSING_POWERPC64 is defined as
!TARGET_64BIT, the explicit option "-m32 -mpowerpc64" doesn't warn before
but it's made to warn as the patch mentioned above, then need some test
cases updates.

> 
>> gcc.dg/vect/vect-82_64.c
>> gcc.dg/vect/vect-83_64.c
>> gcc.target/powerpc/bswap64-4.c
>> gcc.target/powerpc/ppc64-double-1.c
>> gcc.target/powerpc/pr106680-4.c 
>> gcc.target/powerpc/rs6000-fpint-2.c
>>
>> It's fine to fix them with one additional option "-w" to disable the warning.
>> But IIUC one concern is that if we want to test with 
>> "--target_board=unix'{-m32,
>> -m32/-mpowerpc64}'", the latter combination will always have this warning,
>> with one extra "-w" (that is -m32/-mpowerpc64/-w) can make some cases which
>> aim to check warning msg ineffective.  So maybe we want to re-consider it
>> (like just leaving it as before)?
> 
> There will always be false positives (and negatives!) if you put any
> warning options in RUNTESTFLAGS.  -w is merely louder than most :-)
> 
> But leave this as further improvement.  Maybe put in a comment.

OK.

> 
>>> and if mpowerpc64 was only implicit, disable it as well (and say
>>> we did!)
>>
>> But on ppc64 linux, for -m32 compilation mpowerpc64 is implicitly enabled
>> since it's with bi-arch supported, I made a patch to disable it as well as
>> warn it, it can't be bootstrapped since it warned for -m32 build (-Werror)
>> and failed.  So I refined it to something like:
>>
>> +  /* With RS6000_BI_ARCH defined (bi-architecture (32/64) 
>> supported),
>> + TARGET_DEFAULT has bit MASK_POWERPC64 on by default, to keep 
>> the
>> + behavior consistent (like: no warnings for -m32 on ppc64), we
>> + just silently disable it.  Otherwise, disable it and warn.  */
>> +  rs6000_isa_flags &= ~OPTION_MASK_POWERPC64;
>> +#ifndef RS6000_BI_ARCH
>> +  warning (0, "powerpc64 is unexpected to be enabled on the "
>> +  "current OS");
>> +#endif
> 
> It has nothing to do with biarch.  Let's just not warn if it is so much
> work to do it correctly.  We never did before, and no one complained,
> how bad can it be :-)
> 

OK, I made a patch v2 which doesn't try to warn for them, fully tested it
and just posted at:

https://gcc.gnu.org/pipermail/gcc-patches/2022-October/603350.html

BR,
Kewen


[COMMITTED] gcov: rename gcov_write_summary

2022-10-12 Thread Martin Liška
Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
I'm going to install it.

Martin

gcc/ChangeLog:

* gcov-io.cc (gcov_write_summary): Rename to ...
(gcov_write_object_summary): ... this.
* gcov-io.h (GCOV_TAG_OBJECT_SUMMARY_LENGTH): Rename from ...
(GCOV_TAG_SUMMARY_LENGTH): ... this.

libgcc/ChangeLog:

* libgcov-driver.c: Use new function.
* libgcov.h (gcov_write_summary): Rename to ...
(gcov_write_object_summary): ... this.
---
 gcc/gcov-io.cc  | 8 
 gcc/gcov-io.h   | 2 +-
 libgcc/libgcov-driver.c | 2 +-
 libgcc/libgcov.h| 5 ++---
 4 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/gcc/gcov-io.cc b/gcc/gcov-io.cc
index 62032ccfa18..af5b13c2cf9 100644
--- a/gcc/gcov-io.cc
+++ b/gcc/gcov-io.cc
@@ -372,13 +372,13 @@ gcov_write_length (gcov_position_t position)
 
 #else /* IN_LIBGCOV */
 
-/* Write a summary structure to the gcov file.  */
+/* Write an object summary structure to the gcov file.  */
 
 GCOV_LINKAGE void
-gcov_write_summary (gcov_unsigned_t tag, const struct gcov_summary *summary)
+gcov_write_object_summary (const struct gcov_summary *summary)
 {
-  gcov_write_unsigned (tag);
-  gcov_write_unsigned (GCOV_TAG_SUMMARY_LENGTH);
+  gcov_write_unsigned (GCOV_TAG_OBJECT_SUMMARY);
+  gcov_write_unsigned (GCOV_TAG_OBJECT_SUMMARY_LENGTH);
   gcov_write_unsigned (summary->runs);
   gcov_write_unsigned (summary->sum_max);
 }
diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h
index 30947634d73..e91cd736556 100644
--- a/gcc/gcov-io.h
+++ b/gcc/gcov-io.h
@@ -266,8 +266,8 @@ typedef uint64_t gcov_type_unsigned;
 #define GCOV_TAG_COUNTER_LENGTH(NUM) ((NUM) * 2 * GCOV_WORD_SIZE)
 #define GCOV_TAG_COUNTER_NUM(LENGTH) ((LENGTH / GCOV_WORD_SIZE) / 2)
 #define GCOV_TAG_OBJECT_SUMMARY  ((gcov_unsigned_t)0xa100)
+#define GCOV_TAG_OBJECT_SUMMARY_LENGTH (2 * GCOV_WORD_SIZE)
 #define GCOV_TAG_PROGRAM_SUMMARY ((gcov_unsigned_t)0xa300) /* Obsolete */
-#define GCOV_TAG_SUMMARY_LENGTH (2 * GCOV_WORD_SIZE)
 #define GCOV_TAG_AFDO_FILE_NAMES ((gcov_unsigned_t)0xaa00)
 #define GCOV_TAG_AFDO_FUNCTION ((gcov_unsigned_t)0xac00)
 #define GCOV_TAG_AFDO_WORKING_SET ((gcov_unsigned_t)0xaf00)
diff --git a/libgcc/libgcov-driver.c b/libgcc/libgcov-driver.c
index aba62d588b8..e1b74c81e07 100644
--- a/libgcc/libgcov-driver.c
+++ b/libgcc/libgcov-driver.c
@@ -520,7 +520,7 @@ write_one_data (const struct gcov_info *gi_ptr,
 
 #ifdef NEED_L_GCOV
   /* Generate whole program statistics.  */
-  gcov_write_summary (GCOV_TAG_OBJECT_SUMMARY, prg_p);
+  gcov_write_object_summary (prg_p);
 #endif
 
   /* Write execution counts for each function.  */
diff --git a/libgcc/libgcov.h b/libgcc/libgcov.h
index c7545cc746e..5e7bd0e3454 100644
--- a/libgcc/libgcov.h
+++ b/libgcc/libgcov.h
@@ -118,7 +118,7 @@ typedef unsigned gcov_type_unsigned __attribute__ ((mode 
(QI)));
 #define gcov_rewrite __gcov_rewrite
 #define gcov_is_error __gcov_is_error
 #define gcov_write_unsigned __gcov_write_unsigned
-#define gcov_write_summary __gcov_write_summary
+#define gcov_write_object_summary __gcov_write_object_summary
 #define gcov_read_unsigned __gcov_read_unsigned
 #define gcov_read_counter __gcov_read_counter
 #define gcov_read_summary __gcov_read_summary
@@ -342,8 +342,7 @@ extern int __gcov_execve (const char *, char  *const [], 
char *const [])
   ATTRIBUTE_HIDDEN;
 
 /* Functions that only available in libgcov.  */
-GCOV_LINKAGE void gcov_write_summary (gcov_unsigned_t /*tag*/,
-  const struct gcov_summary *)
+GCOV_LINKAGE void gcov_write_object_summary (const struct gcov_summary *)
 ATTRIBUTE_HIDDEN;
 GCOV_LINKAGE void gcov_rewrite (void) ATTRIBUTE_HIDDEN;
 
-- 
2.37.3



Re: [Patch][v5] libgomp/nvptx: Prepare for reverse-offload callback handling

2022-10-12 Thread Tobias Burnus

On 11.10.22 13:12, Alexander Monakov wrote:

My understanding is such trickery should not be necessary with
the barrier-based approach, i.e. the sequence of PTX instructions

   st   % plain store
   membar.sys
   st.volatile

should be enough to guarantee that the former store is visible on the host
before the latter, and work all the way back to sm_20.


If I understand it correctly, you mean:

  GOMP_REV_OFFLOAD_VAR->dev_num = GOMP_ADDITIONAL_ICVS.device_num;

  __sync_synchronize ();  /* membar.sys */
  asm volatile ("st.volatile.global.u64 [%0], %1;"
: : "r"(addr_struct_fn), "r" (fn) : "memory");


And then directly followed by the busy wait:

  while (__atomic_load_n (&GOMP_REV_OFFLOAD_VAR->fn, __ATOMIC_ACQUIRE) != 0)
;  /* spin  */

which GCC expands to:

  /* ld.global.u64 %r64,[__gomp_rev_offload_var];
 ld.u64 %r36,[%r64];
 membar.sys;  */

The patch, updated accordingly, is attached.

(This is the only change + removing the mkoffload.cc part is the only
larger change. Otherwise, it only handles the minor comments by Jakub.
The now removed CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT was used
until commit r10-304-g1f4c5b9bb2eb81880e2bc725435d596fcd2bdfef i.e.
it is a really old left over!)

Otherwise, tested* to work with sm_30 (error by mkoffload, unchanged),
sm_35 and sm_70.

Tobias

*With some added code; until GOMP_OFFLOAD_get_num_devices accepts
GOMP_REQUIRES_UNIFIED_SHARED_MEMORY and GOMP_OFFLOAD_load_image
gets passed a non-NULL for rev_fn_table, the current patch is a no op.

Planned next is the related GCN patch – and the actual change
in libgomp/target.c (+ accepting USM in GOMP_OFFLOAD_get_num_devices)
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
libgomp/nvptx: Prepare for reverse-offload callback handling

This patch adds a stub 'gomp_target_rev' in the host's target.c, which will
later handle the reverse offload.
For nvptx, it adds support for forwarding the offload gomp_target_ext call
to the host by setting values in a struct on the device and querying it on
the host - invoking gomp_target_rev on the result.

include/ChangeLog:

	* cuda/cuda.h (enum CUdevice_attribute): Add
	CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING.
	(CU_MEMHOSTALLOC_DEVICEMAP): Define.
	(cuMemHostAlloc): Add prototype.

libgomp/ChangeLog:

	* config/nvptx/icv-device.c (GOMP_DEVICE_NUM_VAR): Remove
	'static' for this variable.
	* config/nvptx/libgomp-nvptx.h: New file.
	* config/nvptx/target.c: Include it.
	(GOMP_ADDITIONAL_ICVS): Declare extern var.
	(GOMP_REV_OFFLOAD_VAR): Declare var.
	(GOMP_target_ext): Handle reverse offload.
	* libgomp-plugin.h (GOMP_PLUGIN_target_rev): New prototype.
	* libgomp-plugin.c (GOMP_PLUGIN_target_rev): New, call ...
	* target.c (gomp_target_rev): ... this new stub function.
	* libgomp.h (gomp_target_rev): Declare.
	* libgomp.map (GOMP_PLUGIN_1.4): New; add GOMP_PLUGIN_target_rev.
	* plugin/cuda-lib.def (cuMemHostAlloc): Add.
	* plugin/plugin-nvptx.c: Include libgomp-nvptx.h.
	(struct ptx_device): Add rev_data member. 
	(nvptx_open_device): Remove async_engines query, last used in
	r10-304-g1f4c5b9b; add unified-address assert check.
	(GOMP_OFFLOAD_get_num_devices): Claim unified address
	support.
	(GOMP_OFFLOAD_load_image): Free rev_fn_table if no
	offload functions exist. Make offload var available
	on host and device.
	(rev_off_dev_to_host_cpy, rev_off_host_to_dev_cpy): New.
	(GOMP_OFFLOAD_run): Handle reverse offload.

 include/cuda/cuda.h  |   3 +
 libgomp/config/nvptx/icv-device.c|   2 +-
 libgomp/config/nvptx/libgomp-nvptx.h |  51 +
 libgomp/config/nvptx/target.c|  54 +++---
 libgomp/libgomp-plugin.c |  12 
 libgomp/libgomp-plugin.h |   7 +++
 libgomp/libgomp.h|   5 ++
 libgomp/libgomp.map  |   5 ++
 libgomp/plugin/cuda-lib.def  |   1 +
 libgomp/plugin/plugin-nvptx.c| 107 +++
 libgomp/target.c |  19 +++
 11 files changed, 248 insertions(+), 18 deletions(-)

diff --git a/include/cuda/cuda.h b/include/cuda/cuda.h
index 3938d05..e081f04 100644
--- a/include/cuda/cuda.h
+++ b/include/cuda/cuda.h
@@ -77,6 +77,7 @@ typedef enum {
   CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
   CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
   CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
+  CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
   CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82
 } CUdevice_attribute;
 
@@ -113,6 +114,7 @@ enum {
 #define CU_LAUNCH_PARAM_END ((void *) 0)
 #define CU_LAUNCH_PARAM_BUFFER_POINTER ((void *) 1)
 #define CU_LAUNCH_PARAM_BUFFER_SIZE ((void *) 2)
+#define CU_MEMHOSTALLOC_DEVICEMAP 0x02U
 
 enum {
   CU_STREAM_DEFAULT = 0,
@@ -169,6 

Re: [PATCH v2] rs6000: Rework option -mpowerpc64 handling [PR106680]

2022-10-12 Thread Iain Sandoe
Hi Kewen,

> On 12 Oct 2022, at 09:12, Kewen.Lin  wrote:

> PR106680 shows that -m32 -mpowerpc64 is different from
> -mpowerpc64 -m32, this is determined by the way how we
> handle option powerpc64 in rs6000_handle_option.
> 
> Segher pointed out this difference should be taken as
> a bug and we should ensure that option powerpc64 is
> independent of -m32/-m64.  So this patch removes the
> handlings in rs6000_handle_option and add some necessary
> supports in rs6000_option_override_internal instead.
> 
> With this patch, if users specify -m{no-,}powerpc64, the
> specified value is honoured, otherwise, for 64bit it
> always enables OPTION_MASK_POWERPC64; while for 32bit
> and TARGET_POWERPC64 and OS_MISSING_POWERPC64, it disables
> OPTION_MASK_POWERPC64.
> 
> btw, following Segher's suggestion, I did some tries to warn
> when OPTION_MASK_POWERPC64 is set for OS_MISSING_POWERPC64.
> If warn for the case that powerpc64 is specified explicitly,
> there are some TCs using -m32 -mpowerpc64 on ppc64-linux,
> they need some updates, meanwhile the artificial run
> with "--target_board=unix'{-m32/-mpowerpc64}'" will have
> noisy warnings on ppc64-linux.  If warn for the case that
> it's specified implicitly, they can just be initialized by
> TARGET_DEFAULT (like -m32 on ppc64-linux) or set from the 
> given cpu mask, we have to special case them and not to warn.
> As Segher's latest comment, I decide not to warn them and
> keep it consistent with before.
> 
> Bootstrapped and regress-tested on:
>  - powerpc64-linux-gnu P7 and P8 {-m64,-m32}
>  - powerpc64le-linux-gnu P9 and P10
>  - powerpc-ibm-aix7.2.0.0 {-maix64,-maix32}
> 
> Hi Iain, could you help to test this new patch on darwin
> again?  Thanks in advance!

I kicked off a bootstrap - and 'check-gcc-c' .. if all goes well, there will be
an answer in ≈ 7 hours.  If something fails, the answer will be sooner ;)
cheers
Iain

> 
> Is it ok for trunk if darwin testing goes well?
> 
> BR,
> Kewen
> -
>   PR target/106680
> 
> gcc/ChangeLog:
> 
>   * common/config/rs6000/rs6000-common.cc (rs6000_handle_option): Remove
>   the adjustment for option powerpc64 in -m64 handling, and remove the
>   whole -m32 handling.
>   * config/rs6000/rs6000.cc (rs6000_option_override_internal): When no
>   explicit powerpc64 option is provided, enable it for -m64.  For 32 bit
>   and OS_MISSING_POWERPC64, disable powerpc64 if it's enabled but not
>   specified explicitly.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/powerpc/pr106680-1.c: New test.
>   * gcc.target/powerpc/pr106680-2.c: New test.
>   * gcc.target/powerpc/pr106680-3.c: New test.
>   * gcc.target/powerpc/pr106680-4.c: New test.
> 
> 2022-10-12  Kewen Lin  
>   Iain Sandoe  
> ---
> gcc/common/config/rs6000/rs6000-common.cc | 11 --
> gcc/config/rs6000/rs6000.cc   | 37 ++-
> gcc/testsuite/gcc.target/powerpc/pr106680-1.c | 13 +++
> gcc/testsuite/gcc.target/powerpc/pr106680-2.c | 14 +++
> gcc/testsuite/gcc.target/powerpc/pr106680-3.c | 13 +++
> gcc/testsuite/gcc.target/powerpc/pr106680-4.c | 17 +
> 6 files changed, 85 insertions(+), 20 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106680-1.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106680-2.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106680-3.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106680-4.c
> 
> diff --git a/gcc/common/config/rs6000/rs6000-common.cc 
> b/gcc/common/config/rs6000/rs6000-common.cc
> index 8e393d08a23..c76b5c27bb6 100644
> --- a/gcc/common/config/rs6000/rs6000-common.cc
> +++ b/gcc/common/config/rs6000/rs6000-common.cc
> @@ -119,19 +119,8 @@ rs6000_handle_option (struct gcc_options *opts, struct 
> gcc_options *opts_set,
> #else
> case OPT_m64:
> #endif
> -  opts->x_rs6000_isa_flags |= OPTION_MASK_POWERPC64;
>   opts->x_rs6000_isa_flags |= (~opts_set->x_rs6000_isa_flags
>  & OPTION_MASK_PPC_GFXOPT);
> -  opts_set->x_rs6000_isa_flags |= OPTION_MASK_POWERPC64;
> -  break;
> -
> -#ifdef TARGET_USES_AIX64_OPT
> -case OPT_maix32:
> -#else
> -case OPT_m32:
> -#endif
> -  opts->x_rs6000_isa_flags &= ~OPTION_MASK_POWERPC64;
> -  opts_set->x_rs6000_isa_flags |= OPTION_MASK_POWERPC64;
>   break;
> 
> case OPT_mminimal_toc:
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index e6fa3ad0eb7..e37d99deb61 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -3648,17 +3648,12 @@ rs6000_option_override_internal (bool global_init_p)
>   rs6000_pointer_size = 32;
> }
> 
> -  /* Some OSs don't support saving the high part of 64-bit registers on 
> context
> - switch.  Other OSs don't support saving Altivec registers.  On those 
> OSs,
> - we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC 
> settings;
> - if the user wants

Re: [RFC] Teach vectorizer to deal with bitfield reads

2022-10-12 Thread Eric Botcazou via Gcc-patches
> Let me know if you believe this is a good approach? I've ran regression
> tests and this hasn't broken anything so far...

Small regression in Ada though, probably a missing guard somewhere:

=== gnat tests ===


Running target unix
FAIL: gnat.dg/loop_optimization23.adb 3 blank line(s) in output
FAIL: gnat.dg/loop_optimization23.adb (test for excess errors)
UNRESOLVED: gnat.dg/loop_optimization23.adb compilation failed to produce executable
FAIL: gnat.dg/loop_optimization23_pkg.adb 3 blank line(s) in output
FAIL: gnat.dg/loop_optimization23_pkg.adb (test for excess errors)

In order to reproduce, configure the compiler with Ada enabled, build it, and 
copy $(srcdir)/gcc/testsuite/gnat.dg/loop_optimization23_pkg.ad[sb] into the 
build directory, then just issue:

gcc/gnat1 -quiet loop_optimization23_pkg.adb -O2 -Igcc/ada/rts

eric@fomalhaut:~/build/gcc/native> gcc/gnat1 -quiet 
loop_optimization23_pkg.adb -O2 -Igcc/ada/rts
during GIMPLE pass: vect
+===GNAT BUG DETECTED==+
| 13.0.0 20221012 (experimental) [master ca7f7c3f140] (x86_64-suse-linux) GCC error:|
| in exact_div, at poly-int.h:2232 |
| Error detected around loop_optimization23_pkg.adb:5:3|
| Compiling loop_optimization23_pkg.adb

-- 
Eric Botcazou




Re: Restore default 'sorry' 'TARGET_ASM_CONSTRUCTOR', 'TARGET_ASM_DESTRUCTOR' (was: [PATCH 1/3] STABS: remove -gstabs and -gxcoff functionality)

2022-10-12 Thread Martin Liška
On 10/10/22 16:19, Thomas Schwinge wrote:
> Hi!
> 
> On 2022-09-01T12:05:23+0200, Martin Liška  wrote:
>> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>>
>> I've also built all cross compilers.
> 
> First: thanks for that: clean up plus "built all cross compilers"!
> 
> But yet, I've now tracked down an issue related to these changes,
> apparently only visible via the nvptx back end -- and quite
> non-obvious...  ;-)
> 
>> --- a/gcc/config/nvptx/nvptx.cc
>> +++ b/gcc/config/nvptx/nvptx.cc
>> @@ -52,7 +52,6 @@
>>  #include "tm-preds.h"
>>  #include "tm-constrs.h"
>>  #include "langhooks.h"
>> -#include "dbxout.h"
>>  #include "cfgrtl.h"
>>  #include "gimple.h"
>>  #include "stor-layout.h"
> 
>> --- a/gcc/dbxout.cc
>> +++ /dev/null
>> @@ -1,3936 +0,0 @@
>> -/* Output dbx-format symbol table information from GNU compiler.
> 
> The "dbx-format symbol table information" stuff indeed is not relevant
> anymore, but:
> 
>> -/* Record an element in the table of global destructors.  SYMBOL is
>> -   a SYMBOL_REF of the function to be called; PRIORITY is a number
>> -   between 0 and MAX_INIT_PRIORITY.  */
>> -
>> -void
>> -default_stabs_asm_out_destructor (rtx symbol ATTRIBUTE_UNUSED,
>> -   int priority ATTRIBUTE_UNUSED)
>> -{
>> -#if defined DBX_DEBUGGING_INFO || defined XCOFF_DEBUGGING_INFO
>> -  /* Tell GNU LD that this is part of the static destructor set.
>> - This will work for any system that uses stabs, most usefully
>> - aout systems.  */
>> -  dbxout_begin_simple_stabs ("___DTOR_LIST__", 22 /* N_SETT */);
>> -  dbxout_stab_value_label (XSTR (symbol, 0));
>> -#else
>> -  sorry ("global destructors not supported on this target");
>> -#endif
>> -}
>> -
>> -/* Likewise for global constructors.  */
>> -
>> -void
>> -default_stabs_asm_out_constructor (rtx symbol ATTRIBUTE_UNUSED,
>> -int priority ATTRIBUTE_UNUSED)
>> -{
>> -#if defined DBX_DEBUGGING_INFO || defined XCOFF_DEBUGGING_INFO
>> -  /* Tell GNU LD that this is part of the static destructor set.
>> - This will work for any system that uses stabs, most usefully
>> - aout systems.  */
>> -  dbxout_begin_simple_stabs ("___CTOR_LIST__", 22 /* N_SETT */);
>> -  dbxout_stab_value_label (XSTR (symbol, 0));
>> -#else
>> -  sorry ("global constructors not supported on this target");
>> -#endif
>> -}
> 
>> --- a/gcc/dbxout.h
>> +++ /dev/null
>> @@ -1,60 +0,0 @@
>> -/* dbxout.h - Various declarations for functions found in dbxout.cc
> 
>> -extern void default_stabs_asm_out_destructor (rtx, int);
>> -extern void default_stabs_asm_out_constructor (rtx, int);
> 
> ... these two functions, 'default_stabs_asm_out_constructor',
> 'default_stabs_asm_out_destructor' (specifically, now their 'sorry'
> branches only) used to serve as default 'TARGET_ASM_CONSTRUCTOR',
> 'TARGET_ASM_DESTRUCTOR' via...
> 
>> --- a/gcc/target-def.h
>> +++ b/gcc/target-def.h
> 
> |  #if !defined(TARGET_ASM_CONSTRUCTOR) && !defined(USE_COLLECT2)
> |  # ifdef CTORS_SECTION_ASM_OP
> |  #  define TARGET_ASM_CONSTRUCTOR default_ctor_section_asm_out_constructor
>>  # else
>>  #  ifdef TARGET_ASM_NAMED_SECTION
>>  #   define TARGET_ASM_CONSTRUCTOR default_named_section_asm_out_constructor
>> -#  else
>> -#   define TARGET_ASM_CONSTRUCTOR default_stabs_asm_out_constructor
>>  #  endif
>>  # endif
>>  #endif
>> @@ -74,8 +72,6 @@
> |  #if !defined(TARGET_ASM_DESTRUCTOR) && !defined(USE_COLLECT2)
> |  # ifdef DTORS_SECTION_ASM_OP
> |  #  define TARGET_ASM_DESTRUCTOR default_dtor_section_asm_out_destructor
>>  # else
>>  #  ifdef TARGET_ASM_NAMED_SECTION
>>  #   define TARGET_ASM_DESTRUCTOR default_named_section_asm_out_destructor
>> -#  else
>> -#   define TARGET_ASM_DESTRUCTOR default_stabs_asm_out_destructor
>>  #  endif
>>  # endif
>>  #endif
> 
> ... this setup here (manually added some more context to the 'diff').
> 
> That is, if a back end was not 'USE_COLLECT2', nor manually defined
> 'TARGET_ASM_CONSTRUCTOR', 'TARGET_ASM_DESTRUCTOR', or got pointed to the
> respective 'default_[...]'  functions due to 'CTORS_SECTION_ASM_OP',
> 'DTORS_SECTION_ASM_OP', or 'TARGET_ASM_NAMED_SECTION', it got pointed to
> 'default_stabs_asm_out_constructor', 'default_stabs_asm_out_destructor'.
> These would emit 'sorry' for any global constructor/destructor they're
> run into.
> 
> This is now gone, and thus in such a back end configuration case
> 'TARGET_ASM_CONSTRUCTOR', 'TARGET_ASM_DESTRUCTOR' don't get defined
> anymore, and thus the subsequently following:
> 
> #if !defined(TARGET_HAVE_CTORS_DTORS)
> # if defined(TARGET_ASM_CONSTRUCTOR) && defined(TARGET_ASM_DESTRUCTOR)
> # define TARGET_HAVE_CTORS_DTORS true
> # endif
> #endif
> 
> ... doesn't define 'TARGET_HAVE_CTORS_DTORS' anymore, and thus per my
> understanding, 'gcc/final.cc:rest_of_handle_final':
> 
> if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
> && targetm.have_ctors_dtors)
>   targetm.asm_out.constructor (XEXP 

Re: [PATCH][RFT] Vectorization of first-order recurrences

2022-10-12 Thread Richard Sandiford via Gcc-patches
Richard Biener  writes:
> +  /* First-order recurrence autovectorization needs to handle permutation
> + with indices = [nunits-1, nunits, nunits+1, ...].  */
> +  vec_perm_builder sel (nunits, 1, 3);
> +  for (int i = 0; i < 3; ++i)
> +sel.quick_push (nunits - dist + i);
> +  vec_perm_indices indices (sel, 1, nunits * 2);

Should be:

  vec_perm_indices indices (sel, 2, nunits);

With that change, the patch passes testing on SVE.  vect-recurr-6.c
fails to vectorise, but I think that's because SVE doesn't yet support
the required permute.

Thanks,
Richard


Re: [PATCH] middle-end IFN_ASSUME support [PR106654]

2022-10-12 Thread Jakub Jelinek via Gcc-patches
On Tue, Oct 11, 2022 at 02:05:52PM -0400, Andrew MacLeod wrote:
> > Aldy, could ranger handle this?  If it sees .ASSUME call,
> > walk the body of such function from the edge(s) to exit with the
> > assumption that the function returns true, so above set _2 [true, true]
> > and from there derive that i_1(D) [43, 43] and then map the argument
> > in the assumption function to argument passed to IFN_ASSUME (note,
> > args there are shifted by 1)?
> 
> 
> Ranger GORI component could assume the return value is [1,1] and work
> backwards from there. Single basic blocks would be trivial. The problem
> becomes when there are multiple blocks.   The gori engine has no real
> restriction other than it works from within a basic block only
> 
> I see no reason we couldn't wire something up that continues propagating
> values out the top of the block evaluating things for more complicated
> cases.  you would end up with a set of ranges for names which are the
> "maximal" possible range based on the restriction that the return value is
> [1,1].
> 
> 
> > During gimplification it actually gimplifies it into
> >D.2591 = .ASSUME ();
> >if (D.2591 != 0) goto ; else goto ;
> >:
> >{
> >  i = i + 1;
> >  D.2591 = i == 44;
> >}
> >:
> >.ASSUME (D.2591);
> > with the condition wrapped into a GIMPLE_BIND (I admit the above isn't
> > extra clean but it is just something to hold it from gimplifier until
> > gimple low pass; it reassembles if (condition_never_true) { cond; };
> 
> 
> What we really care about is what the SSA form looks like.. that's what
> ranger will deal with.

Sure.

> Is this function inlined?  If it isn't then you'd need LTO/IPA to propagate

Never (the code is not supposed to be actually executed at runtime ever,
it is purely as if, if this function would be executed, then it would return
true, otherwise it would be UB).  But the goal is of course to inline stuff
into it and optimize the function even post IPA.

> the ranges we calculated above for the function. Or some special pass that
> reads assumes, does the processing you mention above and applies it?  Is
> that what you are thinking?

The options would be to evaluate it each time ranger processes .ASSUME,
or to perform this backwards range propagation somewhere late during post
IPA optimizations of the cfun->assume_function and remember it somewhere
(e.g. in SSA_NAME_RANGE_INFO of the default defs of the params) and then
when visiting .ASSUME just look those up.  I think the latter is better,
we'd do it only once - the assumption that the function returns true after
the assume function itself is optimized will always be the same.
It could be a separate pass (gated on fun->assume_function, so done only
for them) somewhere shortly before expansion to RTL (which is what isn't
done and nothing later for those), or could be done say in VRP2 or some
other existing late pass.

> Looking at assume7.C, I see:
> 
> int bar (int x)
> {
>    [local count: 1073741824]:
>   .ASSUME (_Z3bari._assume.0, x_1(D));
>   return x_1(D);
> 
> And:
> 
> bool _Z3bari._assume.0 (int x)
> {
>   bool _2;
> 
>    [local count: 1073741824]:
>   _2 = x_1(D) == 42;
>   return _2;
> 
> 
> Using the above approach, GORI could tell you that if _2 is [1,1] that x_1
> must be [42,42].
> 
> If you are parsing that ASSUME, you could presumably match things up and we
> could make x_1 have a range of [42,42] in bar() at that call.

If we cache the range info for the assume_function arguments the above way
on SSA_NAME_RANGE_INFO, then you'd just see .ASSUME call and for (n+1)th
argument find nth argument of the 1st argument FUNCTION_DECL's
DECL_ARGUMENTS, ssa_default_def (DECL_STRUCT_FUNCTION (assume_fndecl), parm)
and just union the current range of (n+1)th argument with
SSA_NAME_RANGE_INFO of the ssa_default_def (if non-NULL).
> 
> this would require a bit of processing in fold_using_range for handling
> function calls, checking for this case and so on, but quite doable.
> 
> looking at the more complicated case for
> 
> bool _Z3bazi._assume.0 (int x)
> 
> it seems that the answer is determined without processing most of the
> function. ie:, work from the bottom up:
> 
>    [local count: 670631318]:
>   _8 = x_3 == 43;   x_3 = [43,43]
> 
>    [local count: 1073741824]:
>   # _1 = PHI <0(2), _8(5)>  _8 = [1,1]  2->6 cant happen
>   return _1;    _1 = [1,1]
> 
> you only care about x, so as soon as you find a result for that, you'd
> actually be done.   However, I can imagine cases where you do need to go all
> the way back to the top of the assume function.. and combine values. Ie
> 
> bool assume (int x, int y)
> {
>   if (y > 10)
>     return x == 2;
>   return x > 20;
> }
> 
>    [local count: 1073741824]:
>   if (y_2(D) > 10)
>     goto ; [34.00%]
>   else
>     goto ; [66.00%]
> 
>    [local count: 365072224]:
>   _5 = x_3(D) == 2;                    x_3 = [2,2]
>   goto ; [100.00%]
> 
>    [local

Re: [PATCH] machmode: Introduce GET_MODE_NEXT_MODE with previous GET_MODE_WIDER_MODE meaning, add new GET_MODE_WIDER_MODE

2022-10-12 Thread Richard Sandiford via Gcc-patches
Jakub Jelinek  writes:
> On Wed, Oct 05, 2022 at 04:02:25PM -0400, Jason Merrill wrote:
>> > > > @@ -5716,7 +5716,13 @@ emit_store_flag_1 (rtx target, enum rtx_
>> > > >{
>> > > > machine_mode optab_mode = mclass == MODE_CC ? CCmode : 
>> > > > compare_mode;
>> > > > icode = optab_handler (cstore_optab, optab_mode);
>> > > > - if (icode != CODE_FOR_nothing)
>> > > > + if (icode != CODE_FOR_nothing
>> > > > +   /* Don't consider [BH]Fmode as usable wider mode, as neither is
>> > > > +  a subset or superset of the other.  */
>> > > > +   && (compare_mode == mode
>> > > > +   || !SCALAR_FLOAT_MODE_P (compare_mode)
>> > > > +   || maybe_ne (GET_MODE_PRECISION (compare_mode),
>> > > > +GET_MODE_PRECISION (mode
>> > > 
>> > > Why do you need to do this here (and in prepare_cmp_insn, and similarly 
>> > > in
>> > > can_compare_p)?  Shouldn't get_wider skip over modes that are not 
>> > > actually
>> > > wider?
>> > 
>> > I'm afraid too many places rely on all modes of a certain class to be
>> > visible when walking from "narrowest" to "widest" mode, say
>> > FOR_EACH_MODE_IN_CLASS/FOR_EACH_MODE/FOR_EACH_MODE_UNTIL/FOR_EACH_WIDER_MODE
>> > etc. wouldn't work at all if GET_MODE_WIDER_MODE (BFmode) == SFmode
>> > && GET_MODE_WIDER_MODE (HFmode) == SFmode.
>> 
>> Yes, it seems they need to change now that their assumptions have been
>> violated.  I suppose FOR_EACH_MODE_IN_CLASS would need to change to not use
>> get_wider, and users of FOR_EACH_MODE/FOR_EACH_MODE_UNTIL need to decide
>> whether they want an iteration that uses get_wider (likely with a new name)
>> or not.
>
> Here is a patch which does that.
> Passes bootstrap/regtest on x86_64-linux and i686-linux.
>
> Though I admit I didn't go carefully through all 24 GET_MODE_WIDER_MODE
> uses, 54 FOR_EACH_MODE_IN_CLASS uses, 3 FOR_EACH_MODE uses, 24
> FOR_EACH_MODE_FROM, 6 FOR_EACH_MODE_UNTIL and 15 FOR_EACH_WIDER_MODE uses.
> It is more important to go through the GET_MODE_WIDER_MODE and
> FOR_EACH_WIDER_MODE uses because the patch changes behavior for those,
> the rest keep their previous meaning and so can be changed incrementally
> if the other meaning is desirable to them (I've of course changed the 3
> spots I had to change in the previous BFmode patch and whatever triggered
> during the bootstraps).
>
> Thoughts on this?

Looks good to me, just some minor comments below.

> 2022-10-12  Jakub Jelinek  
>
>   * genmodes.cc (emit_mode_wider): Emit previous content of
>   mode_wider array into mode_next array and for mode_wider
>   emit always VOIDmode for !CLASS_HAS_WIDER_MODES_P classes,
>   otherwise skip through modes with the same precision.
>   * machmode.h (mode_next): Declare.
>   (GET_MODE_NEXT_MODE): New inline function.
>   (mode_iterator::get_next, mode_iterator::get_known_next): New
>   function templates.
>   (FOR_EACH_MODE_IN_CLASS): Use get_next instead of get_wider.
>   (FOR_EACH_MODE): Use get_known_next instead of get_known_wider.
>   (FOR_EACH_MODE_FROM): Use get_next instead of get_wider.
>   (FOR_EACH_WIDER_MODE_FROM): Define.
>   (FOR_EACH_NEXT_MODE): Define.
>   * expmed.cc (emit_store_flag_1): Use FOR_EACH_WIDER_MODE_FROM
>   instead of FOR_EACH_MODE_FROM.
>   * optabs.cc (prepare_cmp_insn): Likewise.  Remove redundant
>   !CLASS_HAS_WIDER_MODES_P check.
>   (prepare_float_lib_cmp): Use FOR_EACH_WIDER_MODE_FROM instead of
>   FOR_EACH_MODE_FROM.
>   * config/i386/i386-expand.cc (get_mode_wider_vector): Use
>   GET_MODE_NEXT_MODE instead of GET_MODE_WIDER_MODE.
>
> --- gcc/genmodes.cc.jj2022-05-23 21:44:48.080857253 +0200
> +++ gcc/genmodes.cc   2022-10-11 22:35:39.680286764 +0200
> @@ -1527,7 +1527,7 @@ emit_mode_wider (void)
>int c;
>struct mode_data *m;
>  
> -  print_decl ("unsigned char", "mode_wider", "NUM_MACHINE_MODES");
> +  print_decl ("unsigned char", "mode_next", "NUM_MACHINE_MODES");
>  
>for_all_modes (c, m)
>  tagged_printf ("E_%smode",
> @@ -1535,6 +1535,37 @@ emit_mode_wider (void)
>  m->name);
>  
>print_closer ();
> +  print_decl ("unsigned char", "mode_wider", "NUM_MACHINE_MODES");
> +
> +  for_all_modes (c, m)
> +{
> +  struct mode_data *m2 = 0;
> +
> +  if (m->cl == MODE_INT
> +   || m->cl == MODE_PARTIAL_INT
> +   || m->cl == MODE_FLOAT
> +   || m->cl == MODE_DECIMAL_FLOAT
> +   || m->cl == MODE_COMPLEX_FLOAT
> +   || m->cl == MODE_FRACT
> +   || m->cl == MODE_UFRACT
> +   || m->cl == MODE_ACCUM
> +   || m->cl == MODE_UACCUM)
> + for (m2 = m->wider; m2 && m2 != void_mode; m2 = m2->wider)
> +   {
> + if (m2->bytesize == m->bytesize
> + && m2->precision == m->precision)
> +   continue;
> + break;
> +   }
> +
> +  if (m2 == void_mode)
> + m2 = 0;
> +  tagged_printf ("E_%smode",
> +

[PATCH] Add condition coverage profiling

2022-10-12 Thread Jørgen Kvalsvik via Gcc-patches
This patch adds support in gcc+gcov for modified condition/decision
coverage (MC/DC) with the -fprofile-conditions flag. MC/DC is a type of
test/code coverage and it is particularly important in the aviation and
automotive industries for safety-critical applications. MC/DC is
required for or recommended by:

* DO-178C for the most critical software (Level A) in avionics
* IEC 61508 for SIL 4
* ISO 26262-6 for ASIL D

From the SQLite webpage:

Two methods of measuring test coverage were described above:
"statement" and "branch" coverage. There are many other test
coverage metrics besides these two. Another popular metric is
"Modified Condition/Decision Coverage" or MC/DC. Wikipedia defines
MC/DC as follows:

* Each decision tries every possible outcome.
* Each condition in a decision takes on every possible outcome.
* Each entry and exit point is invoked.
* Each condition in a decision is shown to independently affect
  the outcome of the decision.

In the C programming language where && and || are "short-circuit"
operators, MC/DC and branch coverage are very nearly the same thing.
The primary difference is in boolean vector tests. One can test for
any of several bits in bit-vector and still obtain 100% branch test
coverage even though the second element of MC/DC - the requirement
that each condition in a decision take on every possible outcome -
might not be satisfied.

https://sqlite.org/testing.html#mcdc

Wahlen, Heimdahl, and De Silva "Efficient Test Coverage Measurement for
MC/DC" describes an algorithm for adding instrumentation by carrying
over information from the AST, but my algorithm analyses the control
flow graph to instrument for coverage. This has the benefit of being
programming language independent and faithful to compiler decisions
and transformations, although I have only tested it on constructs in C
and C++, see testsuite/gcc.misc-tests and testsuite/g++.dg.

Like Wahlen et al this implementation records coverage in fixed-size
bitsets which gcov knows how to interpret. This is very fast, but
introduces a limit on the number of terms in a single boolean
expression, the number of bits in a gcov_unsigned_type (which is
typedef'd to uint64_t), so for most practical purposes this would be
acceptable. This limitation is in the implementation and not the
algorithm, so support for more conditions can be added by also
introducing arbitrary-sized bitsets.

For space overhead, the instrumentation needs two accumulators
(gcov_unsigned_type) per condition in the program which will be written
to the gcov file. In addition, every function gets a pair of local
accumulators, but these accumulators are reused between conditions in the
same function.

For time overhead, there is a zeroing of the local accumulators for
every condition and one or two bitwise operations on every edge taken in
an expression.

In action it looks pretty similar to the branch coverage. The -g short
opt carries no significance, but was chosen because it was an available
option with the upper-case free too.

gcov --conditions:

3:   17:void fn (int a, int b, int c, int d) {
3:   18:if ((a && (b || c)) && d)
condition outcomes covered 3/8
condition  0 not covered (true false)
condition  1 not covered (true)
condition  2 not covered (true)
condition  3 not covered (true)
1:   19:x = 1;
-:   20:else
2:   21:x = 2;
3:   22:}

gcov --conditions --json-format:

"conditions": [
{
"not_covered_false": [
0
],
"count": 8,
"covered": 3,
"not_covered_true": [
0,
1,
2,
3
]
}
],

Some expressions, mostly those without else-blocks, are effectively
"rewritten" in the CFG construction making the algorithm unable to
distinguish them:

and.c:

if (a && b && c)
x = 1;

ifs.c:

if (a)
if (b)
if (c)
x = 1;

gcc will build the same graph for both these programs, and gcov will
report both as 3-term expressions. It is vital that it is not
interpreted the other way around (which is consistent with the shape of
the graph) because otherwise the masking would be wrong for the and.c
program which is a more severe error. While surprising, users would
probably expect some minor rewriting of semantically-identical
expressions.

and.c.gcov:
#:2:if (a && b && c)
decisions covered 6/6
#:3:x = 1;

ifs.c.gcov:
#:2:if (a)
#:3:if (b)
#:4:if (c)
#:5:x = 1;
condition outcomes covered 6/6

Adding else clauses alters the program (ifs.c can have 3 elses, and.c
only 1) and coverage becomes less surprising

ifs.c.gcov:
#:2:if (a)
condi

Re: [PATCH] machmode: Introduce GET_MODE_NEXT_MODE with previous GET_MODE_WIDER_MODE meaning, add new GET_MODE_WIDER_MODE

2022-10-12 Thread Eric Botcazou via Gcc-patches
> Though I admit I didn't go carefully through all 24 GET_MODE_WIDER_MODE
> uses, 54 FOR_EACH_MODE_IN_CLASS uses, 3 FOR_EACH_MODE uses, 24
> FOR_EACH_MODE_FROM, 6 FOR_EACH_MODE_UNTIL and 15 FOR_EACH_WIDER_MODE uses.
> It is more important to go through the GET_MODE_WIDER_MODE and
> FOR_EACH_WIDER_MODE uses because the patch changes behavior for those,
> the rest keep their previous meaning and so can be changed incrementally
> if the other meaning is desirable to them (I've of course changed the 3
> spots I had to change in the previous BFmode patch and whatever triggered
> during the bootstraps).
> 
> Thoughts on this?

Can't we declare that one is wider than the other, for example BFmode since it 
has got a larger range?  Though I guess this would mean special-casing them in 
genmodes.cc as they are presumably strictly identical except for the format.

-- 
Eric Botcazou




Re: [PATCH] machmode: Introduce GET_MODE_NEXT_MODE with previous GET_MODE_WIDER_MODE meaning, add new GET_MODE_WIDER_MODE

2022-10-12 Thread Jakub Jelinek via Gcc-patches
On Wed, Oct 12, 2022 at 12:37:39PM +0200, Eric Botcazou wrote:
> > Though I admit I didn't go carefully through all 24 GET_MODE_WIDER_MODE
> > uses, 54 FOR_EACH_MODE_IN_CLASS uses, 3 FOR_EACH_MODE uses, 24
> > FOR_EACH_MODE_FROM, 6 FOR_EACH_MODE_UNTIL and 15 FOR_EACH_WIDER_MODE uses.
> > It is more important to go through the GET_MODE_WIDER_MODE and
> > FOR_EACH_WIDER_MODE uses because the patch changes behavior for those,
> > the rest keep their previous meaning and so can be changed incrementally
> > if the other meaning is desirable to them (I've of course changed the 3
> > spots I had to change in the previous BFmode patch and whatever triggered
> > during the bootstraps).
> > 
> > Thoughts on this?
> 
> Can't we declare that one is wider than the other, for example BFmode since
> it has got a larger range?  Though I guess this would mean special-casing
> them in genmodes.cc as they are presumably strictly identical except for the
> format.

That doesn't work, one of the modes has larger range, the other has larger
floating point precision.  So, neither of the modes is a subset or superset
of the other.  If we don't handle a particular optab for one of these modes
and allow widening, for both of these modes we want to try SFmode next
(which is a true superset of both modes, it has the same range as BFmode
but higher floating point precision than both HFmode and BFmode).

The only way to work around this widening problem would be always make sure
that whenever we implement any optab for HFmode, we also implement the same
optab for BFmode under exact same conditions and vice versa, even if those
optabs just do by hand whatever the generic code would do if the optab
didn't exist.  But that is way too limiting.

It is true that on PowerPC we have a similar situation for the widest
floating point modes, TFmode/IFmode/KFmode and the backend has the ugly hack
of pretending they have different GET_MODE_PRECISION, but as those are the
widest modes and are implemented in hardware or in software emulation, they
have in the backends the same optabs implemented.  While for HFmode/BFmode,
very few optabs are actually implemented directly and the usual intended use
is performing most arithmetics in SFmode.  Even on PowerPC,
ibm_extended_format and ieee_quad_format are neither subset nor superset of
each other, the latter has larger range and in most cases higher floating
point precision, but the former for certain values can have even 10 times
higher floating point precision.

Jakub



Re: [PATCH] Fortran: check types of operands of arithmetic binary operations [PR107217]

2022-10-12 Thread Mikael Morin

Le 11/10/2022 à 22:23, Harald Anlauf via Fortran a écrit :

Dear all,

we need to check that the operands of arithmetic binary operations
are consistent and of numeric type.

The PR reported an issue for multiplication ("*"), but we better
extend this to the other binary operations.

I chose the following solution:
- consistent types for +,-,*,/, keeping an internal error if any
   unhandled type shows up,


I thought it was insufficient for cases where types are consistent but 
invalid, for example:

   print *, [real :: ([.true.])] / [real :: ([.false.])]
but this case is properly caught, and a few other as well, so no problem.


- numeric types for **

Regtested on x86_64-pc-linux-gnu.  OK for mainline?


Yes, thanks.


[PATCH] machmode, v2: Introduce GET_MODE_NEXT_MODE with previous GET_MODE_WIDER_MODE meaning, add new GET_MODE_WIDER_MODE

2022-10-12 Thread Jakub Jelinek via Gcc-patches
On Wed, Oct 12, 2022 at 11:15:40AM +0100, Richard Sandiford wrote:
> Looks good to me, just some minor comments below.

Here is an updated patch.

> How robust is the mechanism that guarantees HF comes before BF,
> and so is the mode that appears in the (new) wider list?

genmodes.cc seems to have cmp_modes which does a lot of different
comparisons to make sure it is a total order.
I think the BFmode vs. HFmode ordering is about the last case:
  if (m->counter < n->counter)
return -1;
  else
return 1;
there because everything else is equal and ->counter is about which
mode is declared first in *-modes.def.
And my code for the new mode_wider in genmodes.cc always uses VOIDmode
for !CLASS_HAS_WIDER_MODES_P classes and for CLASS_HAS_WIDER_MODES_P
classes provides a subset of the total ordering, in the already computed
->wider chain it skips modes with the same size/precision.

> > -  /* Set mode iterator *ITER to the next widest mode in the same class.
> > +  /* Set mode iterator *ITER to the next wider mode in the same class.
> >   Such a mode is known to exist.  */
> 
> I'll take your word for it that this is correct. ;-)  I would say
> "next widest", but it's very likely that I'm wrong.

I'm not a native English speaker, but to me next with superlative would be
if we have the widest mode, next widest would be the one whose only
wider mode is the widest mode.

Everything else changed.

2022-10-12  Jakub Jelinek  

* genmodes.cc (emit_mode_wider): Emit previous content of
mode_wider array into mode_next array and for mode_wider
emit always VOIDmode for !CLASS_HAS_WIDER_MODES_P classes,
otherwise skip through modes with the same precision.
* machmode.h (mode_next): Declare.
(GET_MODE_NEXT_MODE): New inline function.
(mode_iterator::get_next, mode_iterator::get_known_next): New
function templates.
(FOR_EACH_MODE_IN_CLASS): Use get_next instead of get_wider.
(FOR_EACH_MODE): Use get_known_next instead of get_known_wider.
(FOR_EACH_MODE_FROM): Use get_next instead of get_wider.
(FOR_EACH_WIDER_MODE_FROM): Define.
(FOR_EACH_NEXT_MODE): Define.
* expmed.cc (emit_store_flag_1): Use FOR_EACH_WIDER_MODE_FROM
instead of FOR_EACH_MODE_FROM.
* optabs.cc (prepare_cmp_insn): Likewise.  Remove redundant
!CLASS_HAS_WIDER_MODES_P check.
(prepare_float_lib_cmp): Use FOR_EACH_WIDER_MODE_FROM instead of
FOR_EACH_MODE_FROM.
* config/i386/i386-expand.cc (get_mode_wider_vector): Use
GET_MODE_NEXT_MODE instead of GET_MODE_WIDER_MODE.

--- gcc/genmodes.cc.jj  2022-10-12 10:15:21.444381490 +0200
+++ gcc/genmodes.cc 2022-10-12 12:28:02.414528652 +0200
@@ -1527,7 +1527,7 @@ emit_mode_wider (void)
   int c;
   struct mode_data *m;
 
-  print_decl ("unsigned char", "mode_wider", "NUM_MACHINE_MODES");
+  print_decl ("unsigned char", "mode_next", "NUM_MACHINE_MODES");
 
   for_all_modes (c, m)
 tagged_printf ("E_%smode",
@@ -1535,6 +1535,37 @@ emit_mode_wider (void)
   m->name);
 
   print_closer ();
+  print_decl ("unsigned char", "mode_wider", "NUM_MACHINE_MODES");
+
+  for_all_modes (c, m)
+{
+  struct mode_data *m2 = 0;
+
+  if (m->cl == MODE_INT
+ || m->cl == MODE_PARTIAL_INT
+ || m->cl == MODE_FLOAT
+ || m->cl == MODE_DECIMAL_FLOAT
+ || m->cl == MODE_COMPLEX_FLOAT
+ || m->cl == MODE_FRACT
+ || m->cl == MODE_UFRACT
+ || m->cl == MODE_ACCUM
+ || m->cl == MODE_UACCUM)
+   for (m2 = m->wider; m2 && m2 != void_mode; m2 = m2->wider)
+ {
+   if (m2->bytesize == m->bytesize
+   && m2->precision == m->precision)
+ continue;
+   break;
+ }
+
+  if (m2 == void_mode)
+   m2 = 0;
+  tagged_printf ("E_%smode",
+m2 ? m2->name : void_mode->name,
+m->name);
+}
+
+  print_closer ();
   print_decl ("unsigned char", "mode_2xwider", "NUM_MACHINE_MODES");
 
   for_all_modes (c, m)
--- gcc/machmode.h.jj   2022-10-12 10:15:21.491380846 +0200
+++ gcc/machmode.h  2022-10-12 12:33:36.795975117 +0200
@@ -28,6 +28,7 @@ extern const unsigned char mode_inner[NU
 extern CONST_MODE_NUNITS poly_uint16_pod mode_nunits[NUM_MACHINE_MODES];
 extern CONST_MODE_UNIT_SIZE unsigned char mode_unit_size[NUM_MACHINE_MODES];
 extern const unsigned short mode_unit_precision[NUM_MACHINE_MODES];
+extern const unsigned char mode_next[NUM_MACHINE_MODES];
 extern const unsigned char mode_wider[NUM_MACHINE_MODES];
 extern const unsigned char mode_2xwider[NUM_MACHINE_MODES];
 
@@ -760,7 +761,23 @@ GET_MODE_NUNITS (const T &mode)
 }
 #endif
 
-/* Get the next wider natural mode (eg, QI -> HI -> SI -> DI -> TI).  */
+/* Get the next natural mode (not narrower, eg, QI -> HI -> SI -> DI -> TI
+   or HF -> BF -> SF -> DF -> XF -> TF).  */
+
+template
+ALWAYS_INLINE opt_mode
+GET_MODE_NEXT_MODE (cons

Re: [PATCH] Complete __gnu_test::basic_string<>::compare support

2022-10-12 Thread Jonathan Wakely via Gcc-patches
On Wed, 10 Aug 2022 at 19:31, François Dumont via Libstdc++
 wrote:
>
> Here is another patch to complete __gnu_debug::basic_string<> Standard
> conformity. This one is adding the missing compare overloads.
>
> I also would like to propose to change how __gnu_debug::basic_string<>
> is tested. I considered activating <debug/string> checks when
> _GLIBCXX_ASSERTIONS is defined but it turns out that to do so this
> light-debug mode should then also consider _GLIBCXX_DEBUG_PEDANTIC. I
> prefer to avoid this.
>
> So I restored previous behavior. I'm now checking for the
> _GLIBCXX_TEST_DEBUG_STRING macro to force usage of <debug/string>. This
> way I am testing it using:
>
> make check-debug CXXFLAGS=-D_GLIBCXX_TEST_DEBUG_STRING
>
>  libstdc++: Add __gnu_debug::basic_string<>::compare overloads
>
>  Rather than adding those implementations we are ading a:
>  using _Base::compare;
>
>  so that any compare method not implemented at __gnu_debug::basic_string
>  level are injected from the base class.
>
>  Also review how __gnu_debug::basic_string is tested. Now require to
> define
>  _GLIBCXX_TEST_DEBUG_STRING when running 'make check-debug'.
>
>  libstdc++-v3/ChangeLog
>
>  * include/debug/string: Add using _Base::compare.
>  (__gnu_debug::basic_string<>::compare(const
> basic_string<>&)): Remove.
>  (__gnu_debug::basic_string<>::compare(size_type, size_type,
> const basic_string<>&)):
>  Remove.
>  (__gnu_debug::basic_string<>::compare(size_type, size_type,
> const basic_string<>&,
>  size_type, size_type)): Remove.
>  * testsuite/util/testsuite_string.h
> [_GLIBCXX_TEST_DEBUG_STRING]: Include <debug/string>.
>  *
> testsuite/21_strings/basic_string/operations/compare/char/1.cc: Include
> testsuite_string.h
>  and use __gnu_test::string.
>  *
> testsuite/21_strings/basic_string/operations/compare/char/13650.cc:
> Likewise.
>  *
> testsuite/21_strings/basic_string/operations/compare/char/2.cc: Likewise.
>  *
> testsuite/21_strings/basic_string/operations/rfind/char/1.cc: Likewise.
>  *
> testsuite/21_strings/basic_string/operations/rfind/char/2.cc: Likewise.
>  *
> testsuite/21_strings/basic_string/operations/rfind/char/3.cc: Likewise.
>  *
> testsuite/21_strings/basic_string/operations/compare/wchar_t/1.cc:
> Include testsuite_string.h
>  and use __gnu_test::wstring.
>  *
> testsuite/21_strings/basic_string/operations/compare/wchar_t/13650.cc:
> Likewise.
>  *
> testsuite/21_strings/basic_string/operations/compare/wchar_t/2.cc: Likewise.
>
> Tested under Linux x86_64.
>
> Ok to commit ?

OK. I like the change to how debug strings are tested, thanks.



Re: [PATCH] libstdc++: Fixing Error: invalid type argument of unary '*' (have 'int')

2022-10-12 Thread Jonathan Wakely via Gcc-patches

On 04/08/22 12:54 -0400, Seija Kijin wrote:

Had an error compiling tiny-cuda-nn using gcc 12.1. With this minor
patch, I recompiled and the build succeeded.


This looks like a bug in the cuda compiler then. The libstdc++ code is
correct.

N.B. libstdc++ patches need to be CC'd to the libstdc++ list, or they
won't get reviewed.



No behavioral change.
---
libstdc++-v3/include/bits/locale_facets_nonio.tcc | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/bits/locale_facets_nonio.tcc
b/libstdc++-v3/include/bits/locale_facets_nonio.tcc
index 17a2c8d4486e..fc35a9e693e7 100644
--- a/libstdc++-v3/include/bits/locale_facets_nonio.tcc
+++ b/libstdc++-v3/include/bits/locale_facets_nonio.tcc
@@ -1474,8 +1474,8 @@ _GLIBCXX_END_NAMESPACE_LDBL_OR_CXX11
  // calls.  So e.g. if __fmt is "%p %I:%M:%S", we can't handle it
  // properly, because we first handle the %p am/pm specifier and only
  // later the 12-hour format specifier.
-  if ((void*)(this->*(&time_get::do_get)) == (void*)(&time_get::do_get))
- __use_state = true;
+  if ((void*)(this->*(&time_get::do_get)) == (&time_get::do_get))
+__use_state = true;
#pragma GCC diagnostic pop
#endif
  __time_get_state __state = __time_get_state();




Re: [PATCH] libstdc++: async: tolerate slightly shorter sleep

2022-10-12 Thread Jonathan Wakely via Gcc-patches
On Thu, 23 Jun 2022 at 12:38, Alexandre Oliva via Libstdc++
 wrote:
>
> On Jun 22, 2022, Alexandre Oliva  wrote:
>
> > Regstrapped on x86_64-linux-gnu, also tested with a cross to
> > aarch64-rtems6.  Ok to install?
>
> The early wakeups are fixed for rtems6.1, so the same question raised at
> https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597102.html apply to
> this one:

Looks like I never reviewed this one, sorry.

The patch to xfail this test for rtems is OK.

>
> libstdc++: xfail nanosleep tests on rtems
>
> From: Alexandre Oliva 
>
> Since it has been determined that nanosleep may return slightly too
> early on RTEMS, due to clock resolution differences, expect
> 30_thread/async tests that have detected too-early wakeups to fail on
> RTEMS targets.
>
>
> for  libstdc++-v3/ChangeLog
>
> * testsuite/30_threads/async/async.cc: xfail on RTEMS.
>
> TN: V608-048
> ---
>  libstdc++-v3/testsuite/30_threads/async/async.cc |1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/libstdc++-v3/testsuite/30_threads/async/async.cc 
> b/libstdc++-v3/testsuite/30_threads/async/async.cc
> index 38943ff1a9a5e..e0b731186c459 100644
> --- a/libstdc++-v3/testsuite/30_threads/async/async.cc
> +++ b/libstdc++-v3/testsuite/30_threads/async/async.cc
> @@ -2,6 +2,7 @@
>  // { dg-additional-options "-pthread" { target pthread } }
>  // { dg-require-effective-target c++11 }
>  // { dg-require-gthreads "" }
> +// { dg-xfail-if "nanosleep may wake up too early" { *-*-rtems* } }
>
>  // Copyright (C) 2010-2022 Free Software Foundation, Inc.
>  //
>
>
> --
> Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
>Free Software Activist   GNU Toolchain Engineer
> Disinformation flourishes because many people care deeply about injustice
> but very few check the facts.  Ask me about 
>



Re: [PATCH] libstdc++: async: tolerate slightly shorter sleep

2022-10-12 Thread Jonathan Wakely via Gcc-patches
On Wed, 12 Oct 2022 at 12:41, Jonathan Wakely wrote:
>
> On Thu, 23 Jun 2022 at 12:38, Alexandre Oliva via Libstdc++
>  wrote:
> >
> > On Jun 22, 2022, Alexandre Oliva  wrote:
> >
> > > Regstrapped on x86_64-linux-gnu, also tested with a cross to
> > > aarch64-rtems6.  Ok to install?
> >
> > The early wakeups are fixed for rtems6.1, so the same question raised at
> > https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597102.html apply to
> > this one:
>
> Looks like I never reviewed this one, sorry.
>
> The patch to xfail this test for rtems is OK.

It's also fine if you just want to drop this patch for the same reason
as https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597105.html

(I'm just going through old patch submissions that never got acked or
nacked and this was one of them.)



Re: [PATCH] machmode, v2: Introduce GET_MODE_NEXT_MODE with previous GET_MODE_WIDER_MODE meaning, add new GET_MODE_WIDER_MODE

2022-10-12 Thread Richard Sandiford via Gcc-patches
Jakub Jelinek  writes:
> On Wed, Oct 12, 2022 at 11:15:40AM +0100, Richard Sandiford wrote:
>> Looks good to me, just some minor comments below.
>
> Here is an updated patch.
>
>> How robust is the mechanism that guarantees HF comes before BF,
>> and so is the mode that appears in the (new) wider list?
>
> genmodes.cc seems to have cmp_modes which does a lot of different
> comparisons to make sure it is a total order.
> I think the BFmode vs. HFmode ordering is about the last case:
>   if (m->counter < n->counter)
> return -1;
>   else
> return 1;
> there because everything else is equal and ->counter is about which
> mode is declared first in *-modes.def.
> And my code for the new mode_wider in genmodes.cc always uses VOIDmode
> for !CLASS_HAS_WIDER_MODES_P classes and for CLASS_HAS_WIDER_MODES_P
> classes provides a subset of the total ordering, in the already computed
> ->wider chain it skips modes with the same size/precision.

OK, I guess that's good enough.

>
>> > -  /* Set mode iterator *ITER to the next widest mode in the same class.
>> > +  /* Set mode iterator *ITER to the next wider mode in the same class.
>> >   Such a mode is known to exist.  */
>> 
>> I'll take your word for it that this is correct. ;-)  I would say
>> "next widest", but it's very likely that I'm wrong.
>
> I'm not a native english speaker, but to me next with superlative would be
> if we have the widest mode, next widest would be the one whose only
> wider mode is the widest mode.
>
> Everything else changed.
>
> 2022-10-12  Jakub Jelinek  
>
>   * genmodes.cc (emit_mode_wider): Emit previous content of
>   mode_wider array into mode_next array and for mode_wider
>   emit always VOIDmode for !CLASS_HAS_WIDER_MODES_P classes,
>   otherwise skip through modes with the same precision.
>   * machmode.h (mode_next): Declare.
>   (GET_MODE_NEXT_MODE): New inline function.
>   (mode_iterator::get_next, mode_iterator::get_known_next): New
>   function templates.
>   (FOR_EACH_MODE_IN_CLASS): Use get_next instead of get_wider.
>   (FOR_EACH_MODE): Use get_known_next instead of get_known_wider.
>   (FOR_EACH_MODE_FROM): Use get_next instead of get_wider.
>   (FOR_EACH_WIDER_MODE_FROM): Define.
>   (FOR_EACH_NEXT_MODE): Define.
>   * expmed.cc (emit_store_flag_1): Use FOR_EACH_WIDER_MODE_FROM
>   instead of FOR_EACH_MODE_FROM.
>   * optabs.cc (prepare_cmp_insn): Likewise.  Remove redundant
>   !CLASS_HAS_WIDER_MODES_P check.
>   (prepare_float_lib_cmp): Use FOR_EACH_WIDER_MODE_FROM instead of
>   FOR_EACH_MODE_FROM.
>   * config/i386/i386-expand.cc (get_mode_wider_vector): Use
>   GET_MODE_NEXT_MODE instead of GET_MODE_WIDER_MODE.

LGTM, but please give others 24 hours to object.

Thanks,
Richard

> --- gcc/genmodes.cc.jj2022-10-12 10:15:21.444381490 +0200
> +++ gcc/genmodes.cc   2022-10-12 12:28:02.414528652 +0200
> @@ -1527,7 +1527,7 @@ emit_mode_wider (void)
>int c;
>struct mode_data *m;
>  
> -  print_decl ("unsigned char", "mode_wider", "NUM_MACHINE_MODES");
> +  print_decl ("unsigned char", "mode_next", "NUM_MACHINE_MODES");
>  
>for_all_modes (c, m)
>  tagged_printf ("E_%smode",
> @@ -1535,6 +1535,37 @@ emit_mode_wider (void)
>  m->name);
>  
>print_closer ();
> +  print_decl ("unsigned char", "mode_wider", "NUM_MACHINE_MODES");
> +
> +  for_all_modes (c, m)
> +{
> +  struct mode_data *m2 = 0;
> +
> +  if (m->cl == MODE_INT
> +   || m->cl == MODE_PARTIAL_INT
> +   || m->cl == MODE_FLOAT
> +   || m->cl == MODE_DECIMAL_FLOAT
> +   || m->cl == MODE_COMPLEX_FLOAT
> +   || m->cl == MODE_FRACT
> +   || m->cl == MODE_UFRACT
> +   || m->cl == MODE_ACCUM
> +   || m->cl == MODE_UACCUM)
> + for (m2 = m->wider; m2 && m2 != void_mode; m2 = m2->wider)
> +   {
> + if (m2->bytesize == m->bytesize
> + && m2->precision == m->precision)
> +   continue;
> + break;
> +   }
> +
> +  if (m2 == void_mode)
> + m2 = 0;
> +  tagged_printf ("E_%smode",
> +  m2 ? m2->name : void_mode->name,
> +  m->name);
> +}
> +
> +  print_closer ();
>print_decl ("unsigned char", "mode_2xwider", "NUM_MACHINE_MODES");
>  
>for_all_modes (c, m)
> --- gcc/machmode.h.jj 2022-10-12 10:15:21.491380846 +0200
> +++ gcc/machmode.h2022-10-12 12:33:36.795975117 +0200
> @@ -28,6 +28,7 @@ extern const unsigned char mode_inner[NU
>  extern CONST_MODE_NUNITS poly_uint16_pod mode_nunits[NUM_MACHINE_MODES];
>  extern CONST_MODE_UNIT_SIZE unsigned char mode_unit_size[NUM_MACHINE_MODES];
>  extern const unsigned short mode_unit_precision[NUM_MACHINE_MODES];
> +extern const unsigned char mode_next[NUM_MACHINE_MODES];
>  extern const unsigned char mode_wider[NUM_MACHINE_MODES];
>  extern const unsigned char mode_2xwider[NUM_MACHINE_MODES];
>  
> @@ -760,7 +761,23 @@ GET_MODE_NUNITS (const T &mode)
>  }
> 

[PATCH] libgcc: Quote variable in Makefile.in

2022-10-12 Thread Jonathan Wakely via Gcc-patches
This isn't very important as the error is harmless, but it's easy to fix
and so is one less thing that might confuse people when looking at build
logs.

OK for trunk?

-- >8 --

If the xgcc executable has not been built (or has been removed by 'make
clean') then the command to print the multilib dir fails, and so the
MULTIOSDIR variable is empty. That then causes:
/bin/sh: line 0: test: !=: unary operator expected

We can avoid it by quoting the variable.

libgcc/ChangeLog:

* Makefile.in: Quote variable.
---
 libgcc/Makefile.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libgcc/Makefile.in b/libgcc/Makefile.in
index 1fe708a93f7..6e2a0470944 100644
--- a/libgcc/Makefile.in
+++ b/libgcc/Makefile.in
@@ -310,7 +310,7 @@ CRTSTUFF_T_CFLAGS =
 MULTIDIR := $(shell $(CC) $(CFLAGS) -print-multi-directory)
 MULTIOSDIR := $(shell $(CC) $(CFLAGS) -print-multi-os-directory)
 
-MULTIOSSUBDIR := $(shell if test $(MULTIOSDIR) != .; then echo /$(MULTIOSDIR); 
fi)
+MULTIOSSUBDIR := $(shell if test "$(MULTIOSDIR)" != .; then echo 
/$(MULTIOSDIR); fi)
 inst_libdir = $(libsubdir)$(MULTISUBDIR)
 inst_slibdir = $(slibdir)$(MULTIOSSUBDIR)
 
-- 
2.37.3



Re: [PATCH] 0/19 modula-2 front end patches overview

2022-10-12 Thread Rainer Orth
Hi Gaius,

> Testing
> ===
[...]
> The devel/modula-2 branch has been bootstrapped on:
>
[...]
>sparc64 solaris
>sparc32 solaris

Which versions exactly did you run those bootstraps on?  I'm asking
because Solaris 11.4/SPARCV9 (sparcv9-sun-solaris2.11) was fine for me,
while Solaris 11.4/SPARC (sparc-sun-solaris2.11) still runs into PR
modula2/101392 (cc1gm2 -fdump-system-exports SEGV on Solaris/SPARC).

For good measure, I also tried Solaris 11.3/SPARC (matching gcc211 in
the cfarm), but that only revealed a couple of additional issues not
seen on 11.4:

modula2/107233  gm2 build hardcodes python3
modula2/107234  Format error in m2pp.cc (m2pp_integer_cst)
modula2/107235  m2/boot-bin/mc leaks file descriptors

before running into the same SEGV in the end.

Just for the record, both Solaris 11.4/amd64 (amd64-pc-solaris2.11) and
Solaris 11.4/i386 (i386-pc-solaris2.11) are fine.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH] RISC-V: Add new line at end of file.

2022-10-12 Thread Kito Cheng via Gcc-patches
Most of these changes have already been included in this commit:
https://github.com/gcc-mirror/gcc/commit/684d238b8cd7e8222d9e66457815f2a63178730b

On Wed, Oct 12, 2022 at 9:43 AM  wrote:
>
> From: Ju-Zhe Zhong 
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-c.cc: Add new line.
> * config/riscv/riscv_vector.h (vwrite_csr): Add new line.
>
> ---
>  gcc/config/riscv/riscv-c.cc | 2 +-
>  gcc/config/riscv/riscv_vector.h | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
> index 6fe4a8aeacf..57eeaebc582 100644
> --- a/gcc/config/riscv/riscv-c.cc
> +++ b/gcc/config/riscv/riscv-c.cc
> @@ -195,4 +195,4 @@ void
>  riscv_register_pragmas (void)
>  {
>c_register_pragma ("riscv", "intrinsic", riscv_pragma_intrinsic);
> -}
> \ No newline at end of file
> +}
> diff --git a/gcc/config/riscv/riscv_vector.h b/gcc/config/riscv/riscv_vector.h
> index 85cc656bc41..1efe3f888b5 100644
> --- a/gcc/config/riscv/riscv_vector.h
> +++ b/gcc/config/riscv/riscv_vector.h
> @@ -97,4 +97,4 @@ vwrite_csr(enum RVV_CSR csr, unsigned long value)
>  }
>  #endif // __cplusplus
>  #endif // __riscv_vector
> -#endif // __RISCV_VECTOR_H
> \ No newline at end of file
> +#endif // __RISCV_VECTOR_H
> --
> 2.36.1
>


Re: [PATCH] RISC-V: Move function place to make it looks better.

2022-10-12 Thread Kito Cheng via Gcc-patches
Moving the class declaration to the riscv-vector-builtins.cc file is not a
bad idea, since the only user is riscv-vector-builtins.cc,
but I don't think moving the other code for consistency with ARM's code is
reasonable.
Anyway, committed with only the class declaration movement.

NOTE: I've had an off-list conversation with Ju-Zhe about this.


On Tue, Oct 11, 2022 at 12:48 PM  wrote:
>
> From: Ju-Zhe Zhong 
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vector-builtins.cc (rvv_switcher::rvv_switcher): 
> Move down like ARM SVE.
> (rvv_switcher::~rvv_switcher): Move down like ARM SVE.
> (mangle_builtin_type): Move down to make it together with other 
> global function.
> (class rvv_switcher): Move from riscv-vector-builtins.h.
> * config/riscv/riscv-vector-builtins.h (class rvv_switcher): Move to 
> riscv-vector-builtins.cc.
>
> ---
>  gcc/config/riscv/riscv-vector-builtins.cc | 79 ++-
>  gcc/config/riscv/riscv-vector-builtins.h  | 19 --
>  2 files changed, 49 insertions(+), 49 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
> b/gcc/config/riscv/riscv-vector-builtins.cc
> index 7033b1fc176..6fd1bb0fcb2 100644
> --- a/gcc/config/riscv/riscv-vector-builtins.cc
> +++ b/gcc/config/riscv/riscv-vector-builtins.cc
> @@ -86,23 +86,6 @@ static GTY(()) tree abi_vector_types[NUM_VECTOR_TYPES + 1];
>  extern GTY(()) tree builtin_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 
> 1];
>  tree builtin_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1];
>
> -rvv_switcher::rvv_switcher ()
> -{
> -  /* Set have_regs_of_mode before targetm.init_builtins ().  */
> -  memcpy (m_old_have_regs_of_mode, have_regs_of_mode,
> - sizeof (have_regs_of_mode));
> -  for (int i = 0; i < NUM_MACHINE_MODES; ++i)
> -if (riscv_v_ext_enabled_vector_mode_p ((machine_mode) i))
> -  have_regs_of_mode[i] = true;
> -}
> -
> -rvv_switcher::~rvv_switcher ()
> -{
> -  /* Recover back have_regs_of_mode.  */
> -  memcpy (have_regs_of_mode, m_old_have_regs_of_mode,
> - sizeof (have_regs_of_mode));
> -}
> -
>  /* Add type attributes to builtin type tree, currently only the mangled 
> name. */
>  static void
>  add_vector_type_attribute (tree type, const char *mangled_name)
> @@ -140,19 +123,6 @@ lookup_vector_type_attribute (const_tree type)
>return lookup_attribute ("RVV type", TYPE_ATTRIBUTES (type));
>  }
>
> -/* If TYPE is a built-in type defined by the RVV ABI, return the mangled 
> name,
> -   otherwise return NULL.  */
> -const char *
> -mangle_builtin_type (const_tree type)
> -{
> -  if (TYPE_NAME (type) && TREE_CODE (TYPE_NAME (type)) == TYPE_DECL)
> -type = TREE_TYPE (TYPE_NAME (type));
> -  if (tree attr = lookup_vector_type_attribute (type))
> -if (tree id = TREE_VALUE (chain_index (0, TREE_VALUE (attr
> -  return IDENTIFIER_POINTER (id);
> -  return NULL;
> -}
> -
>  /* Register the built-in RVV ABI types, such as __rvv_int32m1_t.  */
>  static void
>  register_builtin_types ()
> @@ -231,6 +201,55 @@ register_vector_type (vector_type_index type)
>builtin_vector_types[0][type] = vectype;
>  }
>
> +/* RAII class for enabling enough RVV features to define the built-in
> +   types and implement the riscv_vector.h pragma.
> +
> +   Note: According to 'TYPE_MODE' macro implementation, we need set
> +   have_regs_of_mode[mode] to be true if we want to get the exact mode
> +   from 'TYPE_MODE'. However, have_regs_of_mode has not been set yet in
> +   targetm.init_builtins (). We need rvv_switcher to set have_regs_of_mode
> +   before targetm.init_builtins () and recover back have_regs_of_mode
> +   after targetm.init_builtins ().  */
> +class rvv_switcher
> +{
> +public:
> +  rvv_switcher ();
> +  ~rvv_switcher ();
> +
> +private:
> +  bool m_old_have_regs_of_mode[MAX_MACHINE_MODE];
> +};
> +
> +rvv_switcher::rvv_switcher ()
> +{
> +  /* Set have_regs_of_mode before targetm.init_builtins ().  */
> +  memcpy (m_old_have_regs_of_mode, have_regs_of_mode,
> + sizeof (have_regs_of_mode));
> +  for (int i = 0; i < NUM_MACHINE_MODES; ++i)
> +if (riscv_v_ext_enabled_vector_mode_p ((machine_mode) i))
> +  have_regs_of_mode[i] = true;
> +}
> +
> +rvv_switcher::~rvv_switcher ()
> +{
> +  /* Recover back have_regs_of_mode.  */
> +  memcpy (have_regs_of_mode, m_old_have_regs_of_mode,
> + sizeof (have_regs_of_mode));
> +}
> +
> +/* If TYPE is a built-in type defined by the RVV ABI, return the mangled 
> name,
> +   otherwise return NULL.  */
> +const char *
> +mangle_builtin_type (const_tree type)
> +{
> +  if (TYPE_NAME (type) && TREE_CODE (TYPE_NAME (type)) == TYPE_DECL)
> +type = TREE_TYPE (TYPE_NAME (type));
> +  if (tree attr = lookup_vector_type_attribute (type))
> +if (tree id = TREE_VALUE (chain_index (0, TREE_VALUE (attr
> +  return IDENTIFIER_POINTER (id);
> +  return NULL;
> +}
> +
>  /* Initialize all compiler built-ins related to RVV that should be
> defined at start-up.  */
>  void
> diff --git a/gcc/

Re: [PATCH] RISC-V: Clang-format vector_type_index.

2022-10-12 Thread Kito Cheng via Gcc-patches
Committed, but combined with another clang-format fix :)

On Tue, Oct 11, 2022 at 2:36 PM  wrote:
>
> From: Ju-Zhe Zhong 
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vector-builtins.h (DEF_RVV_TYPE): Clang-format 
> it.
>
> ---
>  gcc/config/riscv/riscv-vector-builtins.h | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins.h 
> b/gcc/config/riscv/riscv-vector-builtins.h
> index cd6dad62ad8..ea67da9905c 100644
> --- a/gcc/config/riscv/riscv-vector-builtins.h
> +++ b/gcc/config/riscv/riscv-vector-builtins.h
> @@ -27,8 +27,7 @@ namespace riscv_vector {
> "vector types" for brevity.  */
>  enum vector_type_index
>  {
> -#define DEF_RVV_TYPE(NAME, ABI_NAME, NCHARS, ARGS...)\
> -  VECTOR_TYPE_##NAME,
> +#define DEF_RVV_TYPE(NAME, ABI_NAME, NCHARS, ARGS...) VECTOR_TYPE_##NAME,
>  #include "riscv-vector-builtins.def"
>NUM_VECTOR_TYPES
>  };
> --
> 2.36.1
>


Re: [PATCH] RISC-V: Clang-format add_vector_attribute function.

2022-10-12 Thread Kito Cheng via Gcc-patches
Committed, thanks!

On Tue, Oct 11, 2022 at 2:22 PM  wrote:
>
> From: Ju-Zhe Zhong 
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vector-builtins.cc (add_vector_type_attribute): 
> Clang-format function.
>
> ---
>  gcc/config/riscv/riscv-vector-builtins.cc | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
> b/gcc/config/riscv/riscv-vector-builtins.cc
> index daf180801cc..4814b6ef6e7 100644
> --- a/gcc/config/riscv/riscv-vector-builtins.cc
> +++ b/gcc/config/riscv/riscv-vector-builtins.cc
> @@ -88,8 +88,8 @@ add_vector_type_attribute (tree type, const char 
> *mangled_name)
>  {
>tree mangled_name_tree = get_identifier (mangled_name);
>tree value = tree_cons (NULL_TREE, mangled_name_tree, NULL_TREE);
> -  TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("RVV type"), value,
> - TYPE_ATTRIBUTES (type));
> +  TYPE_ATTRIBUTES (type)
> += tree_cons (get_identifier ("RVV type"), value, TYPE_ATTRIBUTES (type));
>  }
>
>  /* Force TYPE to be a sizeless type.  */
> --
> 2.36.1
>


Re: [PATCH] RISC-V: Remove TUPLE size macro define.

2022-10-12 Thread Kito Cheng via Gcc-patches
Committed, thanks!

On Tue, Oct 11, 2022 at 2:23 PM  wrote:
>
> From: Ju-Zhe Zhong 
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vector-builtins.h: Remove redundant macro.
>
> ---
>  gcc/config/riscv/riscv-vector-builtins.h | 3 ---
>  1 file changed, 3 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins.h 
> b/gcc/config/riscv/riscv-vector-builtins.h
> index 7d4b807f33c..cd6dad62ad8 100644
> --- a/gcc/config/riscv/riscv-vector-builtins.h
> +++ b/gcc/config/riscv/riscv-vector-builtins.h
> @@ -23,9 +23,6 @@
>
>  namespace riscv_vector {
>
> -/* This is for segment instructions.  */
> -const unsigned int MAX_TUPLE_SIZE = 8;
> -
>  /* Enumerates the RVV types, together called
> "vector types" for brevity.  */
>  enum vector_type_index
> --
> 2.36.1
>


Re: [PATCH] RISC-V: Refine register_builtin_types function.

2022-10-12 Thread Kito Cheng via Gcc-patches
Committed with a few minor ChangeLog fixes.

On Tue, Oct 11, 2022 at 2:15 PM  wrote:
>
> From: Ju-Zhe Zhong 
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vector-builtins.cc (GTY): Redefine vector types.
> (build_const_pointer): New function.
> (register_builtin_type): Ditto.
> (DEF_RVV_TYPE): Simplify macro.
> (register_vector_type): Refine implementation.
> * config/riscv/riscv-vector-builtins.h (struct GTY): New struct.
>
> ---
>  gcc/config/riscv/riscv-vector-builtins.cc | 81 ---
>  gcc/config/riscv/riscv-vector-builtins.h  | 10 +++
>  2 files changed, 51 insertions(+), 40 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
> b/gcc/config/riscv/riscv-vector-builtins.cc
> index 6fd1bb0fcb2..daf180801cc 100644
> --- a/gcc/config/riscv/riscv-vector-builtins.cc
> +++ b/gcc/config/riscv/riscv-vector-builtins.cc
> @@ -73,18 +73,14 @@ static CONSTEXPR const vector_type_info vector_types[] = {
>  #include "riscv-vector-builtins.def"
>  };
>
> -/* The scalar type associated with each vector type.  */
> -static GTY (()) tree scalar_types[NUM_VECTOR_TYPES];
> -/* The machine mode associated with each vector type.  */
> -static GTY (()) machine_mode vector_modes[NUM_VECTOR_TYPES];
>  /* The RVV types, with their built-in
> "__rvv..._t" name.  Allow an index of NUM_VECTOR_TYPES, which always
> yields a null tree.  */
>  static GTY(()) tree abi_vector_types[NUM_VECTOR_TYPES + 1];
>
>  /* Same, but with the riscv_vector.h "v..._t" name.  */
> -extern GTY(()) tree builtin_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 
> 1];
> -tree builtin_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1];
> +extern GTY (()) rvv_builtin_types_t builtin_types[NUM_VECTOR_TYPES + 1];
> +rvv_builtin_types_t builtin_types[NUM_VECTOR_TYPES + 1];
>
>  /* Add type attributes to builtin type tree, currently only the mangled 
> name. */
>  static void
> @@ -123,6 +119,39 @@ lookup_vector_type_attribute (const_tree type)
>return lookup_attribute ("RVV type", TYPE_ATTRIBUTES (type));
>  }
>
> +/* Return a representation of "const T *".  */
> +static tree
> +build_const_pointer (tree t)
> +{
> +  return build_pointer_type (build_qualified_type (t, TYPE_QUAL_CONST));
> +}
> +
> +/* Helper function for register a single built-in RVV ABI type.  */
> +static void
> +register_builtin_type (vector_type_index type, tree eltype, machine_mode 
> mode)
> +{
> +  builtin_types[type].scalar = eltype;
> +  builtin_types[type].scalar_ptr = build_pointer_type (eltype);
> +  builtin_types[type].scalar_const_ptr = build_const_pointer (eltype);
> +  if (!riscv_v_ext_enabled_vector_mode_p (mode))
> +return;
> +
> +  tree vectype = build_vector_type_for_mode (eltype, mode);
> +  gcc_assert (VECTOR_MODE_P (TYPE_MODE (vectype)) && TYPE_MODE (vectype) == 
> mode
> + && TYPE_MODE_RAW (vectype) == mode && TYPE_ALIGN (vectype) <= 
> 128
> + && known_eq (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
> +  GET_MODE_BITSIZE (mode)));
> +  vectype = build_distinct_type_copy (vectype);
> +  gcc_assert (vectype == TYPE_MAIN_VARIANT (vectype));
> +  SET_TYPE_STRUCTURAL_EQUALITY (vectype);
> +  TYPE_ARTIFICIAL (vectype) = 1;
> +  TYPE_INDIVISIBLE_P (vectype) = 1;
> +  add_vector_type_attribute (vectype, vector_types[type].mangled_name);
> +  make_type_sizeless (vectype);
> +  abi_vector_types[type] = vectype;
> +  lang_hooks.types.register_builtin_type (vectype, 
> vector_types[type].abi_name);
> +}
> +
>  /* Register the built-in RVV ABI types, such as __rvv_int32m1_t.  */
>  static void
>  register_builtin_types ()
> @@ -137,42 +166,12 @@ register_builtin_types ()
>  = TARGET_64BIT ? unsigned_intSI_type_node : long_unsigned_type_node;
>
>machine_mode mode;
> -#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE,\
> -VECTOR_MODE_MIN_VLEN_32) 
>  \
> +#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE,   
>   \
> +VECTOR_MODE_MIN_VLEN_32, ARGS...)
>  \
>mode = TARGET_MIN_VLEN > 32 ? VECTOR_MODE##mode
>   \
>   : VECTOR_MODE_MIN_VLEN_32##mode;
>  \
> -  scalar_types[VECTOR_TYPE_##NAME]\
> -= riscv_v_ext_enabled_vector_mode_p (mode) ? SCALAR_TYPE##_type_node 
>   \
> -  : NULL_TREE;   
>  \
> -  vector_modes[VECTOR_TYPE_##NAME]\
> -= riscv_v_ext_enabled_vector_mode_p (mode) ? mode : VOIDmode;
> +  register_builtin_type (VECTOR_TYPE_##NAME, SCALAR_TYPE##_type_node, mode);
>  #include "riscv-vector-builtins.def"
> -
> -  for (unsigned int i = 0; i < NUM_VECTOR_TYPES; ++i)
> -{
> -  tree eltype = scalar_types[i];
> -  mode = vector_modes[i];
> -  /* We d

Re: [PATCH] 0/19 modula-2 front end patches overview

2022-10-12 Thread Gaius Mulley via Gcc-patches
Rainer Orth  writes:

> Hi Gaius,
>
>> Testing
>> ===
> [...]
>> The devel/modula-2 branch has been bootstrapped on:
>>
> [...]
>>sparc64 solaris
>>sparc32 solaris
>
> which versions exactly did you run those bootstraps on?  I'm asking
> because for Solaris 11.4/SPARCV9 (sparcv9-sun-solaris2.11) was fine,
> while Solaris 11.4/SPARC (sparc-sun-solaris2.11) still runs into PR
> modula2/101392 (cc1gm2 -fdump-system-exports SEGV on Solaris/SPARC).

Hi Rainer,

ah very sorry - I thought I was relaying a summary of your findings.
Thanks for the clarification and detailed correction.

> For good measure, I also tried Solaris 11.3/SPARC (matching gcc211 in
> the cfarm), but that only revealed a couple of additional issues not
> seen on 11.4:
>
> modula2/107233gm2 build hardcodes python3
> modula2/107234Format error in m2pp.cc (m2pp_integer_cst)
> modula2/107235m2/boot-bin/mc leaks file descriptors
>
> before running into the same SEGV in the end.

thanks for the PRs above - will apply fixes to the first two and
examine the 3rd (again).

> Just for the record, both Solaris 11.4/amd64 (amd64-pc-solaris2.11) and
> Solaris 11.4/i386 (i386-pc-solaris2.11) are fine.

great news and thanks for this report,

regards,
Gaius


Re: [PATCH] Optimize nested permutation to single VEC_PERM_EXPR [PR54346]

2022-10-12 Thread Xi Ruoyao via Gcc-patches
On Mon, 2022-09-26 at 14:56 +0800, Liwei Xu via Gcc-patches wrote:
>     This patch implemented the optimization in PR 54346, which Merges
> 
> c = VEC_PERM_EXPR ;
>     d = VEC_PERM_EXPR ;
>     to
>     d = VEC_PERM_EXPR ;
> 
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
>     tree-ssa/forwprop-19.c fail to pass but I'm not sure whether it
>     is ok to removed it.

I'm getting:

FAIL: gcc.dg/pr54346.c scan-tree-dump dse1 "VEC_PERM_EXPR.*{ 3, 6, 0, 0 }"
FAIL: gcc.dg/pr54346.c scan-tree-dump-times dse1 "VEC_PERM_EXPR" 1

on loongarch64-linux-gnu.  Not sure why.


-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


[Patch] libgomp: Add offload_device_gcn check, add requires-4a.c test

2022-10-12 Thread Tobias Burnus

This came up because the USM implementation with 
-foffload-memory={unified,pinned}
as posted at https://gcc.gnu.org/pipermail/gcc-patches/2022-July/597976.html
does not handle USM with static variables.

This shows up for the OG12 alias devel/omp/gcc-12 branch as FAIL for 
requires-4.c.

The attached patch prepares for skipping requires-4.c for the gcn/nvptx device
and adds an adjacent requires-4a.c testcase, using heap memory, that can still
run on gcn/nvptx.

Additionally, I commented out a no-longer-used #define, following the
precedent of GOMP_DEVICE_HOST_NONSHM.

Thus, this patch adds another testcase and one effective-target check,
comments out an unused #define - and that's it.
(Otherwise, it is just a prep patch.)

OK for mainline?

Tobias

PS: Currently, neither the preexisting offload_device_nvptx nor the new
offload_device_gcn target selector is used, either in old code or by this 
patch.
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
libgomp: Add offload_device_gcn check, add requires-4a.c test

Duplicate libgomp.c-c++-common/requires-4.c (as ...-4a.c) but
using heap-allocated instead of static memory for a variable.

This change and the added offload_device_gcn check prepare for
pseudo-USM, where the device hardware cannot access all host
memory but only managed and pinned memory; for those, requires-4.c
will fail and the new check permits adding
  target { ! { offload_device_nvptx || offload_device_gcn } }
to requires-4.c; however, it has not been added yet as pseudo-USM
support is not yet on mainline. (Review is pending for the USM
patches.)

include/ChangeLog:

	* gomp-constants.h (GOMP_DEVICE_HSA): Comment (unused).

libgomp/ChangeLog:

	* testsuite/lib/libgomp.exp (check_effective_target_offload_device_gcn):
	New.
	* testsuite/libgomp.c-c++-common/on_device_arch.h (device_arch_gcn,
	on_device_arch_gcn): New.
	* testsuite/libgomp.c-c++-common/requires-4a.c: New test; copied from
	requires-4.c but using heap-allocated memory.

 include/gomp-constants.h   |  2 +-
 libgomp/testsuite/lib/libgomp.exp  | 12 +++
 .../libgomp.c-c++-common/on_device_arch.h  | 13 
 .../testsuite/libgomp.c-c++-common/requires-4a.c   | 39 ++
 4 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/include/gomp-constants.h b/include/gomp-constants.h
index 84316f953d0..fac7316b858 100644
--- a/include/gomp-constants.h
+++ b/include/gomp-constants.h
@@ -229,9 +229,9 @@ enum gomp_map_kind
 /* #define GOMP_DEVICE_HOST_NONSHM	3 removed.  */
 #define GOMP_DEVICE_NOT_HOST		4
 #define GOMP_DEVICE_NVIDIA_PTX		5
 #define GOMP_DEVICE_INTEL_MIC		6
-#define GOMP_DEVICE_HSA			7
+/* #define GOMP_DEVICE_HSA		7 removed.  */
 #define GOMP_DEVICE_GCN			8
 
 /* We have a compatibility issue.  OpenMP 5.2 introduced
omp_initial_device with value of -1 which clashes with our
diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp
index 107a3c2ac9d..4b8c64de8a5 100644
--- a/libgomp/testsuite/lib/libgomp.exp
+++ b/libgomp/testsuite/lib/libgomp.exp
@@ -414,8 +414,20 @@ proc check_effective_target_offload_device_nvptx { } {
 	}
 } ]
 }
 
+# Return 1 if using a GCN offload device.
+proc check_effective_target_offload_device_gcn { } {
+return [check_runtime_nocache offload_device_gcn {
+  #include 
+  #include "testsuite/libgomp.c-c++-common/on_device_arch.h"
+  int main ()
+	{
+	  return !on_device_arch_gcn ();
+	}
+} ]
+}
+
 # Return 1 if at least one Nvidia GPU is accessible.
 
 proc check_effective_target_openacc_nvidia_accel_present { } {
 return [check_runtime openacc_nvidia_accel_present {
diff --git a/libgomp/testsuite/libgomp.c-c++-common/on_device_arch.h b/libgomp/testsuite/libgomp.c-c++-common/on_device_arch.h
index f92743b04d7..6f66dbd784c 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/on_device_arch.h
+++ b/libgomp/testsuite/libgomp.c-c++-common/on_device_arch.h
@@ -6,15 +6,22 @@ device_arch_nvptx (void)
 {
   return GOMP_DEVICE_NVIDIA_PTX;
 }
 
+/* static */ int
+device_arch_gcn (void)
+{
+  return GOMP_DEVICE_GCN;
+}
+
 /* static */ int
 device_arch_intel_mic (void)
 {
   return GOMP_DEVICE_INTEL_MIC;
 }
 
 #pragma omp declare variant (device_arch_nvptx) match(construct={target},device={arch(nvptx)})
+#pragma omp declare variant (device_arch_gcn) match(construct={target},device={arch(gcn)})
 #pragma omp declare variant (device_arch_intel_mic) match(construct={target},device={arch(intel_mic)})
 /* static */ int
 device_arch (void)
 {
@@ -36,8 +43,14 @@ on_device_arch_nvptx ()
 {
   return on_device_arch (GOMP_DEVICE_NVIDIA_PTX);
 }
 
+int
+on_device_arch_gcn ()
+{
+  return on_device_arch (GOMP_DEVICE_GCN);
+}
+
 int
 on_device_arch_intel_mic ()
 {
   return on_device_

[PATCH] LoongArch: implement count_{leading,trailing}_zeros

2022-10-12 Thread Xi Ruoyao via Gcc-patches
LoongArch always supports clz and ctz instructions, so we can always use
__builtin_{clz,ctz} for count_{leading,trailing}_zeros.  This improves
the code of libgcc, and also benefits Glibc once we merge longlong.h
there.

Bootstrapped and regtested on loongarch64-linux-gnu.

include/ChangeLog:

* longlong.h [__loongarch__] (count_leading_zeros): Define.
[__loongarch__] (count_trailing_zeros): Likewise.
[__loongarch__] (COUNT_LEADING_ZEROS_0): Likewise.
---
 include/longlong.h | 12 
 1 file changed, 12 insertions(+)

diff --git a/include/longlong.h b/include/longlong.h
index 64a7b10f9b2..c3a6f1e7eaa 100644
--- a/include/longlong.h
+++ b/include/longlong.h
@@ -593,6 +593,18 @@ extern UDItype __umulsidi3 (USItype, USItype);
 #define UMUL_TIME 14
 #endif
 
+#ifdef __loongarch__
+# if W_TYPE_SIZE == 32
+#  define count_leading_zeros(count, x)  ((count) = __builtin_clz (x))
+#  define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
+#  define COUNT_LEADING_ZEROS_0 32
+# elif W_TYPE_SIZE == 64
+#  define count_leading_zeros(count, x)  ((count) = __builtin_clzll (x))
+#  define count_trailing_zeros(count, x) ((count) = __builtin_ctzll (x))
+#  define COUNT_LEADING_ZEROS_0 64
+# endif
+#endif
+
 #if defined (__M32R__) && W_TYPE_SIZE == 32
 #define add_ss(sh, sl, ah, al, bh, bl) \
   /* The cmp clears the condition bit.  */ \
-- 
2.38.0



Re: [Patch] libgomp/gcn: Prepare for reverse-offload callback handling

2022-10-12 Thread Tobias Burnus

On 29.09.22 18:24, Andrew Stubbs wrote:

On 27/09/2022 14:16, Tobias Burnus wrote:

Andrew did suggest a while back to piggyback on the console_output
handling,
avoiding another atomic access. - If this is still wanted, I would like to
have some
guidance regarding how to actually implement it.

[...]
The point is that you can use the "msg" and "text" fields for whatever
data you want, as long as you invent a new value for "type".
[]
You can make "case 4" do whatever you want. There are enough bytes for
4 pointers, and you could use multiple packets (although it's not safe
to assume they're contiguous or already arrived; maybe "case 4" for
part 1, "case 5" for part 2). It's possible to change this structure,
of course, but the target implementation is in newlib so versioning
becomes a problem.


I think  – also looking at the Newlib write.c implementation - that the
data is contiguous: there is an atomic add, where instead of passing '1'
for a single slot, I could also add '2' for two slots.

Attached is one variant – for the decl of the GOMP_OFFLOAD_target_rev,
it needs the generic parts of the sister nvptx patch.*

2*128 bytes were not enough, I need 3*128 bytes. (Or rather 5*64 + 32.)
As target_ext is blocking, I decided to use a stack local variable for
the remaining arguments and pass it along. Alternatively, I could also
use 2 slots - and process them together. This would avoid one
device->host memory copy but would make console_output less clear.

OK for mainline?

Tobias

* https://gcc.gnu.org/pipermail/gcc-patches/2022-October/603354.html

PS: Currently, device stack variables are private and cannot be accessed
from the host; this will change in a separate patch. It not only affects
the "rest" part as used in this patch but also the actual arrays behind
addr, kinds, and sizes. And quite likely a lot of the map/firstprivate
variables passed to addr.

As num_devices() will return 0 or -1, this is for now a non-issue.
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
libgomp/gcn: Prepare for reverse-offload callback handling

libgomp/ChangeLog:

	* config/gcn/libgomp-gcn.h: New file; contains
	struct output, declared previously in plugin-gcn.c.
	* config/gcn/target.c: Include it.
	(GOMP_ADDITIONAL_ICVS): Declare as extern var.
	(GOMP_target_ext): Handle reverse offload.
	* plugin/plugin-gcn.c: Include libgomp-gcn.h.
	(struct kernargs): Replace struct def by the one
	from libgomp-gcn.h for output_data.
	(process_reverse_offload): New.
	(console_output): Call it.

 libgomp/config/gcn/libgomp-gcn.h | 61 
 libgomp/config/gcn/target.c  | 44 -
 libgomp/plugin/plugin-gcn.c  | 34 --
 3 files changed, 117 insertions(+), 22 deletions(-)

diff --git a/libgomp/config/gcn/libgomp-gcn.h b/libgomp/config/gcn/libgomp-gcn.h
new file mode 100644
index 000..91560be787f
--- /dev/null
+++ b/libgomp/config/gcn/libgomp-gcn.h
@@ -0,0 +1,61 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+   Contributed by Tobias Burnus .
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file contains defines and type definitions shared between the
+   GCN target's libgomp.a and the plugin-gcn.c, but that are only
+   needed for this target.  */
+
+#ifndef LIBGOMP_GCN_H
+#define LIBGOMP_GCN_H 1
+
+/* This struct is also used in Newlib's libc/sys/amdgcn/write.c.  */
+struct output
+{
+  int return_value;
+  unsigned int next_output;
+  struct printf_data {
+int written;
+union {
+  char msg[128];
+  uint64_t msg_u64[2];
+};
+int type;
+union {
+  int64_t ivalue;
+  double dvalue;
+  char text[128];
+  uint64_t value_u64[2];
+};
+  } queue[1024];
+  unsigned int consumed;
+};
+
+#if (__SIZEOF_SHORT__ !

Re: [PATCH] middle-end IFN_ASSUME support [PR106654]

2022-10-12 Thread Andrew MacLeod via Gcc-patches



On 10/12/22 06:15, Jakub Jelinek wrote:



the ranges we calculated above for the function. Or some special pass that
reads assumes, does the processing you mention above and applies it?  Is
that what you are thinking?

The options would be to evaluate it each time ranger processes .ASSUME,
or to perform this backwards range propagation somewhere late during post
IPA optimizations of the cfun->assume_function and remember it somewhere
(e.g. in SSA_NAME_RANGE_INFO of the default defs of the params) and then
when visiting .ASSUME just look those up.  I think the latter is better,
we'd do it only once - the assumption that the function returns true after
the assume function itself is optimized will always be the same.
It could be a separate pass (gated on fun->assume_function, so done only
for them) somewhere shortly before expansion to RTL (which is what isn't
done and nothing later for those), or could be done say in VRP2 or some
other existing late pass.

I agree, I think it would be better to process once, and store the 
results somewhere. I could provide a routine which attempts the 
evaluation of the current function, and returns a "safe" range for each 
of the parameters.   By safe, I mean it would assume VARYING for every 
unknown value in the function, reduced by whatever the backward walk 
determined.  We can refine that later by wiring this call in after a 
full ranger walk of VRP for instance to get more precise values, but 
that is not necessary at the moment.


I can also make it so that we always try to look up values from the 
.ASSUME in fold_using_range, which means any VRP or other ranger client 
will pick up the results.  If there is nothing available, it would 
return VARYING as the default.   Any current range would be intersected 
with what the ASSUME query returns.


I presume you are looking to get this working for this release, making 
the priority high? :-)



Looking at assume7.C, I see:

int bar (int x)
{
    [local count: 1073741824]:
   .ASSUME (_Z3bari._assume.0, x_1(D));
   return x_1(D);

And:

bool _Z3bari._assume.0 (int x)
{
   bool _2;

    [local count: 1073741824]:
   _2 = x_1(D) == 42;
   return _2;


Using the above approach, GORI could tell you that if _2 is [1,1] that x_1
must be [42,42].

If you are parsing that ASSUME, you could presumably match things up and we
could make x_1 have a range of [42,42] in bar() at that call.

If we cache the range info for the assume_function arguments the above way
on SSA_NAME_RANGE_INFO, then you'd just see .ASSUME call and for (n+1)th
argument find nth argument of the 1st argument FUNCTION_DECL's
DECL_ARGUMENTS, ssa_default_def (DECL_STRUCT_FUNCTION (assume_fndecl), parm)
and just union the current range of (n+1)th argument with
SSA_NAME_RANGE_INFO of the ssa_default_def (if non-NULL).
Intersection I believe...?  I think the value from the assume's should 
add restrictions to the range..

this would require a bit of processing in fold_using_range for handling
function calls, checking for this case and so on, but quite doable.

looking at the more complicated case for

bool _Z3bazi._assume.0 (int x)

it seems that the answer is determined without processing most of the
function. ie:, work from the bottom up:

    [local count: 670631318]:
   _8 = x_3 == 43;   x_3 = [43,43]

    [local count: 1073741824]:
   # _1 = PHI <0(2), _8(5)>  _8 = [1,1]  2->6 cant happen
   return _1;    _1 = [1,1]

you only care about x, so as soon as you find a result for that, you'd
actually be done.   However, I can imagine cases where you do need to go all
the way back to the top of the assume function.. and combine values. Ie

bool assume (int x, int y)
{
   if (y > 10)
     return x == 2;
   return x > 20;
}

   <bb 2> [local count: 1073741824]:
   if (y_2(D) > 10)
     goto <bb 3>; [34.00%]
   else
     goto <bb 4>; [66.00%]

   <bb 3> [local count: 365072224]:
   _5 = x_3(D) == 2;                    x_3 = [2,2]
   goto <bb 5>; [100.00%]

   <bb 4> [local count: 708669601]:
   _4 = x_3(D) > 20;                    x_3 = [21, +INF]

   <bb 5> [local count: 1073741824]:
   # _1 = PHI <_5(3), _4(4)>      _5 = [1,1], _4 = [1,1]

   return _1;

And we'd have a range of [2,2][21, +INF]
if you wanted to be able to plug values of Y in, things would get more
complicated, but the framework would all be there.

Yeah.  Note, it is fine to handle say single block assume functions (after
optimizations) first and improve incrementally later, the goal is that
people actually see useful optimizations with simpler (but not simplest)
assume conditions, so they don't say they are completely useless, and if
they come up with something more complex that we don't handle yet, they
can file enhancement requests.  Of course, trying to walk all the bbs
backwards would be nicer, though even then it is important to be primarily
correct and so punting on anything we can't handle is fine (e.g. if there
are loops etc.).


Single blocks for the first cut and 

Re: [PATCH][AArch64] Improve bit tests [PR105773]

2022-10-12 Thread Wilco Dijkstra via Gcc-patches
Hi Richard,

> Realise this is awkward, but: CC_NZmode is for operations that set only
> the N and Z flags to useful values.  If we want to take advantage of V
> being zero then I think we need a different mode.
>
> We can't go all the way to CCmode because the carry flag has the opposite
> value compared to subtraction.  But we could have either:
> 
> * CC_INVC (inverted carry) that handles all comparisons, including the
>   redundant unsigned comparisons
>
> * CC_NZV
>
> Guess I've got a slight preference for CC_INVC, but either would be OK IMO.

I've added CC_NZV since that's easier to understand and unsigned comparisons
with zero are always changed into equality comparisons. There were a few cases
where CC_NZ mode was used rather than CC_Z, so I changed those too.

Cheers,
Wilco

v2: Add new CC_NZV mode for cmp+and.

Since AArch64 sets all flags on logical operations, comparisons with zero
can be combined into an AND even if the condition is LE or GT. Add a new
CC_NZV mode used by ANDS/BICS/TST instructions.

Passes regress, OK for commit?

gcc/ChangeLog:

PR target/105773
* config/aarch64/aarch64.cc (aarch64_select_cc_mode): Allow
GT/LE for merging compare with zero into AND.
(aarch64_get_condition_code_1): Add CC_NZVmode support.
* config/aarch64/aarch64-modes.def: Add CC_NZV.
* config/aarch64/aarch64.md: Use CC_NZV in cmp+and patterns.

gcc/testsuite:
PR target/105773
* gcc.target/aarch64/ands_2.c: Test for ANDS.
* gcc.target/aarch64/bics_2.c: Test for BICS.
* gcc.target/aarch64/tst_2.c: Test for TST.
* gcc.target/aarch64/tst_imm_split_1.c: Fix test.

---

diff --git a/gcc/config/aarch64/aarch64-modes.def 
b/gcc/config/aarch64/aarch64-modes.def
index 
d3c9b74434cd2c0d0cb1a2fd26af8c0bf38a4cfa..0fd4c32ad0bd09f8651d1b8a77378fa4504ff488
 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -35,6 +35,7 @@ CC_MODE (CCFPE);
 CC_MODE (CC_SWP);
 CC_MODE (CC_NZC);   /* Only N, Z and C bits of condition flags are valid.
   (Used with SVE predicate tests.)  */
+CC_MODE (CC_NZV);   /* Only N, Z and V bits of condition flags are valid.  */
 CC_MODE (CC_NZ);/* Only N and Z bits of condition flags are valid.  */
 CC_MODE (CC_Z); /* Only Z bit of condition flags is valid.  */
 CC_MODE (CC_C); /* C represents unsigned overflow of a simple addition.  */
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
3a4b1f6987487e959648a343bb25180ea419f397..600e0f41d51242a6f100b3643ce8421ea116ec5c
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -11284,7 +11284,7 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
   if (y == const0_rtx && (REG_P (x) || SUBREG_P (x))
   && (code == EQ || code == NE)
   && (mode_x == HImode || mode_x == QImode))
-return CC_NZmode;
+return CC_Zmode;
 
   /* Similarly, comparisons of zero_extends from shorter modes can
  be performed using an ANDS with an immediate mask.  */
@@ -11292,15 +11292,22 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
   && (mode_x == SImode || mode_x == DImode)
   && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
   && (code == EQ || code == NE))
-return CC_NZmode;
+return CC_Zmode;
+
+  /* ANDS/BICS/TST support equality and all signed comparisons.  */
+  if ((mode_x == SImode || mode_x == DImode)
+  && y == const0_rtx
+  && (code_x == AND || (code_x == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
+   && CONST_INT_P (XEXP (x, 2
+  && (code == EQ || code == NE || code == LT || code == GE
+ || code == GT || code == LE))
+return CC_NZVmode;
 
+  /* ADDS/SUBS correctly set N and Z flags.  */
   if ((mode_x == SImode || mode_x == DImode)
   && y == const0_rtx
   && (code == EQ || code == NE || code == LT || code == GE)
-  && (code_x == PLUS || code_x == MINUS || code_x == AND
- || code_x == NEG
- || (code_x == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
- && CONST_INT_P (XEXP (x, 2)
+  && (code_x == PLUS || code_x == MINUS || code_x == NEG))
 return CC_NZmode;
 
   /* A compare with a shifted operand.  Because of canonicalization,
@@ -11437,6 +11444,19 @@ aarch64_get_condition_code_1 (machine_mode mode, enum 
rtx_code comp_code)
}
   break;
 
+case E_CC_NZVmode:
+  switch (comp_code)
+   {
+   case NE: return AARCH64_NE;
+   case EQ: return AARCH64_EQ;
+   case GE: return AARCH64_PL;
+   case LT: return AARCH64_MI;
+   case GT: return AARCH64_GT;
+   case LE: return AARCH64_LE;
+   default: return -1;
+   }
+  break;
+
 case E_CC_NZmode:
   switch (comp_code)
{
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 
23ceca48543d23b85beea1f0bf98ef83051d80b6..cc58373144890c617ff6ce16c

Re: [PATCH] middle-end IFN_ASSUME support [PR106654]

2022-10-12 Thread Jakub Jelinek via Gcc-patches
On Wed, Oct 12, 2022 at 10:31:00AM -0400, Andrew MacLeod wrote:
> I presume you are looking to get this working for this release, making the
> priority high? :-)

Yes.  So that we can claim we actually support C++23 Portable Assumptions
and OpenMP assume directive's hold clauses for something non-trivial so
people won't be afraid to actually use it.
Of course, first the posted patch needs to be reviewed and only once it gets
in, the ranger/GORI part can follow.  As the latter is only an optimization,
it can be done incrementally.

> Intersection I believe...?  I think the value from the assume's should add
> restrictions to the range..

Sure, sorry.

> I figured as much, I was just wondering if there might be some way to
> "simplify" certain things by processing it and turning each parameter query
> into a smaller function returning the range we determined from the main
> one...   but perhaps that is more complicated.

We don't really know what the condition is, it can be pretty arbitrary
expression (well, e.g. for C++ conditional expression, so say
[[assume (var = foo ())]];
is not valid but
[[assume ((var = foo ()))]];
is.  And with GNU statement expressions it can do a lot of stuff and until
we e.g. inline into it and optimize it a little, we don't really know what
it will be like.

Jakub



Re: [PATCH][AArch64] Improve immediate expansion [PR106583]

2022-10-12 Thread Wilco Dijkstra via Gcc-patches
Hi Richard,

>>> Sounds good, but could you put it before the mode version,
>>> to avoid the forward declaration?
>>
>> I can swap them around but the forward declaration is still required as
>> aarch64_check_bitmask is 5000 lines before aarch64_bitmask_imm.
>
> OK, how about moving them both above aarch64_check_bitmask?

Sure I've moved them as well as all related helper functions - it makes the diff
quite large but they are all together now which makes sense. I also refactored
aarch64_mov_imm to handle the case of a 64-bit immediate being generated
by a 32-bit MOVZ/MOVN - this simplifies aarch64_internal_move_immediate
and movdi patterns even further.

Cheers,
Wilco

v3: move immediate code together and avoid forward declarations,
further cleanups and simplifications.

Improve immediate expansion of immediates which can be created from a
bitmask immediate and 2 MOVKs.  Simplify, refactor and improve 
efficiency of bitmask checks and move immediate. Move various immediate
handling functions together to avoid forward declarations.
Include 32-bit MOVZ/N as valid 64-bit immediates. Add new constraint so
the movdi pattern only needs a single alternative for move immediate.

This reduces the number of 4-instruction immediates in SPECINT/FP by 10-15%.

Passes bootstrap & regress, OK for commit?

gcc/ChangeLog:

PR target/106583
* config/aarch64/aarch64.cc (aarch64_internal_mov_immediate)
Add support for a bitmask immediate with 2 MOVKs.
(aarch64_check_bitmask): New function after refactorization.
(aarch64_replicate_bitmask_imm): Remove function, merge into...
(aarch64_bitmask_imm): Simplify replication of small modes.
Split function into 64-bit only version for efficiency.
(aarch64_zeroextended_move_imm): New function.
(aarch64_move_imm): Refactor code.
(aarch64_uimm12_shift): Move near other immediate functions.
(aarch64_clamp_to_uimm12_shift): Likewise.
(aarch64_movk_shift): Likewise.
(aarch64_replicate_bitmask_imm): Likewise.
(aarch64_and_split_imm1): Likewise.
(aarch64_and_split_imm2): Likewise.
(aarch64_and_bitmask_imm): Likewise.
(aarch64_movw_imm): Remove.
* config/aarch64/aarch64.md (movdi_aarch64): Merge 'N' and 'M'
constraints into single 'O'.
(mov_aarch64): Likewise.
* config/aarch64/aarch64-protos.h (aarch64_move_imm): Use unsigned.
(aarch64_bitmask_imm): Likewise.
(aarch64_uimm12_shift): Likewise.
(aarch64_zeroextended_move_imm): New prototype.
* config/aarch64/constraints.md: Add 'O' for 32/64-bit immediates,
limit 'N' to 64-bit only moves.

gcc/testsuite:
PR target/106583
* gcc.target/aarch64/pr106583.c: Add new test.

---

diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 
3e4005c9f4ff1f999f1811c6fb0b2252878dc4ae..b82f9ba7c2bb4cffa16abbf45f87061f72015083
 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -755,7 +755,7 @@ void aarch64_post_cfi_startproc (void);
 poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
 int aarch64_get_condition_code (rtx);
 bool aarch64_address_valid_for_prefetch_p (rtx, bool);
-bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
+bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode);
 unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
 unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
 bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode 
mode);
@@ -792,7 +792,7 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, 
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT);
 bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
-bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
+bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode);
 machine_mode aarch64_sve_int_mode (machine_mode);
 opt_machine_mode aarch64_sve_pred_mode (unsigned int);
 machine_mode aarch64_sve_pred_mode (machine_mode);
@@ -842,8 +842,9 @@ bool aarch64_sve_float_arith_immediate_p (rtx, bool);
 bool aarch64_sve_float_mul_immediate_p (rtx);
 bool aarch64_split_dimode_const_store (rtx, rtx);
 bool aarch64_symbolic_address_p (rtx);
-bool aarch64_uimm12_shift (HOST_WIDE_INT);
+bool aarch64_uimm12_shift (unsigned HOST_WIDE_INT);
 int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &);
+bool aarch64_zeroextended_move_imm (unsigned HOST_WIDE_INT);
 bool aarch64_use_return_insn_p (void);
 const char *aarch64_output_casesi (rtx *);
 
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
4de55beb067ea8f0be0a90060a785c94bdee708b..785ec07692981d423582051ac0897e5dbc3a001f
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aa

[pushed] c++: defer all consteval in default args [DR2631]

2022-10-12 Thread Jason Merrill via Gcc-patches
Tested x86_64-pc-linux-gnu, applying to trunk.

-- >8 --

The proposed resolution of CWG2631 extends our current handling of
source_location::current to all consteval functions: default arguments
are not evaluated until they're used in a call, the same should apply to
evaluation of immediate invocations.  And similarly for default member
initializers.

Previously we folded source_location::current in cp_fold_r; now we fold all
consteval calls in default arguments/member initializers in bot_replace.

DR 2631

gcc/cp/ChangeLog:

* cp-tree.h (source_location_current_p): Remove.
* name-lookup.h (struct cp_binding_level): Remove
immediate_fn_ctx_p.
* call.cc (in_immediate_context): All default args
and DMI are potentially immediate context.
(immediate_invocation_p): Don't treat source_location specially.
(struct in_consteval_if_p_temp_override): Move to cp-tree.h.
* constexpr.cc (get_nth_callarg): Move to cp-tree.h.
* cp-gimplify.cc (cp_fold_r): Don't fold consteval.
* name-lookup.cc (begin_scope): Don't set immediate_fn_ctx_p.
* parser.cc (cp_parser_lambda_declarator_opt): Likewise.
(cp_parser_direct_declarator): Likewise.
* pt.cc (tsubst_default_argument): Open sk_function_parms level.
* tree.cc (source_location_current_p): Remove.
(bot_replace): Fold consteval here.
(break_out_target_exprs): Handle errors.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/consteval-defarg3.C: New test.
---
 gcc/cp/cp-tree.h  | 32 +++-
 gcc/cp/name-lookup.h  |  5 +-
 gcc/cp/call.cc| 36 +
 gcc/cp/constexpr.cc   | 20 ---
 gcc/cp/cp-gimplify.cc |  7 ---
 gcc/cp/name-lookup.cc |  2 -
 gcc/cp/parser.cc  | 24 -
 gcc/cp/pt.cc  |  3 ++
 gcc/cp/tree.cc| 52 +--
 .../g++.dg/cpp2a/consteval-defarg3.C  | 23 
 10 files changed, 94 insertions(+), 110 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/consteval-defarg3.C

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index ab6f85a2490..80037fabb41 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -2030,6 +2030,18 @@ make_temp_override (T& var, type_identity_t overrider)
   return { var, overrider };
 }
 
+/* temp_override for in_consteval_if_p, which can't use make_temp_override
+   because it is a bitfield.  */
+
+struct in_consteval_if_p_temp_override {
+  bool save_in_consteval_if_p;
+  in_consteval_if_p_temp_override ()
+: save_in_consteval_if_p (in_consteval_if_p) {}
+  void reset () { in_consteval_if_p = save_in_consteval_if_p; }
+  ~in_consteval_if_p_temp_override ()
+  { reset (); }
+};
+
 /* The cached class binding level, from the most recently exited
class, or NULL if none.  */
 
@@ -4201,6 +4213,25 @@ more_aggr_init_expr_args_p (const 
aggr_init_expr_arg_iterator *iter)
   for ((arg) = first_aggr_init_expr_arg ((call), &(iter)); (arg);  \
(arg) = next_aggr_init_expr_arg (&(iter)))
 
+/* We have an expression tree T that represents a call, either CALL_EXPR
+   or AGGR_INIT_EXPR.  Return a reference to the Nth argument.  */
+
+static inline tree&
+get_nth_callarg (tree t, int n)
+{
+  switch (TREE_CODE (t))
+{
+case CALL_EXPR:
+  return CALL_EXPR_ARG (t, n);
+
+case AGGR_INIT_EXPR:
+  return AGGR_INIT_EXPR_ARG (t, n);
+
+default:
+  gcc_unreachable ();
+}
+}
+
 /* VEC_INIT_EXPR accessors.  */
 #define VEC_INIT_EXPR_SLOT(NODE) TREE_OPERAND (VEC_INIT_EXPR_CHECK (NODE), 0)
 #define VEC_INIT_EXPR_INIT(NODE) TREE_OPERAND (VEC_INIT_EXPR_CHECK (NODE), 1)
@@ -7880,7 +7911,6 @@ extern tree bind_template_template_parm   (tree, 
tree);
 extern tree array_type_nelts_total (tree);
 extern tree array_type_nelts_top   (tree);
 extern bool array_of_unknown_bound_p   (const_tree);
-extern bool source_location_current_p  (tree);
 extern tree break_out_target_exprs (tree, bool = false);
 extern tree build_ctor_subob_ref   (tree, tree, tree);
 extern tree replace_placeholders   (tree, tree, bool * = NULL);
diff --git a/gcc/cp/name-lookup.h b/gcc/cp/name-lookup.h
index 7201ae8ead8..9e3b69865a6 100644
--- a/gcc/cp/name-lookup.h
+++ b/gcc/cp/name-lookup.h
@@ -307,13 +307,10 @@ struct GTY(()) cp_binding_level {
  'this_entity'.  */
   unsigned defining_class_p : 1;
 
-  /* true for SK_FUNCTION_PARMS of immediate functions.  */
-  unsigned immediate_fn_ctx_p : 1;
-
   /* True for SK_FUNCTION_PARMS of a requires-expression.  */
   unsigned requires_expression: 1;
 
-  /* 21 bits left to fill a 32-bit word.  */
+  /* 22 bits left to fill a 32-bit word.  */
 };
 
 /* The binding level currently in effect.  */
diff 

Re: [PATCH] middle-end, v2: IFN_ASSUME support [PR106654]

2022-10-12 Thread Jason Merrill via Gcc-patches

On 10/11/22 09:36, Jakub Jelinek wrote:

On Mon, Oct 10, 2022 at 11:19:24PM +0200, Jakub Jelinek via Gcc-patches wrote:

On Mon, Oct 10, 2022 at 05:09:29PM -0400, Jason Merrill wrote:

On 10/10/22 04:54, Jakub Jelinek via Gcc-patches wrote:

My earlier patches gimplify the simplest non-side-effects assumptions
into if (cond) ; else __builtin_unreachable (); and throw the rest
on the floor.
The following patch attempts to do something with the rest too.
For -O0, it actually throws even the simplest assumptions on the floor,
we don't expect optimizations and the assumptions are there to allow
optimizations.


I'd think we should trap on failed assume at -O0 (i.e. with
-funreachable-traps).


For the simple conditions?  Perhaps.  But for the side-effects cases
that doesn't seem to be easily possible.


Here is an updated patch which will trap on failed simple assume.

Bootstrapped/regtested successfully on x86_64-linux and i686-linux, the only
change was moving the !optimize handling from before the
if (cond); else __builtin_unreachable ();
gimplification to right after it.

2022-10-11  Jakub Jelinek  

PR c++/106654
gcc/
* function.h (struct function): Add assume_function bitfield.
* gimplify.cc (gimplify_call_expr): If the assumption isn't
simple enough, expand it into IFN_ASSUME guarded block or
for -O0 drop it.
* gimple-low.cc (create_assumption_fn): New function.
(struct lower_assumption_data): New type.
(find_assumption_locals_r, assumption_copy_decl,
adjust_assumption_stmt_r, adjust_assumption_stmt_op,
lower_assumption): New functions.
(lower_stmt): Handle IFN_ASSUME guarded block.
* tree-ssa-ccp.cc (pass_fold_builtins::execute): Remove
IFN_ASSUME calls.
* lto-streamer-out.cc (output_struct_function_base): Pack
assume_function bit.
* lto-streamer-in.cc (input_struct_function_base): And unpack it.
* cgraphunit.cc (cgraph_node::expand): Don't verify assume_function
has TREE_ASM_WRITTEN set and don't release its body.
* cfgexpand.cc (pass_expand::execute): Don't expand assume_function
into RTL, just destroy loops and exit.
* internal-fn.cc (expand_ASSUME): Remove gcc_unreachable.
* passes.cc (pass_rest_of_compilation::gate): Return false also for
fun->assume_function.
* tree-vectorizer.cc (pass_vectorize::gate,
pass_slp_vectorize::gate): Likewise.
* ipa-icf.cc (sem_function::parse): Punt for func->assume_function.
gcc/cp/
* parser.cc (cp_parser_omp_assumption_clauses): Wrap IFN_ASSUME
argument with fold_build_cleanup_point_expr.
* cp-gimplify.cc (process_stmt_assume_attribute): Likewise.
* pt.cc (tsubst_copy_and_build): Likewise.
gcc/testsuite/
* g++.dg/cpp23/attr-assume5.C: New test.
* g++.dg/cpp23/attr-assume6.C: New test.
* g++.dg/cpp23/attr-assume7.C: New test.

--- gcc/function.h.jj   2022-10-10 09:31:22.051478926 +0200
+++ gcc/function.h  2022-10-10 09:59:49.283646705 +0200
@@ -438,6 +438,10 @@ struct GTY(()) function {
  
/* Set if there are any OMP_TARGET regions in the function.  */

unsigned int has_omp_target : 1;
+
+  /* Set for artificial function created for [[assume (cond)]].
+ These should be GIMPLE optimized, but not expanded to RTL.  */
+  unsigned int assume_function : 1;
  };
  
  /* Add the decl D to the local_decls list of FUN.  */

--- gcc/gimplify.cc.jj  2022-10-10 09:31:57.518983613 +0200
+++ gcc/gimplify.cc 2022-10-10 09:59:49.285646677 +0200
@@ -3569,7 +3569,52 @@ gimplify_call_expr (tree *expr_p, gimple
 fndecl, 0));
  return GS_OK;
}
- /* FIXME: Otherwise expand it specially.  */
+ /* If not optimizing, ignore the assumptions.  */
+ if (!optimize)
+   {
+ *expr_p = NULL_TREE;
+ return GS_ALL_DONE;
+   }
+ /* Temporarily, until gimple lowering, transform
+.ASSUME (cond);
+into:
+guard = .ASSUME ();
+if (guard) goto label_true; else goto label_false;
+label_true:;
+{
+  guard = cond;
+}
+label_false:;
+.ASSUME (guard);
+such that gimple lowering can outline the condition into
+a separate function easily.  */
+ tree guard = create_tmp_var (boolean_type_node);
+ gcall *call = gimple_build_call_internal (ifn, 0);
+ gimple_call_set_nothrow (call, TREE_NOTHROW (*expr_p));
+ gimple_set_location (call, loc);
+ gimple_call_set_lhs (call, guard);
+ gimple_seq_add_stmt (pre_p, call);
+ *expr_p = build2 (MODIFY_EXPR, void_type_node, guard,
+   CALL_EXPR_ARG (*expr_p, 0));
+ *expr_p = build3 (BIND_EXPR, void_type_node, NULL, *expr_p, NULL);
+ 

Re: [PATCH][AArch64] Improve bit tests [PR105773]

2022-10-12 Thread Richard Sandiford via Gcc-patches
Wilco Dijkstra  writes:
> Hi Richard,
>
>> Realise this is awkward, but: CC_NZmode is for operations that set only
>> the N and Z flags to useful values.  If we want to take advantage of V
>> being zero then I think we need a different mode.
>>
>> We can't go all the way to CCmode because the carry flag has the opposite
>> value compared to subtraction.  But we could have either:
>>
>> * CC_INVC (inverted carry) that handles all comparisons, including the
>>   redundant unsigned comparisons
>>
>> * CC_NZV
>>
>> Guess I've got a slight preference for CC_INVC, but either would be OK IMO.
>
> I've added CC_NZV since that's easier to understand and unsigned comparisons
> with zero are always changed into equality comparisons. There were a few cases
> where CC_NZ mode was used rather than CC_Z, so I changed those too.

Thanks, sounds good.

One comment below...

>
> Cheers,
> Wilco
>
> v2: Add new CC_NZV mode for cmp+and.
>
> Since AArch64 sets all flags on logical operations, comparisons with zero
> can be combined into an AND even if the condition is LE or GT. Add a new
> CC_NZV mode used by ANDS/BICS/TST instructions.
>
> Passes regress, OK for commit?
>
> gcc/ChangeLog:
>
> PR target/105773
> * config/aarch64/aarch64.cc (aarch64_select_cc_mode): Allow
> GT/LE for merging compare with zero into AND.
> (aarch64_get_condition_code_1): Add CC_NZVmode support.
> * config/aarch64/aarch64-modes.def: Add CC_NZV.
> * config/aarch64/aarch64.md: Use CC_NZV in cmp+and patterns.
>
> gcc/testsuite:
> PR target/105773
> * gcc.target/aarch64/ands_2.c: Test for ANDS.
> * gcc.target/aarch64/bics_2.c: Test for BICS.
> * gcc.target/aarch64/tst_2.c: Test for TST.
> * gcc.target/aarch64/tst_imm_split_1.c: Fix test.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64-modes.def 
> b/gcc/config/aarch64/aarch64-modes.def
> index 
> d3c9b74434cd2c0d0cb1a2fd26af8c0bf38a4cfa..0fd4c32ad0bd09f8651d1b8a77378fa4504ff488
>  100644
> --- a/gcc/config/aarch64/aarch64-modes.def
> +++ b/gcc/config/aarch64/aarch64-modes.def
> @@ -35,6 +35,7 @@ CC_MODE (CCFPE);
>  CC_MODE (CC_SWP);
>  CC_MODE (CC_NZC);   /* Only N, Z and C bits of condition flags are valid.
>(Used with SVE predicate tests.)  */
> +CC_MODE (CC_NZV);   /* Only N, Z and V bits of condition flags are valid.  */
>  CC_MODE (CC_NZ);/* Only N and Z bits of condition flags are valid.  */
>  CC_MODE (CC_Z); /* Only Z bit of condition flags is valid.  */
>  CC_MODE (CC_C); /* C represents unsigned overflow of a simple addition.  
> */
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 
> 3a4b1f6987487e959648a343bb25180ea419f397..600e0f41d51242a6f100b3643ce8421ea116ec5c
>  100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -11284,7 +11284,7 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
>if (y == const0_rtx && (REG_P (x) || SUBREG_P (x))
>&& (code == EQ || code == NE)
>&& (mode_x == HImode || mode_x == QImode))
> -return CC_NZmode;
> +return CC_Zmode;
>
>/* Similarly, comparisons of zero_extends from shorter modes can
>   be performed using an ANDS with an immediate mask.  */
> @@ -11292,15 +11292,22 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
>&& (mode_x == SImode || mode_x == DImode)
>&& (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == 
> QImode)
>&& (code == EQ || code == NE))
> -return CC_NZmode;
> +return CC_Zmode;
> +
> +  /* ANDS/BICS/TST support equality and all signed comparisons.  */
> +  if ((mode_x == SImode || mode_x == DImode)
> +  && y == const0_rtx
> +  && (code_x == AND || (code_x == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 
> 1))
> +   && CONST_INT_P (XEXP (x, 2))))

Maybe pre-existing, but are ordered comparisons safe for the
ZERO_EXTRACT case?  If we extract the top 8 bits (say), zero extend,
and compare with zero, the result should be >= 0, whereas TST would
set N to the top bit.

Perhaps this needs to be sign_extract if it involves the top bit and
zero_extract otherwise?

If that's right, the patch is OK with just a test for AND.  We can fix
the ZERO_EXTRACT as a separate patch.

Let me know if I've missed something though.

Thanks,
Richard

> +  && (code == EQ || code == NE || code == LT || code == GE
> + || code == GT || code == LE))
> +return CC_NZVmode;
>
> +  /* ADDS/SUBS correctly set N and Z flags.  */
>if ((mode_x == SImode || mode_x == DImode)
>&& y == const0_rtx
>&& (code == EQ || code == NE || code == LT || code == GE)
> -  && (code_x == PLUS || code_x == MINUS || code_x == AND
> - || code_x == NEG
> - || (code_x == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
> - && CONST_INT_P (XEXP (x, 2)))))
> +  && (code_x == PLUS || code_x == MINUS || code_x == NEG))
>  return

Re: [PATCH] middle-end IFN_ASSUME support [PR106654]

2022-10-12 Thread Andrew MacLeod via Gcc-patches



On 10/12/22 10:39, Jakub Jelinek wrote:

On Wed, Oct 12, 2022 at 10:31:00AM -0400, Andrew MacLeod wrote:

I presume you are looking to get this working for this release, making the
priority high? :-)

Yes.  So that we can claim we actually support C++23 Portable Assumptions
and OpenMP assume directive's hold clauses for something non-trivial so
people won't be afraid to actually use it.
Of course, first the posted patch needs to be reviewed and only once it gets
in, the ranger/GORI part can follow.  As the latter is only an optimization,
it can be done incrementally.


I will start poking at something to find ranges for parameters from the 
return backwards.




Intersection I believe...?  I think the value from the assume's should add
restrictions to the range..

Sure, sorry.


I figured as much, I was just wondering if there might be some way to
"simplify" certain things by processing it and turning each parameter query
into a smaller function returning the range we determined from the main
one...   but perhaps that is more complicated.

We don't really know what the condition is, it can be pretty arbitrary
expression (well, e.g. for C++ conditional expression, so say
[[assume (var = foo ())]];
is not valid but
[[assume ((var = foo ()))]];
is.  And with GNU statement expressions it can do a lot of stuff and until
we e.g. inline into it and optimize it a little, we don't really know what
it will be like.




No, I just meant that once we finally process the complicated function, 
and decide the final range we are storing is for x_1 is say [20,30], we 
could replace the assume call site with something like


  int assume03_x (x) { if (x >= 20 && x <= 30) return x; gcc_unreachable(); }


then at call sites:

   x_5 = assume03_x(x_3);

For that matter, once all the assume functions have been processed, we 
could textually replace the assume call with an expression which 
represents the determined range...  Kind of our own mini inlining?  
Maybe that's even better than adding any kind of support in 
fold_using_range..   just let things naturally fall into place?


.ASSUME_blah ( , , x_4);

where if x is determined to be [20, 30][50,60] could be textually 
"expanded" in the IL with


  if (x<20 || x>60 || (x>30 && x < 50)) gcc_unreachable();

for each of the parameters?   If we processed this like early inlining, 
we could maybe expose the entire thing to optimization that way?


Andrew



[PATCH v2] c++: ICE with VEC_INIT_EXPR and defarg [PR106925]

2022-10-12 Thread Marek Polacek via Gcc-patches
On Tue, Oct 11, 2022 at 04:28:11PM -0400, Jason Merrill wrote:
> On 10/11/22 16:00, Marek Polacek wrote:
> > Since r12-8066, in cxx_eval_vec_init we perform expand_vec_init_expr
> > while processing the default argument in this test.
> 
> Hmm, why are we calling cxx_eval_vec_init during parsing of the default
> argument?  In particular, any expansion that depends on the enclosing
> function context should be deferred until the default arg is used by a call.

I think this is part of the semantic constraints checking [dcl.fct.default]/5
talks about, as in, this doesn't compile even though the default argument is
not executed:

struct S {
  S() = delete;
};
void foo (S = S()) { }
 
In the test below we parse '= MyVector<1>()' and end up calling mark_used
on the implicit "constexpr MyVector<1>::MyVector() noexcept (<uninstantiated>)"
ctor.  mark_used calls maybe_instantiate_noexcept.  Since the ctor has
a DEFERRED_NOEXCEPT, we have to figure out if the ctor should be noexcept
or not using get_defaulted_eh_spec.  That means walking the members of
MyVector.  Thus we reach
  /* Core 1351: If the field has an NSDMI that could throw, the
 default constructor is noexcept(false).  */
and call get_nsdmi on 'data'.  There we digest its initializer which is {}.
massage_init_elt calls digest_init_r on the {} and produces
  TARGET_EXPR >>>
and the subsequent fold_non_dependent_init leads to cxx_eval_vec_init
-> expand_vec_init_expr.

I think this is all correct except that the fold_non_dependent_init is
somewhat questionable to me; do we really have to fold in order to say
if the NSDMI init can throw?  Sure, we need to digest the {}, maybe
the field's ctors can throw, but I don't know about the folding.

> But it's certainly true that the "function_body" test is wrong in this
> situation; you might move the c_f_d test into the calculation of that
> variable.  The patch is OK with that change, but please also answer my
> question above.

I like that.  Before I go ahead and apply, please let me know if the answer
above is satisfying.

-- >8 --
Since r12-8066, in cxx_eval_vec_init we perform expand_vec_init_expr
while processing the default argument in this test.  At this point
start_preparsed_function hasn't yet set current_function_decl.
expand_vec_init_expr then leads to maybe_splice_retval_cleanup which
checks DECL_CONSTRUCTOR_P (current_function_decl) without checking that
c_f_d is non-null first.  It seems correct that c_f_d is null here, so
it seems to me that maybe_splice_retval_cleanup should check c_f_d as
in the following patch.

PR c++/106925

gcc/cp/ChangeLog:

* except.cc (maybe_splice_retval_cleanup): Check current_function_decl.
Make the bool const.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/initlist-defarg3.C: New test.
---
 gcc/cp/except.cc  |  7 +--
 gcc/testsuite/g++.dg/cpp0x/initlist-defarg3.C | 13 +
 2 files changed, 18 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/initlist-defarg3.C

diff --git a/gcc/cp/except.cc b/gcc/cp/except.cc
index b8a85ed0572..a9114a5f7a5 100644
--- a/gcc/cp/except.cc
+++ b/gcc/cp/except.cc
@@ -1322,9 +1322,12 @@ maybe_splice_retval_cleanup (tree compound_stmt)
 {
   /* If we need a cleanup for the return value, add it in at the same level as
  pushdecl_outermost_localscope.  And also in try blocks.  */
-  bool function_body
+  const bool function_body
 = (current_binding_level->level_chain
-   && current_binding_level->level_chain->kind == sk_function_parms);
+   && current_binding_level->level_chain->kind == sk_function_parms
+  /* When we're processing a default argument, c_f_d may not have been
+set.  */
+   && current_function_decl);
 
   if ((function_body || current_binding_level->kind == sk_try)
   && !DECL_CONSTRUCTOR_P (current_function_decl)
diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist-defarg3.C 
b/gcc/testsuite/g++.dg/cpp0x/initlist-defarg3.C
new file mode 100644
index 000..5c3e886b306
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/initlist-defarg3.C
@@ -0,0 +1,13 @@
+// PR c++/106925
+// { dg-do compile { target c++11 } }
+
+struct Foo;
+template <int _Nm> struct __array_traits { typedef Foo _Type[_Nm]; };
+template <int _Nm> struct array {
+  typename __array_traits<_Nm>::_Type _M_elems;
+};
+template <int _Nm> struct MyVector { array<_Nm> data{}; };
+struct Foo {
+  float a{0};
+};
+void foo(MyVector<1> = MyVector<1>());

base-commit: fbf423309e103b54f7c9d39b2f7870b9bedfe9d2
-- 
2.37.3



Re: [PATCH] libgcc: Quote variable in Makefile.in

2022-10-12 Thread Jeff Law via Gcc-patches



On 10/12/22 05:52, Jonathan Wakely via Gcc-patches wrote:

This isn't very important as the error is harmless, but it's easy to fix
and so is one less thing that might confuse people when looking at build
logs.

OK for trunk?

-- >8 --

If the xgcc executable has not been built (or has been removed by 'make
clean') then the command to print the multilib dir fails, and so the
MULTIOSDIR variable is empty. That then causes:
/bin/sh: line 0: test: !=: unary operator expected

We can avoid it by quoting the variable.

libgcc/ChangeLog:

* Makefile.in: Quote variable.


OK

jeff




Re: [PATCH v2] c++: ICE with VEC_INIT_EXPR and defarg [PR106925]

2022-10-12 Thread Jason Merrill via Gcc-patches

On 10/12/22 12:27, Marek Polacek wrote:

On Tue, Oct 11, 2022 at 04:28:11PM -0400, Jason Merrill wrote:

On 10/11/22 16:00, Marek Polacek wrote:

Since r12-8066, in cxx_eval_vec_init we perform expand_vec_init_expr
while processing the default argument in this test.


Hmm, why are we calling cxx_eval_vec_init during parsing of the default
argument?  In particular, any expansion that depends on the enclosing
function context should be deferred until the default arg is used by a call.


I think this is part of the semantic constraints checking [dcl.fct.default]/5
talks about, as in, this doesn't compile even though the default argument is
not executed:

struct S {
   S() = delete;
};
void foo (S = S()) { }
  
In the test below we parse '= MyVector<1>()' and end up calling mark_used

on the implicit "constexpr MyVector<1>::MyVector() noexcept (<uninstantiated>)"
ctor.  mark_used calls maybe_instantiate_noexcept.  Since the ctor has
a DEFERRED_NOEXCEPT, we have to figure out if the ctor should be noexcept
or not using get_defaulted_eh_spec.  That means walking the members of
MyVector.  Thus we reach
   /* Core 1351: If the field has an NSDMI that could throw, the
  default constructor is noexcept(false).  */


Maybe we need a cp_unevaluated here?  The operand of noexcept should be 
unevaluated.



and call get_nsdmi on 'data'.  There we digest its initializer which is {}.
massage_init_elt calls digest_init_r on the {} and produces
   TARGET_EXPR >>>
and the subsequent fold_non_dependent_init leads to cxx_eval_vec_init
-> expand_vec_init_expr.

I think this is all correct except that the fold_non_dependent_init is
somewhat questionable to me; do we really have to fold in order to say
if the NSDMI init can throw?  Sure, we need to digest the {}, maybe
the field's ctors can throw, but I don't know about the folding.


And we can check cp_unevaluated_operand to avoid the 
fold_non_dependent_init?



But it's certainly true that the "function_body" test is wrong in this
situation; you might move the c_f_d test into the calculation of that
variable.  The patch is OK with that change, but please also answer my
question above.


I like that.  Before I go ahead and apply, please let me know if the answer
above is satisfying.

-- >8 --
Since r12-8066, in cxx_eval_vec_init we perform expand_vec_init_expr
while processing the default argument in this test.  At this point
start_preparsed_function hasn't yet set current_function_decl.
expand_vec_init_expr then leads to maybe_splice_retval_cleanup which
checks DECL_CONSTRUCTOR_P (current_function_decl) without checking that
c_f_d is non-null first.  It seems correct that c_f_d is null here, so
it seems to me that maybe_splice_retval_cleanup should check c_f_d as
in the following patch.

PR c++/106925

gcc/cp/ChangeLog:

* except.cc (maybe_splice_retval_cleanup): Check current_function_decl.
Make the bool const.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/initlist-defarg3.C: New test.
---
  gcc/cp/except.cc  |  7 +--
  gcc/testsuite/g++.dg/cpp0x/initlist-defarg3.C | 13 +
  2 files changed, 18 insertions(+), 2 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/initlist-defarg3.C

diff --git a/gcc/cp/except.cc b/gcc/cp/except.cc
index b8a85ed0572..a9114a5f7a5 100644
--- a/gcc/cp/except.cc
+++ b/gcc/cp/except.cc
@@ -1322,9 +1322,12 @@ maybe_splice_retval_cleanup (tree compound_stmt)
  {
/* If we need a cleanup for the return value, add it in at the same level as
   pushdecl_outermost_localscope.  And also in try blocks.  */
-  bool function_body
+  const bool function_body
  = (current_binding_level->level_chain
-   && current_binding_level->level_chain->kind == sk_function_parms);
+   && current_binding_level->level_chain->kind == sk_function_parms
+  /* When we're processing a default argument, c_f_d may not have been
+set.  */
+   && current_function_decl);
  
if ((function_body || current_binding_level->kind == sk_try)

&& !DECL_CONSTRUCTOR_P (current_function_decl)
diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist-defarg3.C 
b/gcc/testsuite/g++.dg/cpp0x/initlist-defarg3.C
new file mode 100644
index 000..5c3e886b306
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/initlist-defarg3.C
@@ -0,0 +1,13 @@
+// PR c++/106925
+// { dg-do compile { target c++11 } }
+
+struct Foo;
+template <int _Nm> struct __array_traits { typedef Foo _Type[_Nm]; };
+template <int _Nm> struct array {
+  typename __array_traits<_Nm>::_Type _M_elems;
+};
+template <int _Nm> struct MyVector { array<_Nm> data{}; };
+struct Foo {
+  float a{0};
+};
+void foo(MyVector<1> = MyVector<1>());

base-commit: fbf423309e103b54f7c9d39b2f7870b9bedfe9d2




[committed] libgomp: Fix up creation of artificial teams

2022-10-12 Thread Jakub Jelinek via Gcc-patches
Hi!

When not in explicit parallel/target/teams construct, we in some cases create
an artificial parallel with a single thread (either to handle target nowait
or for task reduction purposes).  In those cases, it handled again artificially
created implicit task (created by gomp_new_icv for cases where we needed to 
write
to some ICVs), but as the testcases show, didn't take into account possibility
of this being done from explicit task(s).  The code would destroy/free the 
previous
task and replace it with the new implicit task.  If task is an explicit task
(when teams is NULL, all explicit tasks behave like if (0)), it is a pointer to
a local stack variable, so freeing it doesn't work, and additionally we 
shouldn't
lose the explicit tasks - the new implicit task should instead replace the
ancestor task which is the first implicit one.

Regtested on x86_64-linux and i686-linux, committed to trunk.
Will consider it for backporting later.

2022-10-12  Jakub Jelinek  

* task.c (gomp_create_artificial_team): Fix up handling of invocations
from within explicit task.
* target.c (GOMP_target_ext): Likewise.
* testsuite/libgomp.c/task-7.c: New test.
* testsuite/libgomp.c/task-8.c: New test.
* testsuite/libgomp.c-c++-common/task-reduction-17.c: New test.
* testsuite/libgomp.c-c++-common/task-reduction-18.c: New test.

--- libgomp/task.c.jj   2022-05-25 11:10:32.543261788 +0200
+++ libgomp/task.c  2022-10-12 16:49:03.342493229 +0200
@@ -2465,6 +2465,7 @@ gomp_create_artificial_team (void)
   struct gomp_task_icv *icv;
   struct gomp_team *team = gomp_new_team (1);
   struct gomp_task *task = thr->task;
+  struct gomp_task **implicit_task = &task;
   icv = task ? &task->icv : &gomp_global_icv;
   team->prev_ts = thr->ts;
   thr->ts.team = team;
@@ -2477,17 +2478,25 @@ gomp_create_artificial_team (void)
   thr->ts.static_trip = 0;
   thr->task = &team->implicit_task[0];
   gomp_init_task (thr->task, NULL, icv);
-  if (task)
+  while (*implicit_task
+&& (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
+implicit_task = &(*implicit_task)->parent;
+  if (*implicit_task)
 {
-  thr->task = task;
+  thr->task = *implicit_task;
   gomp_end_task ();
-  free (task);
+  free (*implicit_task);
   thr->task = &team->implicit_task[0];
 }
 #ifdef LIBGOMP_USE_PTHREADS
   else
 pthread_setspecific (gomp_thread_destructor, thr);
 #endif
+  if (implicit_task != &task)
+{
+  *implicit_task = thr->task;
+  thr->task = task;
+}
 }
 
 /* The format of data is:
--- libgomp/target.c.jj 2022-09-09 18:44:27.157255847 +0200
+++ libgomp/target.c2022-10-12 16:43:33.531002003 +0200
@@ -2813,6 +2813,7 @@ GOMP_target_ext (int device, void (*fn)
{
  struct gomp_team *team = gomp_new_team (1);
  struct gomp_task *task = thr->task;
+ struct gomp_task **implicit_task = &task;
  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
  team->prev_ts = thr->ts;
  thr->ts.team = team;
@@ -2825,15 +2826,23 @@ GOMP_target_ext (int device, void (*fn)
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  gomp_init_task (thr->task, NULL, icv);
- if (task)
+ while (*implicit_task
+&& (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
+   implicit_task = &(*implicit_task)->parent;
+ if (*implicit_task)
{
- thr->task = task;
+ thr->task = *implicit_task;
  gomp_end_task ();
- free (task);
+ free (*implicit_task);
  thr->task = &team->implicit_task[0];
}
  else
pthread_setspecific (gomp_thread_destructor, thr);
+ if (implicit_task != &task)
+   {
+ *implicit_task = thr->task;
+ thr->task = task;
+   }
}
   if (thr->ts.team
  && !thr->task->final_task)
--- libgomp/testsuite/libgomp.c/task-7.c.jj 2022-10-12 15:41:25.488494296 
+0200
+++ libgomp/testsuite/libgomp.c/task-7.c2022-10-12 16:03:32.881612519 
+0200
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <omp.h>
+#include <stdlib.h>
+
+int
+main ()
+{
+  #pragma omp task final (1)
+  {
+if (!omp_in_final ())
+  abort ();
+#pragma omp task
+{
+  if (!omp_in_final ())
+   abort ();
+  #pragma omp target nowait
+  if (omp_in_final ())
+   abort ();
+  if (!omp_in_final ())
+   abort ();
+  #pragma omp taskwait
+}
+  }
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/task-8.c.jj 2022-10-12 16:06:38.889097404 
+0200
+++ libgomp/testsuite/libgomp.c/task-8.c2022-10-12 16:07:19.568545781 
+0200
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+
+int
+main ()
+{
+  int i = 0;
+  #pragma omp task
+  {
+#pragma omp target nowait private (i)
+i = 1;
+#pragma omp taskwait
+  }
+  return 0;
+}
--- libgomp/testsuite/libgomp

[committed] libgomp: Add omp_in_explicit_task support

2022-10-12 Thread Jakub Jelinek via Gcc-patches
Hi!

This is pretty straightforward, if gomp_thread ()->task is NULL,
it can't be explicit task, otherwise if
gomp_thread ()->task->kind == GOMP_TASK_IMPLICIT, it is an implicit
task, otherwise explicit task.

Regtested on x86_64-linux and i686-linux, committed to trunk.

2022-10-12  Jakub Jelinek  

* omp.h.in (omp_in_explicit_task): Declare.
* omp_lib.h.in (omp_in_explicit_task): Likewise.
* omp_lib.f90.in (omp_in_explicit_task): New interface.
* libgomp.map (OMP_5.2): New symbol version, export
omp_in_explicit_task and omp_in_explicit_task_.
* task.c (omp_in_explicit_task): New function.
* fortran.c (omp_in_explicit_task): Add ialias_redirect.
(omp_in_explicit_task_): New function.
* libgomp.texi (OpenMP 5.2): Mark omp_in_explicit_task as implemented.
* testsuite/libgomp.c-c++-common/task-in-explicit-1.c: New test.
* testsuite/libgomp.c-c++-common/task-in-explicit-2.c: New test.
* testsuite/libgomp.c-c++-common/task-in-explicit-3.c: New test.

--- libgomp/omp.h.in.jj 2022-06-13 14:02:37.231566968 +0200
+++ libgomp/omp.h.in2022-10-12 13:30:00.414777439 +0200
@@ -244,6 +244,7 @@ extern int omp_get_team_size (int) __GOM
 extern int omp_get_active_level (void) __GOMP_NOTHROW;
 
 extern int omp_in_final (void) __GOMP_NOTHROW;
+extern int omp_in_explicit_task (void) __GOMP_NOTHROW;
 
 extern int omp_get_cancellation (void) __GOMP_NOTHROW;
 extern omp_proc_bind_t omp_get_proc_bind (void) __GOMP_NOTHROW;
--- libgomp/omp_lib.h.in.jj 2022-06-13 14:02:37.232566958 +0200
+++ libgomp/omp_lib.h.in2022-10-12 13:30:58.761972700 +0200
@@ -220,6 +220,8 @@
 
   external omp_in_final
   logical(4) omp_in_final
+  external omp_in_explicit_task
+  logical(4) omp_in_explicit_task
 
   external omp_get_cancellation
   logical(4) omp_get_cancellation
--- libgomp/omp_lib.f90.in.jj   2022-06-13 14:02:37.231566968 +0200
+++ libgomp/omp_lib.f90.in  2022-10-12 13:30:37.134271001 +0200
@@ -445,6 +445,12 @@
 end interface
 
 interface
+  function omp_in_explicit_task ()
+logical (4) :: omp_in_explicit_task
+  end function omp_in_explicit_task
+end interface
+
+interface
   function omp_get_cancellation ()
 logical (4) :: omp_get_cancellation
   end function omp_get_cancellation
--- libgomp/libgomp.map.jj  2022-05-24 09:12:34.689470538 +0200
+++ libgomp/libgomp.map 2022-10-12 13:36:46.847171857 +0200
@@ -234,6 +234,12 @@ OMP_5.1.1 {
omp_target_memcpy_rect_async;
 } OMP_5.1;
 
+OMP_5.2 {
+  global:
+   omp_in_explicit_task;
+   omp_in_explicit_task_;
+} OMP_5.1.1;
+
 GOMP_1.0 {
   global:
GOMP_atomic_end;
--- libgomp/task.c.jj   2022-05-25 11:10:32.543261788 +0200
+++ libgomp/task.c  2022-10-12 16:49:03.342493229 +0200
@@ -2678,6 +2678,16 @@ omp_in_final (void)
 
 ialias (omp_in_final)
 
+int
+omp_in_explicit_task (void)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_task *task = thr->task;
+  return task && task->kind != GOMP_TASK_IMPLICIT;
+}
+
+ialias (omp_in_explicit_task)
+
 void
 omp_fulfill_event (omp_event_handle_t event)
 {
--- libgomp/fortran.c.jj2022-03-16 16:26:46.236198146 +0100
+++ libgomp/fortran.c   2022-10-12 14:25:03.072193607 +0200
@@ -76,6 +76,7 @@ ialias_redirect (omp_get_ancestor_thread
 ialias_redirect (omp_get_team_size)
 ialias_redirect (omp_get_active_level)
 ialias_redirect (omp_in_final)
+ialias_redirect (omp_in_explicit_task)
 ialias_redirect (omp_get_cancellation)
 ialias_redirect (omp_get_proc_bind)
 ialias_redirect (omp_get_num_places)
@@ -482,6 +483,12 @@ omp_in_final_ (void)
   return omp_in_final ();
 }
 
+int32_t
+omp_in_explicit_task_ (void)
+{
+  return omp_in_explicit_task ();
+}
+
 void
 omp_set_num_teams_ (const int32_t *num_teams)
 {
--- libgomp/libgomp.texi.jj 2022-10-06 08:55:02.776290479 +0200
+++ libgomp/libgomp.texi2022-10-12 17:16:21.809127181 +0200
@@ -360,8 +360,8 @@ to address of matching mapped list item
 
 @multitable @columnfractions .60 .10 .25
 @headitem Description @tab Status @tab Comments
-@item @code{omp_in_explicit_task} routine and @emph{implicit-task-var} ICV
-  @tab N @tab
+@item @code{omp_in_explicit_task} routine and @emph{explicit-task-var} ICV
+  @tab Y @tab
 @item @code{omp}/@code{ompx}/@code{omx} sentinels and @code{omp_}/@code{ompx_}
   namespaces @tab N/A
   @tab warning for @code{ompx/omx} sentinels@footnote{The @code{ompx}
--- libgomp/testsuite/libgomp.c-c++-common/task-in-explicit-1.c.jj  
2022-10-12 14:50:34.515132245 +0200
+++ libgomp/testsuite/libgomp.c-c++-common/task-in-explicit-1.c 2022-10-12 
15:26:56.086277744 +0200
@@ -0,0 +1,106 @@
+/* { dg-do run } */
+
+#include <omp.h>
+#include <stdlib.h>
+
+int
+main ()
+{
+  if (omp_in_explicit_task ())
+abort ();
+  #pragma omp task
+  if (!omp_in_explicit_task ())
+abort ();
+  #pragma omp task final (1)
+  {
+

[committed] libgomp: Fix up OpenMP 5.2 feature bullet

2022-10-12 Thread Jakub Jelinek via Gcc-patches
Hi!

The previous bullet correctly mentions 5.2 added for Fortran
allocators directive which is a replacement of allocate directive
associated with ALLOCATE statement to differentiate it at parse time
from allocate directive as declarative one not associated with ALLOCATE
statement, but the deprecation bullet talks about non-existing allocator
directive.

Committed to trunk.

2022-10-12  Jakub Jelinek  

* libgomp.texi (OpenMP 5.2): Fix up allocator -> allocate directive
in deprecation bullet.

--- libgomp/libgomp.texi.jj 2022-10-12 18:39:10.769431072 +0200
+++ libgomp/libgomp.texi2022-10-12 18:43:03.817239832 +0200
@@ -387,7 +387,7 @@ to address of matching mapped list item
 @item Extended list of directives permitted in Fortran pure procedures
   @tab N @tab
 @item New @code{allocators} directive for Fortran @tab N @tab
-@item Deprecation of @code{allocator} directive for Fortran
+@item Deprecation of @code{allocate} directive for Fortran
   allocatables/pointers @tab N @tab
 @item Optional paired @code{end} directive with @code{dispatch} @tab N @tab
 @item New @code{memspace} and @code{traits} modifiers for 
@code{uses_allocators}

Jakub



Re: [PATCH] testsuite: Only run -fcf-protection test on i?86/x86_64 [PR107213]

2022-10-12 Thread Jeff Law via Gcc-patches



On 10/11/22 10:57, Marek Polacek via Gcc-patches wrote:

This test fails on non-i?86/x86_64 targets because on those targets
we get

   error: '-fcf-protection=full' is not supported for this target

so this patch limits where the test is run.

Tested on x86_64-pc-linux-gnu, ok for trunk?

gcc/testsuite/ChangeLog:

* c-c++-common/pointer-to-fn1.c: Only run on i?86/x86_64.


OK.

jeff




Re: [PATCH] preprocessor: Fix tracking of system header state [PR60014, PR60723]

2022-10-12 Thread Jeff Law via Gcc-patches



On 10/8/22 15:18, Lewis Hyatt via Gcc-patches wrote:

The token_streamer class (which implements gcc mode -E and
-save-temps/-no-integrated-cpp) needs to keep track whether the last tokens
output were in a system header, so that it can generate line marker
annotations as necessary for a downstream consumer to reconstruct the
state. The logic for tracking it, which was added by r5-1863 to resolve
PR60723, has some edge case issues as revealed by the three new test
cases. The first, coming from the original PR60014, was incidentally fixed by
r9-1926 for unrelated reasons. The other two were still failing on master
prior to this commit. Such code paths were not realizable prior to r13-1544,
which made it possible for the token streamer to see CPP_PRAGMA tokens in more
contexts.

The two main issues being corrected here are:

1) print.prev_was_system_token needs to indicate whether the previous token
output was in a system location. However, it was not being set on every token,
only on those that triggered the main code path; specifically it was not
triggered on a CPP_PRAGMA token. Testcase 2 covers this case.

2) The token_streamer uses a variable "line_marker_emitted" to remember
whether a line marker has been emitted while processing a given token, so that
it wouldn't be done more than once in case multiple conditions requiring a
line marker are true. There was no reason for this to be a member variable
that retains its value from token to token, since it is just needed for
tracking the state locally while processing a single given token. The fact
that it could retain its value for a subsequent token is rather difficult to
observe, but testcase 3 demonstrates incorrect behavior resulting from
that. Moving this to a local variable also simplifies understanding the
control flow going forward.

gcc/c-family/ChangeLog:

PR preprocessor/60014
PR preprocessor/60723
* c-ppoutput.cc (class token_streamer): Move member
line_marker_emitted to...
(token_streamer::stream): ...a local variable here. Set
print.prev_was_system_token on all code paths.

gcc/testsuite/ChangeLog:

PR preprocessor/60014
PR preprocessor/60723
* gcc.dg/cpp/pr60014-1.c: New test.
* gcc.dg/cpp/pr60014-1.h: New test.
* gcc.dg/cpp/pr60014-2.c: New test.
* gcc.dg/cpp/pr60014-2.h: New test.
* gcc.dg/cpp/pr60014-3.c: New test.
* gcc.dg/cpp/pr60014-3.h: New test.


OK

jeff




Re: [Patch] libgomp/gcn: Prepare for reverse-offload callback handling

2022-10-12 Thread Andrew Stubbs

On 12/10/2022 15:29, Tobias Burnus wrote:

On 29.09.22 18:24, Andrew Stubbs wrote:

On 27/09/2022 14:16, Tobias Burnus wrote:
Andrew did suggest a while back to piggyback on the console_output 
handling,
avoiding another atomic access. - If this is still wanted, I like to 
have some

guidance regarding how to actually implement it.

[...]
The point is that you can use the "msg" and "text" fields for whatever 
data you want, as long as you invent a new value for "type".

[...]
You can make "case 4" do whatever you want. There are enough bytes for 
4 pointers, and you could use multiple packets (although it's not safe 
to assume they're contiguous or already arrived; maybe "case 4" for 
part 1, "case 5" for part 2). It's possible to change this structure, 
of course, but the target implementation is in newlib so versioning 
becomes a problem.


I think  – also looking at the Newlib write.c implementation - that the 
data is contiguous: there is an atomic add, where instead of passing '1' 
for a single slot, I could also add '2' for two slots.


Right, sorry, the buffer is circular, but the counter is linear.  It 
simplified reservation that way, but it does mean that there's a limit 
to the number of times the buffer can cycle before the counter 
saturates. (You'd need to stream out gigabytes of data to hit the limit 
though.)


Attached is one variant – for the decl of the GOMP_OFFLOAD_target_rev, 
it needs the generic parts of the sister nvptx patch.*


2*128 bytes were not enough, I need 3*128 bytes. (Or rather 5*64 + 32.) 
As target_ext is blocking, I decided to use a stack local variable for 
the remaining arguments and pass it along. Alternatively, I could also 
use 2 slots - and process them together. This would avoid one 
device->host memory copy but would make console_output less clear.


PS: Currently, device stack variables are private and cannot be accessed 
from the host; this will change in a separate patch. It not only affects 
the "rest" part as used in this patch but also the actual arrays behind 
addr, kinds, and sizes. And quite likely a lot of the map/firstprivate 
variables passed to addr.


As num_devices() will return 0 or -1, this is for now a non-issue.


So, the patch, as is, is known to be non-functional? How can you have 
tested it? For the addrs_sizes_kind data to be accessible the asm("s8") 
has to be wrong.


I think the patch looks good, in principle. The use of the existing 
ring-buffer is the right way to do it, IMO.


Can we get the manually allocated stacks patch in first and then follow 
up with these patches when they actually work?


Andrew


Re: [PATCH v2] c++: ICE with VEC_INIT_EXPR and defarg [PR106925]

2022-10-12 Thread Marek Polacek via Gcc-patches
On Wed, Oct 12, 2022 at 12:47:21PM -0400, Jason Merrill wrote:
> On 10/12/22 12:27, Marek Polacek wrote:
> > On Tue, Oct 11, 2022 at 04:28:11PM -0400, Jason Merrill wrote:
> > > On 10/11/22 16:00, Marek Polacek wrote:
> > > > Since r12-8066, in cxx_eval_vec_init we perform expand_vec_init_expr
> > > > while processing the default argument in this test.
> > > 
> > > Hmm, why are we calling cxx_eval_vec_init during parsing of the default
> > > argument?  In particular, any expansion that depends on the enclosing
> > > function context should be deferred until the default arg is used by a 
> > > call.
> > 
> > I think this is part of the semantic constraints checking 
> > [dcl.fct.default]/5
> > talks about, as in, this doesn't compile even though the default argument is
> > not executed:
> > 
> > struct S {
> >S() = delete;
> > };
> > void foo (S = S()) { }
> > In the test below we parse '= MyVector<1>()' and end up calling mark_used
> > on the implicit "constexpr MyVector<1>::MyVector() noexcept 
> > ()"
> > ctor.  mark_used calls maybe_instantiate_noexcept.  Since the ctor has
> > a DEFERRED_NOEXCEPT, we have to figure out if the ctor should be noexcept
> > or not using get_defaulted_eh_spec.  That means walking the members of
> > MyVector.  Thus we reach
> >/* Core 1351: If the field has an NSDMI that could throw, the
> >   default constructor is noexcept(false).  */
> 
> Maybe we need a cp_unevaluated here?  The operand of noexcept should be
> unevaluated.

That wouldn't help since get_nsdmi specifically does "cp_evaluated ev;",
so...
 
> > and call get_nsdmi on 'data'.  There we digest its initializer which is {}.
> > massage_init_elt calls digest_init_r on the {} and produces
> >TARGET_EXPR  >  D.2518
> >  {} 
> > and the subsequent fold_non_dependent_init leads to cxx_eval_vec_init
> > -> expand_vec_init_expr.
> > 
> > I think this is all correct except that the fold_non_dependent_init is
> > somewhat questionable to me; do we really have to fold in order to say
> > if the NSDMI init can throw?  Sure, we need to digest the {}, maybe
> > the field's ctors can throw, but I don't know about the folding.
> 
> And we can check cp_unevaluated_operand to avoid the
> fold_non_dependent_init?

...we'd still fold.  I'm not sure if we want a LOOKUP_ flag that says
"we're just checking if we can throw, don't fold".

Marek



Re: [PATCH v2 00/10] [RISC-V] Atomics improvements [PR100265/PR100266]

2022-10-12 Thread Andrea Parri
> > > +Andrea, in case he has time to look at the memory model / ABI
> > > issues.

> +Jeff, who was offering to help when the threads got crossed.  I'd punted on
> a lot of this in the hope Andrea could help out, as I'm not really a memory
> model guy and this is pretty far down the rabbit hole.  Happy to have the
> help if you're offering, though, as what's there is likely a pretty big
> performance issue for anyone with a reasonable memory system.

Thanks for linking me to the discussion and the remarks, Palmer.  I'm
happy to help (and synchronized with Jeff/the community) as possible,
building a better understanding of the 'issues' at stake.

  Andrea


Re: [Patch] libgomp/gcn: Prepare for reverse-offload callback handling

2022-10-12 Thread Tobias Burnus

On 12.10.22 19:09, Andrew Stubbs wrote:


On 12/10/2022 15:29, Tobias Burnus wrote:

Right, sorry, the buffer is circular, but the counter is linear. It
simplified reservation that way, but it does mean that there's a limit
to the number of times the buffer can cycle before the counter
saturates. (You'd need to stream out gigabytes of data to hit the
limit though.)

Or in other words, you can have 2^32 = 4,294,967,296 (write chunks +
reverse offloads) per kernel launch.

...

PS: Currently, device stack variables are private and cannot be
accessed from the host; this will change in a separate patch. [...]

So, the patch, as is, is known to be non-functional? How can you have
tested it? For the addrs_sizes_kind data to be accessible the
asm("s8") has to be wrong.


I have tested the non-addrs_sizes_kind part only, which permits to run
reverse-offload functions just fine, but only if they do not use
firstprivate or map. — And I actually also tested with the
addrs_sizes_kind part but that unsurprisingly fails hard when trying to
copy the stack data.


I think the patch looks good, in principle. The use of the existing
ring-buffer is the right way to do it, IMO. Can we get the manually
allocated stacks patch in first and then follow up with these patches
when they actually work?


I stash this patch as: "OK – but ams still want to have a glance once
__builtin_gcn_kernarg_ptr is in".

In terms of having fewer *.diff files around, I of course would prefer to
just change one line in a follow-up commit instead of keeping a full
patch around, but holding off until __builtin_gcn_kernarg_ptr is ready +
the default has changed to non-private stack variables is also fine.

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


ifcvt: Fix bitpos calculation in bitfield lowering [PR107229]

2022-10-12 Thread Andre Vieira (lists) via Gcc-patches

Hi,

The bitposition calculation for the bitfield lowering in loop if 
conversion was not

taking DECL_FIELD_OFFSET into account, which meant that it would result in
wrong bitpositions for bitfields that did not end up having representations
starting at the beginning of the struct.

Bootstrapped and regression tested on aarch64-none-linux-gnu and 
x86_64-pc-linux-gnu.


gcc/ChangeLog:

    PR tree-optimization/107229
    * gcc/tree-if-conv.cc (get_bitfield_rep): Fix bitposition calculation.

gcc/testsuite/ChangeLog:

    * gcc.dg/vect/pr107229-1.c: New test.
    * gcc.dg/vect/pr107229-2.c: New test.
    * gcc.dg/vect/pr107229-3.c: New test.
diff --git a/gcc/testsuite/gcc.dg/vect/pr107229-1.c 
b/gcc/testsuite/gcc.dg/vect/pr107229-1.c
new file mode 100644
index 
..67b432383d057a630746aa00af50c25fcb527d8e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr107229-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* PR tree-optimization/107229.  */
+
+int a, c;
+struct {
+  long d;
+  int : 8;
+  int : 27;
+  int e : 21;
+} f;
+void g(int b) { a = a & 1; }
+int main() {
+  while (c)
+g(f.e);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr107229-2.c 
b/gcc/testsuite/gcc.dg/vect/pr107229-2.c
new file mode 100644
index 
..88bffb63d5e8b2d7bcdeae223f4ec6ea4f611bc9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr107229-2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* PR tree-optimization/107229.  */
+
+int a, c;
+struct {
+  long f;
+  long g;
+  long d;
+  int : 8;
+  int : 27;
+  int e : 21;
+} f;
+void g(int b) { a = a & 1; }
+int main() {
+  while (c)
+g(f.e);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr107229-3.c 
b/gcc/testsuite/gcc.dg/vect/pr107229-3.c
new file mode 100644
index 
..4abd8c14531b40e9dbe9802a8f9a0eabba673c9f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr107229-3.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* PR tree-optimization/107229.  */
+
+int a, c;
+struct {
+  long f;
+  long g;
+  long d;
+  int : 8;
+  int : 32;
+  int : 2;
+  int e : 21;
+} f;
+void g(int b) { a = a & 1; }
+int main() {
+  while (c)
+g(f.e);
+  return 0;
+}
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index 
e468a4659fa28a3a31c3390cf19bee65f4590b80..33160ddef80cbd75c2a927fb50bddd792bbf5dd4
 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -3298,10 +3298,20 @@ get_bitfield_rep (gassign *stmt, bool write, tree 
*bitpos,
 *struct_expr = TREE_OPERAND (comp_ref, 0);
 
   if (bitpos)
-*bitpos
-  = fold_build2 (MINUS_EXPR, bitsizetype,
-DECL_FIELD_BIT_OFFSET (field_decl),
-DECL_FIELD_BIT_OFFSET (rep_decl));
+{
+  tree bf_pos = fold_build2 (MULT_EXPR, bitsizetype,
+DECL_FIELD_OFFSET (field_decl),
+build_int_cst (bitsizetype, 8));
+  bf_pos = fold_build2 (PLUS_EXPR, bitsizetype, bf_pos,
+   DECL_FIELD_BIT_OFFSET (field_decl));
+  tree rep_pos = fold_build2 (MULT_EXPR, bitsizetype,
+ DECL_FIELD_OFFSET (rep_decl),
+ build_int_cst (bitsizetype, 8));
+  rep_pos = fold_build2 (PLUS_EXPR, bitsizetype, rep_pos,
+DECL_FIELD_BIT_OFFSET (rep_decl));
+
+  *bitpos = fold_build2 (MINUS_EXPR, bitsizetype, bf_pos, rep_pos);
+}
 
   return rep_decl;
 


vect: Don't pattern match BITFIELD_REF's of non-integrals [PR107226]

2022-10-12 Thread Andre Vieira (lists) via Gcc-patches

Hi,

The original patch supported matching the 
vect_recog_bitfield_ref_pattern for

BITFIELD_REF's where the first operand didn't have an INTEGRAL_TYPE_P type.
That means it would also match vectors, leading to regressions in 
targets that

supported vectorization of those.

Bootstrapped and regression tested on aarch64-none-linux-gnu and 
x86_64-pc-linux-gnu.


gcc/ChangeLog:

    PR tree-optimization/107226
    * tree-vect-patterns.cc (vect_recog_bitfield_ref_pattern): Reject
    BITFIELD_REF's with non integral typed first operands.
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 
0cc315d312667c05a27df4cdf435f0d0e6fd4a52..6afd57a50c4bcb5aec7ccca6e5dc069caa4a5a30
 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -1913,6 +1913,7 @@ vect_recog_bitfield_ref_pattern (vec_info *vinfo, 
stmt_vec_info stmt_info,
 return NULL;
 
   if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
+  || !INTEGRAL_TYPE_P (TREE_TYPE (container))
   || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
 return NULL;
 
@@ -1921,25 +1922,7 @@ vect_recog_bitfield_ref_pattern (vec_info *vinfo, 
stmt_vec_info stmt_info,
   tree ret = gimple_assign_lhs (first_stmt);
   tree ret_type = TREE_TYPE (ret);
   bool shift_first = true;
-  tree vectype;
-
-  /* If the first operand of the BIT_FIELD_REF is not an INTEGER type, convert
- it to one of the same width so we can perform the necessary masking and
- shifting.  */
-  if (!INTEGRAL_TYPE_P (TREE_TYPE (container)))
-{
-  unsigned HOST_WIDE_INT container_size =
-   tree_to_uhwi (TYPE_SIZE (TREE_TYPE (container)));
-  tree int_type = build_nonstandard_integer_type (container_size, true);
-  pattern_stmt
-   = gimple_build_assign (vect_recog_temp_ssa_var (int_type),
-  VIEW_CONVERT_EXPR, container);
-  vectype = get_vectype_for_scalar_type (vinfo, int_type);
-  container = gimple_assign_lhs (pattern_stmt);
-  append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
-}
-  else
-vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (container));
+  tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (container));
 
   /* We move the conversion earlier if the loaded type is smaller than the
  return type to enable the use of widening loads.  */


Re: [PATCH] improved const shifts for AVR targets

2022-10-12 Thread Jeff Law via Gcc-patches



On 10/4/22 11:06, Alexander Binzberger via Gcc-patches wrote:

Hi,
recently I used some arduino uno for a project and realized some areas
which do not output optimal asm code. Especially around shifts and function
calls.
With this as motivation and hacktoberfest I started patching things.
Since patch files do not provide a good overview and I hope for a
"hacktoberfest-accepted" label on the PR on github I also opened it there:
https://github.com/gcc-mirror/gcc/pull/73

This patch improves shifts with const right hand operand. While 8bit and
16bit shifts were mostly fine, 24bit and 32bit were not handled well.

Testing
I checked output with a local installation of compiler explorer in asm and
a tiny unit test comparing shifts with mul/div by 2.
I however did not write any testcases in gcc for it.

Target
This patch is only targeting atmel avr family of chips.

Changelog
improved const shifts for AVR targets


It would be helpful if you could show the before/after code for the 
cases you're changing.  Extra credit if you include cycles & size 
information for those cases.  That would help someone like me who knows 
GCC well, but isn't particularly well versed in the AVR target evaluate 
the overarching goal of the patch (ie, better code).


Changes should include a ChangeLog which indicates what changed. If you 
look at git log you will see examples of what a ChangeLog should look like.


The patch is large enough that you need either a copyright assignment or DCO 
certification.


See this page for details:

https://gcc.gnu.org/contribute.html




Patch
-
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 4ed390e4cf9..c7b70812d5c 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -6043,9 +6043,6 @@ out_shift_with_cnt (const char *templ, rtx_insn
*insn, rtx operands[],
op[2] = operands[2];
op[3] = operands[3];

-  if (plen)
-*plen = 0;
-


Doesn't this leave *plen uninitialized for the case where the shift 
count is held in memory or a register or is an out of range constant?  
Is this really safe?





if (CONST_INT_P (operands[2]))
  {
/* Operand 3 is a scratch register if this is a
@@ -6150,96 +6147,68 @@ out_shift_with_cnt (const char *templ, rtx_insn
*insn, rtx operands[],
  /* 8bit shift left ((char)x << i)   */

  const char *
-ashlqi3_out (rtx_insn *insn, rtx operands[], int *len)
+ashlqi3_out (rtx_insn *insn, rtx operands[], int *plen)
  {
if (CONST_INT_P (operands[2]))
  {
-  int k;
-
-  if (!len)
- len = &k;
-


Isn't this wrong for the call to ashlqi3_out from avr.md?  In that call 
len/plen will be zero, which we then pass down.  So the question is why 
did you remove this code?



The patch as-is is relatively large and can easily be broken down into 
more manageable chunks.  I would suggest a patch for each mode.  ie, one 
which changes QImode shifts, another for HImode shifts, another for 
PSImode shifts. etc.  It may seem like more work, but by breaking it 
down reviewers can take action on each patch individually.  So for 
example its relatively easy to work through the QImode changes and those 
could go in fairly quick while the PSImode changes will require 
considerably more time to review.




switch (INTVAL (operands[2]))
   {
   default:
if (INTVAL (operands[2]) < 8)
  break;

-  *len = 1;
-  return "clr %0";
-
- case 1:
-  *len = 1;
-  return "lsl %0";
-
- case 2:
-  *len = 2;
-  return ("lsl %0" CR_TAB
-  "lsl %0");
-
- case 3:
-  *len = 3;
-  return ("lsl %0" CR_TAB
-  "lsl %0" CR_TAB
-  "lsl %0");
+return avr_asm_len ("clr %0", operands, plen, 1);


You've probably got a whitespace problem here.  I think the return 
should line up in the same column as the IF statement. Conceptually this 
seems reasonable as cases 1, 2 and 3 can be trivially handled by 
out_shift_with_cnt.  Though routing more code through out_shift_with_cnt 
means the comment might need to change since we're routing more cases 
through it that were trivially handled in ashlqi3_out before.





   case 4:
if (test_hard_reg_class (LD_REGS, operands[0]))
  {
-  *len = 2;
-  return ("swap %0" CR_TAB
-  "andi %0,0xf0");
+return avr_asm_len ("swap %0" CR_TAB
+  "andi %0,0xf0", operands, plen, 2);
More indentation problems here.  The return should line up two spaces 
inside the open curly brace.  Otherwise this case seems reasonable since 
it's generating the same code as before.

  }
-  *len = 4;
-  return ("lsl %0" CR_TAB
+return avr_asm_len ("lsl %0" CR_TAB
"lsl %0" CR_TAB
"lsl %0" CR_TAB
-  "lsl %0");
+  "lsl %0", operands, plen, 4);


Gratuitous indentation changes.  Please don't do that unless you're 
fixing cases where the indentation is wrong according to GNU/project 
standards.





   case 5:
if (test_hard_reg_class (LD_REGS, operands[0]))
  {
-  *len = 3;
-  return ("swap %0" CR_TAB
+return avr_asm_len ("swap %0" CR_TAB
"lsl %0"  CR

Re: [PATCH] c++: Implement excess precision support for C++ [PR107097, PR323]

2022-10-12 Thread Jason Merrill via Gcc-patches

On 10/11/22 09:33, Jakub Jelinek wrote:

Hi!

The following patch implements excess precision support for C++.


Great!


Like for C, it uses EXCESS_PRECISION_EXPR tree to say that its operand
is evaluated in excess precision and what the semantic type of the
expression is.
In most places I've followed what the C FE does in similar spots, so
e.g. for binary ops if one or both operands are already
EXCESS_PRECISION_EXPR, strip those away or for operations that might need
excess precision (+, -, *, /) check if the operands should use excess
precision and convert to that type and at the end wrap into
EXCESS_PRECISION_EXPR with the common semantic type.
In general I've tried to follow the C99 handling, C11+ relies on the
C standard saying that in case of integral conversions excess precision
can be used (see PR87390 for more details), but I don't see anything similar
on the C++ standard side.


https://eel.is/c++draft/expr#pre-6 seems identical to C99 (apart from a 
stray "the"?); presumably nobody has proposed to copy the N1531 
clarifications.  But since those are clarifications, I'd prefer to use 
our C11+ semantics to avoid divergence between the default modes of the 
C and C++ front ends.



There are some cases which needed to be handled differently, the C FE can
just strip EXCESS_PRECISION_EXPR (replace it with its operand) when handling
explicit cast, but that IMHO isn't right for C++ - the discovery what exact
conversion should be used (e.g. if user conversion or standard or their
sequence) should be decided based on the semantic type (i.e. type of
EXCESS_PRECISION_EXPR), and that decision continues in convert_like* where
we pick the right user conversion, again, if say some class has ctor
from double and long double and we are on ia32 with standard excess
precision promoting float/double to long double, then we should pick the
ctor from double.  Or when some other class has ctor from just double,
and EXCESS_PRECISION_EXPR semantic type is float, we should choose the
user ctor from double, but actually just convert the long double excess
precision to double and not to float first.


That sounds right.


We need to make sure
even identity conversion converts from excess precision to the semantic one
though, but if identity is chained with other conversions, we don't want
the identity next_conversion to drop to semantic precision only to widen
afterwards.

The existing testcases tweaks were for cases on i686-linux where excess
precision breaks those tests, e.g. if we have
   double d = 4.2;
   if (d == 4.2)
then it does the expected thing only with -fexcess-precision=fast,
because with -fexcess-precision=standard it is actually
   double d = 4.2;
   if ((long double) d == 4.2L)
where 4.2L is different from 4.2.  I've added -fexcess-precision=fast
to some tests and changed other tests to use constants that are exactly
representable and don't suffer from these excess precision issues.

There is one exception, pr68180.C looks like a bug in the patch which is
also present in the C FE (so I'd like to get it resolved incrementally
in both).  Reduced testcase:
typedef float __attribute__((vector_size (16))) float32x4_t;
float32x4_t foo(float32x4_t x, float y) { return x + y; }
with -m32 -std=c11 -Wno-psabi or -m32 -std=c++17 -Wno-psabi
it is rejected with:
pr68180.c:2:52: error: conversion of scalar ‘long double’ to vector 
‘float32x4_t’ {aka ‘__vector(4) float’} involves truncation
but without excess precision (say just -std=c11 -Wno-psabi or -std=c++17 
-Wno-psabi)
it is accepted.  Perhaps we should pass down the semantic type to
scalar_to_vector and use the semantic type rather than excess precision type
in the diagnostics.


Makes sense.


Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-10-11  Jakub Jelinek  

PR middle-end/323
PR c++/107097
gcc/c-family/
* c-opts.cc (c_common_post_options): Handle flag_excess_precision
in C++ the same as in C.
* c-lex.cc (interpret_float): Set const_type to excess_precision ()
even for C++.
gcc/cp/
* parser.cc (cp_parser_primary_expression): Handle
EXCESS_PRECISION_EXPR with REAL_CST operand the same as REAL_CST.
* cvt.cc (cp_ep_convert_and_check): New function.
* call.cc (build_conditional_expr): Add excess precision support.
(convert_like_internal): Likewise.  Add NESTED_P argument, pass true
to recursive calls to convert_like.
(convert_like): Add NESTED_P argument, pass it through to
convert_like_internal.  For other overload pass false to it.
(convert_like_with_context): Pass false to NESTED_P.
(convert_arg_to_ellipsis): Add excess precision support.
(magic_varargs_p): For __builtin_is{finite,inf,inf_sign,nan,normal}
and __builtin_fpclassify return 2 instead of 1, document what it
means.
(build_over_call): Don't handle former magic 2 which is no longer
used, instead for magic 

Re: [PATCH] mips: Add appropriate linker flags when compiling with -static-pie

2022-10-12 Thread Jeff Law via Gcc-patches



On 9/25/22 09:49, linted via Gcc-patches wrote:

Hello,
I'm just checking to see if anyone has had a chance to look at this.

Thank you

On Wed, Sep 14, 2022 at 2:09 PM linted  wrote:


Hello,

This patch fixes missing flags when compiling with -static-pie on mips. I
made these modifications based on the previously submitted static pie patch
for arm as well as the working code for aarch64.

I tested with a host of mips-elf and checked with mips-sim. This patch was
also tested and used with uclibc-ng to generate static pie elfs.

This is my first patch for gcc, so please let me know if there is anything
I missed.



Signed-off-by: linted 
---
  gcc/config/mips/gnu-user.h | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/config/mips/gnu-user.h b/gcc/config/mips/gnu-user.h
index 6aad7192e69..b1c665b7f37 100644
--- a/gcc/config/mips/gnu-user.h
+++ b/gcc/config/mips/gnu-user.h
@@ -56,11 +56,12 @@ along with GCC; see the file COPYING3.  If not see
  #define GNU_USER_TARGET_LINK_SPEC "\
%{G*} %{EB} %{EL} %{mips*} %{shared} \
%{!shared: \
-%{!static: \
+%{!static:%{!static-pie: \
%{rdynamic:-export-dynamic} \
%{mabi=n32: -dynamic-linker " GNU_USER_DYNAMIC_LINKERN32 "} \
%{mabi=64: -dynamic-linker " GNU_USER_DYNAMIC_LINKER64 "} \
-  %{mabi=32: -dynamic-linker " GNU_USER_DYNAMIC_LINKER32 "}} \
+  %{mabi=32: -dynamic-linker " GNU_USER_DYNAMIC_LINKER32 "}}} \
+%{static-pie:-Bstatic -pie --no-dynamic-linker -z text} \
  %{static}} \


This is a bit out of my usual areas of expertise.  But what I find odd 
here is that for -static we essentially do nothing, but for -static-pie 
we need "-Bstatic -pie --no-dynamic-linker -z text".    Is the -Bstatic 
really needed for static-pie?  And if it is, then wouldn't it be needed 
for -static as well?    If you look carefully at aarch64, you'll see it 
includes -Bstatic for -static.



Jeff



Re: [PATCH] c++: Implement excess precision support for C++ [PR107097, PR323]

2022-10-12 Thread Marek Polacek via Gcc-patches
On Tue, Oct 11, 2022 at 03:33:23PM +0200, Jakub Jelinek via Gcc-patches wrote:
> Hi!
> 
> The following patch implements excess precision support for C++.
> Like for C, it uses EXCESS_PRECISION_EXPR tree to say that its operand
> is evaluated in excess precision and what the semantic type of the
> expression is.

One trivial thing: c-common.def says "An EXCESS_PRECISION_EXPR, currently
only used for C and Objective C, ..." which will no longer be accurate with
the patch.

Marek



Re: [PATCH v2] c++: ICE with VEC_INIT_EXPR and defarg [PR106925]

2022-10-12 Thread Marek Polacek via Gcc-patches
On Wed, Oct 12, 2022 at 01:12:57PM -0400, Marek Polacek wrote:
> On Wed, Oct 12, 2022 at 12:47:21PM -0400, Jason Merrill wrote:
> > On 10/12/22 12:27, Marek Polacek wrote:
> > > On Tue, Oct 11, 2022 at 04:28:11PM -0400, Jason Merrill wrote:
> > > > On 10/11/22 16:00, Marek Polacek wrote:
> > > > > Since r12-8066, in cxx_eval_vec_init we perform expand_vec_init_expr
> > > > > while processing the default argument in this test.
> > > > 
> > > > Hmm, why are we calling cxx_eval_vec_init during parsing of the default
> > > > argument?  In particular, any expansion that depends on the enclosing
> > > > function context should be deferred until the default arg is used by a 
> > > > call.
> > > 
> > > I think this is part of the semantic constraints checking 
> > > [dcl.fct.default]/5
> > > talks about, as in, this doesn't compile even though the default argument 
> > > is
> > > not executed:
> > > 
> > > struct S {
> > >S() = delete;
> > > };
> > > void foo (S = S()) { }
> > > In the test below we parse '= MyVector<1>()' and end up calling mark_used
> > > on the implicit "constexpr MyVector<1>::MyVector() noexcept 
> > > ()"
> > > ctor.  mark_used calls maybe_instantiate_noexcept.  Since the ctor has
> > > a DEFERRED_NOEXCEPT, we have to figure out if the ctor should be noexcept
> > > or not using get_defaulted_eh_spec.  That means walking the members of
> > > MyVector.  Thus we reach
> > >/* Core 1351: If the field has an NSDMI that could throw, the
> > >   default constructor is noexcept(false).  */
> > 
> > Maybe we need a cp_unevaluated here?  The operand of noexcept should be
> > unevaluated.
> 
> That wouldn't help since get_nsdmi specifically does "cp_evaluated ev;",
> so...
>  
> > > and call get_nsdmi on 'data'.  There we digest its initializer which is 
> > > {}.
> > > massage_init_elt calls digest_init_r on the {} and produces
> > >TARGET_EXPR  > >  D.2518
> > >  {} 
> > > and the subsequent fold_non_dependent_init leads to cxx_eval_vec_init
> > > -> expand_vec_init_expr.
> > > 
> > > I think this is all correct except that the fold_non_dependent_init is
> > > somewhat questionable to me; do we really have to fold in order to say
> > > if the NSDMI init can throw?  Sure, we need to digest the {}, maybe
> > > the field's ctors can throw, but I don't know about the folding.
> > 
> > And we can check cp_unevaluated_operand to avoid the
> > fold_non_dependent_init?
> 
> ...we'd still fold.  I'm not sure if we want a LOOKUP_ flag that says
> "we're just checking if we can throw, don't fold".

Eh, a new flag is overkill.  Maybe don't do cp_evaluated in get_nsdmi if
we're called from walk_field_subobs would be worth a try?

Marek



[COMMITTED] Add range-op entry for floating point NEGATE_EXPR.

2022-10-12 Thread Aldy Hernandez via Gcc-patches
Handling negate is pretty easy, as all you have to do is flip the sign
bit, even for NANs.

gcc/ChangeLog:

* range-op-float.cc (class foperator_negate): New.
(floating_op_table::floating_op_table): Add NEGATE_EXPR
(range_op_float_tests): Add negate tests.
---
 gcc/range-op-float.cc | 62 +++
 1 file changed, 62 insertions(+)

diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc
index 22b7418c197..229b9d23351 100644
--- a/gcc/range-op-float.cc
+++ b/gcc/range-op-float.cc
@@ -1132,6 +1132,52 @@ foperator_ordered::op1_range (frange &r, tree type,
   return true;
 }
 
+class foperator_negate : public range_operator_float
+{
+  using range_operator_float::fold_range;
+  using range_operator_float::op1_range;
+public:
+  bool fold_range (frange &r, tree type,
+  const frange &op1, const frange &op2,
+  relation_kind = VREL_VARYING) const final override
+  {
+if (empty_range_varying (r, type, op1, op2))
+  return true;
+if (op1.known_isnan ())
+  {
+   bool sign;
+   if (op1.nan_signbit_p (sign))
+ r.set_nan (type, !sign);
+   else
+ r.set_nan (type);
+   return true;
+  }
+
+REAL_VALUE_TYPE lh_lb = op1.lower_bound ();
+REAL_VALUE_TYPE lh_ub = op1.upper_bound ();
+lh_lb = real_value_negate (&lh_lb);
+lh_ub = real_value_negate (&lh_ub);
+r.set (type, lh_ub, lh_lb);
+if (op1.maybe_isnan ())
+  {
+   bool sign;
+   if (op1.nan_signbit_p (sign))
+ r.update_nan (!sign);
+   else
+ r.update_nan ();
+  }
+else
+  r.clear_nan ();
+return true;
+  }
+  bool op1_range (frange &r, tree type,
+ const frange &lhs, const frange &op2,
+ relation_kind rel = VREL_VARYING) const final override
+  {
+return fold_range (r, type, lhs, op2, rel);
+  }
+} fop_negate;
+
 class foperator_abs : public range_operator_float
 {
   using range_operator_float::fold_range;
@@ -1593,6 +1639,7 @@ floating_op_table::floating_op_table ()
   set (UNORDERED_EXPR, fop_unordered);
 
   set (ABS_EXPR, fop_abs);
+  set (NEGATE_EXPR, fop_negate);
 }
 
 // Return a pointer to the range_operator_float instance, if there is
@@ -1633,6 +1680,21 @@ frange_float (const char *lb, const char *ub, tree type 
= float_type_node)
 void
 range_op_float_tests ()
 {
+  frange r, r0, r1;
+  frange trange (float_type_node);
+
+  // negate([-5, +10]) => [-10, 5]
+  r0 = frange_float ("-5", "10");
+  fop_negate.fold_range (r, float_type_node, r0, trange);
+  ASSERT_EQ (r, frange_float ("-10", "5"));
+
+  // negate([0, 1] -NAN) => [-1, -0] +NAN
+  r0 = frange_float ("0", "1");
+  r0.update_nan (true);
+  fop_negate.fold_range (r, float_type_node, r0, trange);
+  r1 = frange_float ("-1", "-0");
+  r1.update_nan (false);
+  ASSERT_EQ (r, r1);
 }
 
 } // namespace selftest
-- 
2.37.3



[PATCH] xtensa: Add workaround for pSRAM cache issue in ESP32

2022-10-12 Thread Alexey Lapshin via Gcc-patches
From a2b425031f5b06dd51cd3ca34fe4f3620b93a944 Mon Sep 17 00:00:00 2001
From: Jeroen Domburg 
Date: Sat, 12 Aug 2017 23:10:12 +0800
Subject: [PATCH] xtensa: Add workaround for pSRAM cache issue in ESP32

Xtensa does a load/store inversion when a load and a store to the same
address is found in the 5 affected stages of the pipeline: with a load
done
_after_ the store in code, the Xtensa will move it _before_ the store
in
execution.
Unfortunately, the ESP32 pSRAM cache messes up handling these
when an interrupt happens during these. This reorg step inserts NOPs
between loads and stores so this never occurs.

Workarounds:

  ESP32_PSRAM_FIX_NOPS:
   The handling issue also shows up when doing a store to an 8 or 16-
bit
   memory location followed by a larger (16 or 32-bit) sized load from
that
   location within the time it takes to grab a cache line from external
RAM
   (which is at least 80 cycles). The cache will confuse the load and
store,
   resulting in the bytes not set by the store to be read as garbage.
To fix
   this, we insert a memory barrier with NOP instructions after each
8/16-bit
   store that isn't followed by another store.

  ESP32_PSRAM_FIX_MEMW (default):
   Explicitly insert a memory barrier instead of nops.
   Slower than nops, but faster than just adding memws everywhere.

  ESP32_PSRAM_FIX_DUPLDST:
Explicitly insert a load after every store:
- Instruction is s32i:
Insert l32i from that address to the source register
immediately after,
plus a duplicated s32i after that.
- Instruction is s8i/s16i:
Note and insert a memw before a load.
(The same as ESP32_PSRAM_FIX_MEMW)
- If any of the args are volatile, no touch:
The memw resulting from that will fix everything.
---
 gcc/config.gcc  |   5 +
 gcc/config/xtensa/t-esp32-psram-fix |  22 ++
 gcc/config/xtensa/xtensa-opts.h |  34 +++
 gcc/config/xtensa/xtensa.cc | 444 
 gcc/config/xtensa/xtensa.h  |   1 +
 gcc/config/xtensa/xtensa.md |  46 ++-
 gcc/config/xtensa/xtensa.opt|  31 ++
 7 files changed, 580 insertions(+), 3 deletions(-)
 create mode 100644 gcc/config/xtensa/t-esp32-psram-fix
 create mode 100644 gcc/config/xtensa/xtensa-opts.h

diff --git a/gcc/config.gcc b/gcc/config.gcc
index e73cb848c2d..a407e8407f0 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -3457,6 +3457,11 @@ xstormy16-*-elf)
extra_options=stormy16/stormy16.opt
tmake_file="stormy16/t-stormy16"
;;
+xtensa*-esp32-elf*)
+   tm_file="${tm_file} elfos.h newlib-stdint.h xtensa/elf.h"
+   tmake_file="${tmake_file} xtensa/t-esp32-psram-fix"
+   extra_options="${extra_options} xtensa/elf.opt"
+   ;;
 xtensa*-*-elf*)
tm_file="${tm_file} elfos.h newlib-stdint.h xtensa/elf.h"
extra_options="${extra_options} xtensa/elf.opt"
diff --git a/gcc/config/xtensa/t-esp32-psram-fix b/gcc/config/xtensa/t-
esp32-psram-fix
new file mode 100644
index 000..78fe54d4852
--- /dev/null
+++ b/gcc/config/xtensa/t-esp32-psram-fix
@@ -0,0 +1,22 @@
+# Copyright (C) 2022 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# .
+
+$(out_object_file): gt-xtensa.h
+
+MULTILIB_OPTIONS = mfix-esp32-psram-cache-issue
+MULTILIB_DIRNAMES = esp32-psram
diff --git a/gcc/config/xtensa/xtensa-opts.h
b/gcc/config/xtensa/xtensa-opts.h
new file mode 100644
index 000..73c2015a016
--- /dev/null
+++ b/gcc/config/xtensa/xtensa-opts.h
@@ -0,0 +1,34 @@
+/* Definitions of option handling for Tensilica's Xtensa target
machine.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   Contributed by Espressif 
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+.  */
+
+
+
+#ifndef XTENSA_OPTS_H
+#define XTENSA_OPTS_H
+
+enum

[wwwdocs] porting_to: Two-stage overload resolution for implicit move removed

2022-10-12 Thread Marek Polacek via Gcc-patches
As I promised in
,
I'd like to update our GCC 13 porting_to.html with the following note.

Does this look OK to commit?  Thanks,

diff --git a/htdocs/gcc-13/porting_to.html b/htdocs/gcc-13/porting_to.html
index 84a00f21..243ed29d 100644
--- a/htdocs/gcc-13/porting_to.html
+++ b/htdocs/gcc-13/porting_to.html
@@ -42,5 +42,57 @@ be included explicitly when compiled with GCC 13:
 
 
 
+Two-stage overload resolution for implicit move 
removed
+
+GCC 13 removed the two-stage overload resolution when performing
+implicit move, whereby the compiler does two separate overload resolutions:
+one treating the operand as an rvalue, and then (if that resolution fails)
+another one treating the operand as an lvalue.  In the standard this was
+introduced in C++11 and implemented in gcc in
+<a href="https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=4ce8c5dea53d80736b9c0ba6faa7430ed65ed365">
+r251035</a>.  In
+<a href="https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=1722e2013f05f1f1f99379dbaa0c0df356da731f">
+r11-2412</a>, the fallback overload resolution was disabled in C++20 (but
+not in C++17).  Then C++23 <a href="https://wg21.link/p2266">P2266</a>
+removed the fallback overload resolution, and changed the implicit move
+rules once again.
+
+
+The two overload resolutions approach was complicated and quirky, so users
+should transition to the newer model.  This change means that code that
+previously didn't compile in C++17 will now compile, for example:
+
+
+   struct S1 { S1(S1 &&); };
+   struct S2 : S1 {};
+
+   S1
+   f (S2 s)
+   {
+ return s; // OK, derived-to-base, use S1::S1(S1&&)
+   }
+
+
+
+And conversely, code that used to work in C++17 may not compile anymore:
+
+
+
+   struct W {
+ W();
+   };
+
+   struct F {
+ F(W&);
+ F(W&&) = delete;
+   };
+
+   F fn ()
+   {
+ W w;
+ return w; // use w as rvalue -> use of deleted function F::F(W&&)
+   }
+
+
 
 



[PATCH] Fortran: simplify array constructors with typespec [PR93483, PR107216, PR107219]

2022-10-12 Thread Harald Anlauf via Gcc-patches
Dear Fortranners,

this one was really bugging me for quite some time.  We failed to
properly handle (= simplify) expressions using array constructors
with typespec, and with parentheses and unary '+' and '-'
sprinkled here and there.  When there was no typespec, there was
no related problem.

The underlying issue apparently was that we should simplify
elements of the array constructor before attempting the type
conversion.

Thanks to Gerhard, who persisted by submitting many related PRs.

Regtested on x86_64-pc-linux-gnu.  OK for mainline?

Thanks,
Harald

From ee65197f4d0b0050dc61687b5a77f1afe3bd4a27 Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Wed, 12 Oct 2022 21:33:36 +0200
Subject: [PATCH] Fortran: simplify array constructors with typespec [PR93483,
 PR107216, PR107219]

gcc/fortran/ChangeLog:

	PR fortran/93483
	PR fortran/107216
	PR fortran/107219
	* array.cc (walk_array_constructor): If an element of an array
	constructor is an EXPR_OP, try simplification before type conversion.

gcc/testsuite/ChangeLog:

	PR fortran/93483
	PR fortran/107216
	PR fortran/107219
	* gfortran.dg/array_constructor_56.f90: New test.
---
 gcc/fortran/array.cc  |  4 
 .../gfortran.dg/array_constructor_56.f90  | 22 +++
 2 files changed, 26 insertions(+)
 create mode 100644 gcc/testsuite/gfortran.dg/array_constructor_56.f90

diff --git a/gcc/fortran/array.cc b/gcc/fortran/array.cc
index bbdb5b392fc..9bec299f160 100644
--- a/gcc/fortran/array.cc
+++ b/gcc/fortran/array.cc
@@ -1205,6 +1205,10 @@ walk_array_constructor (gfc_typespec *ts, gfc_constructor_base head)
   for (c = gfc_constructor_first (head); c; c = gfc_constructor_next (c))
 {
   e = c->expr;
+
+  if (e->expr_type == EXPR_OP)
+	gfc_simplify_expr (e, 0);
+
   if (e->expr_type == EXPR_ARRAY && e->ts.type == BT_UNKNOWN
 	  && !e->ref && e->value.constructor)
 	{
diff --git a/gcc/testsuite/gfortran.dg/array_constructor_56.f90 b/gcc/testsuite/gfortran.dg/array_constructor_56.f90
new file mode 100644
index 000..4701fb36225
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/array_constructor_56.f90
@@ -0,0 +1,22 @@
+! { dg-do compile }
+!
+! Test the fix for the following:
+! PR fortran/93483
+! PR fortran/107216
+! PR fortran/107219
+!
+! Contributed by G.Steinmetz
+
+program p
+  real, parameter :: r0(*) = +[real :: +(1) ]
+  real, parameter :: r1(*) = +[real :: +[1] ]
+  real, parameter :: r2(*) = -[real :: [(1)]]
+  real, parameter :: r3(*) = +[real :: [-(1)]]
+  real, parameter :: r4(*) = -[real :: [[(1)]]]
+  real, parameter :: r5(*) = -[real :: -[1, 2]]
+  real, parameter :: r6(*) = +[real :: +[1, 2]]
+  real, parameter :: r7(*) =  [real :: 1, 2] * [real :: 1, (2)]
+  real, parameter :: r8(*) =  [real :: 1, (2)] * [real :: 1, 2]
+  real, parameter :: r9(*) = +[real :: 1, 2] * [real :: 1, (2)]
+  real, parameter :: rr(*) = -[real :: 1, (2)] * [real :: 1, 2]
+end
--
2.35.3



[PATCH] libstdc++: respect with-{headers, newlib} for default hosted value

2022-10-12 Thread Arsen Arsenović via Gcc-patches
This saves us a build flag when building for freestanding targets.

libstdc++-v3/ChangeLog:

* acinclude.m4: Default hosted to off if building without
headers and without newlib.
---
Tested for x86_64-elf.

 libstdc++-v3/acinclude.m4 | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index 719eab15c77..8f4e901c909 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -2982,7 +2982,10 @@ AC_DEFUN([GLIBCXX_ENABLE_HOSTED], [
enable_hosted_libstdcxx=no
;;
*)
-   enable_hosted_libstdcxx=yes
+   case "${with_newlib}-${with_headers}" in
+   no-no) enable_hosted_libstdcxx=no ;;
+   *) enable_hosted_libstdcxx=yes ;;
+   esac
;;
  esac])
 
-- 
2.38.0



[PATCH] PR 107189 Remove useless _Alloc_node

2022-10-12 Thread François Dumont via Gcc-patches

    libstdc++: Remove _Alloc_node instance in _Rb_tree [PR107189]

    libstdc++-v3/ChangeLog:

    PR libstdc++/107189
    * include/bits/stl_tree.h (_Rb_tree<>::_M_insert_range_equal): Remove
    unused _Alloc_node instance.

Ok to commit ?

François
diff --git a/libstdc++-v3/include/bits/stl_tree.h b/libstdc++-v3/include/bits/stl_tree.h
index a4de6141765..33d25089a1d 100644
--- a/libstdc++-v3/include/bits/stl_tree.h
+++ b/libstdc++-v3/include/bits/stl_tree.h
@@ -1123,7 +1123,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	__enable_if_t::value>
 	_M_insert_range_equal(_InputIterator __first, _InputIterator __last)
 	{
-	  _Alloc_node __an(*this);
 	  for (; __first != __last; ++__first)
 	_M_emplace_equal(*__first);
 	}


Re: [wwwdocs] porting_to: Two-stage overload resolution for implicit move removed

2022-10-12 Thread Jonathan Wakely via Gcc-patches
On Wed, 12 Oct 2022 at 20:39, Marek Polacek  wrote:
>
> As I promised in
> ,
> I'd like to update our GCC 13 porting_to.html with the following note.
>
> Does this look OK to commit?  Thanks,
>
> diff --git a/htdocs/gcc-13/porting_to.html b/htdocs/gcc-13/porting_to.html
> index 84a00f21..243ed29d 100644
> --- a/htdocs/gcc-13/porting_to.html
> +++ b/htdocs/gcc-13/porting_to.html
> @@ -42,5 +42,57 @@ be included explicitly when compiled with GCC 13:
>  
>  
>
> +Two-stage overload resolution for implicit move 
> removed
> +
> +GCC 13 removed the two-stage overload resolution when performing
> +implicit move, whereby the compiler does two separate overload resolutions:
> +one treating the operand as an rvalue, and then (if that resolution fails)
> +another one treating the operand as an lvalue.  In the standard this was
> +introduced in C++11 and implemented in gcc in
> + href="https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=4ce8c5dea53d80736b9c0ba6faa7430ed65ed365";>
> +r251035.  In
> + href="https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=1722e2013f05f1f1f99379dbaa0c0df356da731f";>
> +r11-2412, the fallback overload resolution was disabled in C++20 (but
> +not in C++17).  Then C++23 https://wg21.link/p2266";>P2266
> +removed the fallback overload resolution, and changed the implicit move
> +rules once again.
> +
> +
> +The two overload resolutions approach was complicated and quirky, so users
> +should transition to the newer model.  This change means that code that
> +previously didn't compile in C++17 will now compile, for example:
> +
> +
> +   struct S1 { S1(S1 &&); };
> +   struct S2 : S1 {};
> +
> +   S1
> +   f (S2 s)
> +   {
> + return s; // OK, derived-to-base, use S1::S1(S1&&)
> +   }
> +
> +
> +
> +And conversely, code that used to work in C++17 may not compile anymore:
> +
> +
> +
> +   struct W {
> + W();
> +   };
> +
> +   struct F {
> + F(W&);
> + F(W&&) = delete;
> +   };
> +
> +   F fn ()
> +   {
> + W w;
> + return w; // use w as rvalue -> use of deleted function F::F(W&&)

Deleted move constructors are an abomination, and should never occur
in real code. I'm not sure using one even in an example like this
should be encouraged. The example added by P2266 to Annex D is more
realistic (and actually broke a libstdc++ test):

X& foo(X&& x) { return x; }



> +   }
> +
> +
>  
>  
>



Re: [PATCH] libstdc++: async: tolerate slightly shorter sleep

2022-10-12 Thread Alexandre Oliva via Gcc-patches
On Oct 12, 2022, Jonathan Wakely  wrote:

> On Wed, 12 Oct 2022 at 12:41, Jonathan Wakely wrote:
>> 
>> On Thu, 23 Jun 2022 at 12:38, Alexandre Oliva via Libstdc++
>>  wrote:
>> >
>> > On Jun 22, 2022, Alexandre Oliva  wrote:
>> >
>> > > Regstrapped on x86_64-linux-gnu, also tested with a cross to
>> > > aarch64-rtems6.  Ok to install?
>> >
>> > The early wakeups are fixed for rtems6.1, so the same question raised at
>> > https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597102.html apply to
>> > this one:
>> 
>> Looks like I never reviewed this one, sorry.
>> 
>> The patch to xfail this test for rtems is OK.

> It's also fine if you just want to drop this patch for the same reason
> as https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597105.html

Yeah, nanosleep is fixed, no need for this one, thanks, withdrawn.

-- 
Alexandre Oliva, happy hacker                https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


Re: [wwwdocs] porting_to: Two-stage overload resolution for implicit move removed

2022-10-12 Thread Marek Polacek via Gcc-patches
On Wed, Oct 12, 2022 at 09:50:36PM +0100, Jonathan Wakely wrote:
> On Wed, 12 Oct 2022 at 20:39, Marek Polacek  wrote:
> >
> > As I promised in
> > ,
> > I'd like to update our GCC 13 porting_to.html with the following note.
> >
> > Does this look OK to commit?  Thanks,
> >
> > diff --git a/htdocs/gcc-13/porting_to.html b/htdocs/gcc-13/porting_to.html
> > index 84a00f21..243ed29d 100644
> > --- a/htdocs/gcc-13/porting_to.html
> > +++ b/htdocs/gcc-13/porting_to.html
> > @@ -42,5 +42,57 @@ be included explicitly when compiled with GCC 13:
> >  
> >  
> >
> > +Two-stage overload resolution for implicit move 
> > removed
> > +
> > +GCC 13 removed the two-stage overload resolution when performing
> > +implicit move, whereby the compiler does two separate overload resolutions:
> > +one treating the operand as an rvalue, and then (if that resolution fails)
> > +another one treating the operand as an lvalue.  In the standard this was
> > +introduced in C++11 and implemented in gcc in
> > + > href="https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=4ce8c5dea53d80736b9c0ba6faa7430ed65ed365";>
> > +r251035.  In
> > + > href="https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=1722e2013f05f1f1f99379dbaa0c0df356da731f";>
> > +r11-2412, the fallback overload resolution was disabled in C++20 (but
> > +not in C++17).  Then C++23 https://wg21.link/p2266";>P2266
> > +removed the fallback overload resolution, and changed the implicit move
> > +rules once again.
> > +
> > +
> > +The two overload resolutions approach was complicated and quirky, so users
> > +should transition to the newer model.  This change means that code that
> > +previously didn't compile in C++17 will now compile, for example:
> > +
> > +
> > +   struct S1 { S1(S1 &&); };
> > +   struct S2 : S1 {};
> > +
> > +   S1
> > +   f (S2 s)
> > +   {
> > + return s; // OK, derived-to-base, use S1::S1(S1&&)
> > +   }
> > +
> > +
> > +
> > +And conversely, code that used to work in C++17 may not compile anymore:
> > +
> > +
> > +
> > +   struct W {
> > + W();
> > +   };
> > +
> > +   struct F {
> > + F(W&);
> > + F(W&&) = delete;
> > +   };
> > +
> > +   F fn ()
> > +   {
> > + W w;
> > + return w; // use w as rvalue -> use of deleted function F::F(W&&)
> 
> Deleted move constructors are an abomination, and should never occur
> in real code. I'm not sure using one even in an example like this
> should be encouraged. The example added by P2266 to Annex D is more
> realistic (and actually broke a libstdc++ test):
> 
> X& foo(X&& x) { return x; }

Right, but this code still compiles in C++17, it only fails to compile
in C++23.  The previous example now doesn't compile even in C++17.  So
how about this improved patch which makes it clear that code with
deleted move constructors should never occur in practice, and adds a new
note, specifically about P2266 and the code you showed?

Thanks for taking a look,

diff --git a/htdocs/gcc-13/porting_to.html b/htdocs/gcc-13/porting_to.html
index 84a00f21..a9991e8b 100644
--- a/htdocs/gcc-13/porting_to.html
+++ b/htdocs/gcc-13/porting_to.html
@@ -42,5 +42,71 @@ be included explicitly when compiled with GCC 13:
 
 
 
+Implicit move rules change
+
+GCC 13 implements C++23 https://wg21.link/p2266";>P2266 which
+simplified the rules for implicit move.  As a consequence, valid C++20
+code that relies on a returned id-expression's being an lvalue
+may change behavior or fail to compile in C++23.  For example:
+
+
+   decltype(auto) f(int&& x) { return (x); }  // returns int&&; previously returned int&
+   int& g(int&& x) { return x; }  // ill-formed; previously well-formed
+
+
+Two-stage overload resolution for implicit move 
removed
+GCC 13 removed the two-stage overload resolution when performing
+implicit move, whereby the compiler does two separate overload resolutions:
+one treating the operand as an rvalue, and then (if that resolution fails)
+another one treating the operand as an lvalue.  In the standard this was
+introduced in C++11 and implemented in gcc in
+https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=4ce8c5dea53d80736b9c0ba6faa7430ed65ed365";>
+r251035.  In
+https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=1722e2013f05f1f1f99379dbaa0c0df356da731f";>
+r11-2412, the fallback overload resolution was disabled in C++20 (but
+not in C++17).  Then C++23 https://wg21.link/p2266";>P2266
+removed the fallback overload resolution, and changed the implicit move
+rules once again.
+
+The two overload resolutions approach was complicated and quirky, so users
+should transition to the newer model.  This change means that code that
+previously didn't compile in C++17 will now compile, for example:
+
+
+   struct S1 { S1(S1 &&); };
+   struct S2 : S1 {};
+
+   S1
+   f (S2 s)
+   {
+ return s; // OK, derived-to-base, use S1::S1(S1&&)
+   }
+
+
+Conversely, code that used to work in C++17 may not compile anymore.
+For example, the following exa

Ping^2: [PATCH] libcpp: Improve location for macro names [PR66290]

2022-10-12 Thread Lewis Hyatt via Gcc-patches
Hello-

https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599397.html

Since Jeff was kind enough to ack one of my other preprocessor patches
today, I have become emboldened to ping this one again too :). Would
anyone have some time to take a look at it please? Thanks!

-Lewis

On Thu, Sep 15, 2022 at 6:31 PM Lewis Hyatt  wrote:
>
> Hello-
>
> https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599397.html
> May I please ping this patch? Thank you.
>
> -Lewis
>
> On Fri, Aug 5, 2022 at 12:14 PM Lewis Hyatt  wrote:
> >
> >
> > When libcpp reports diagnostics whose locus is a macro name (such as for
> > -Wunused-macros), it uses the location in the cpp_macro object that was
> > stored by _cpp_new_macro. This is currently set to pfile->directive_line,
> > which contains the line number only and no column information. This patch
> > changes the stored location to the src_loc for the token defining the macro
> > name, which includes the location and range information.
> >
> > libcpp/ChangeLog:
> >
> > PR c++/66290
> > * macro.cc (_cpp_create_definition): Add location argument.
> > * internal.h (_cpp_create_definition): Adjust prototype.
> > * directives.cc (do_define): Pass new location argument to
> > _cpp_create_definition.
> > (do_undef): Stop passing inferior location to cpp_warning_with_line;
> > the default from cpp_warning is better.
> > (cpp_pop_definition): Pass new location argument to
> > _cpp_create_definition.
> > * pch.cc (cpp_read_state): Likewise.
> >
> > gcc/testsuite/ChangeLog:
> >
> > PR c++/66290
> > * c-c++-common/cpp/macro-ranges.c: New test.
> > * c-c++-common/cpp/line-2.c: Adapt to check for column information
> > on macro-related libcpp warnings.
> > * c-c++-common/cpp/line-3.c: Likewise.
> > * c-c++-common/cpp/macro-arg-count-1.c: Likewise.
> > * c-c++-common/cpp/pr58844-1.c: Likewise.
> > * c-c++-common/cpp/pr58844-2.c: Likewise.
> > * c-c++-common/cpp/warning-zero-location.c: Likewise.
> > * c-c++-common/pragma-diag-14.c: Likewise.
> > * c-c++-common/pragma-diag-15.c: Likewise.
> > * g++.dg/modules/macro-2_d.C: Likewise.
> > * g++.dg/modules/macro-4_d.C: Likewise.
> > * g++.dg/modules/macro-4_e.C: Likewise.
> > * g++.dg/spellcheck-macro-ordering.C: Likewise.
> > * gcc.dg/builtin-redefine.c: Likewise.
> > * gcc.dg/cpp/Wunused.c: Likewise.
> > * gcc.dg/cpp/redef2.c: Likewise.
> > * gcc.dg/cpp/redef3.c: Likewise.
> > * gcc.dg/cpp/redef4.c: Likewise.
> > * gcc.dg/cpp/ucnid-11-utf8.c: Likewise.
> > * gcc.dg/cpp/ucnid-11.c: Likewise.
> > * gcc.dg/cpp/undef2.c: Likewise.
> > * gcc.dg/cpp/warn-redefined-2.c: Likewise.
> > * gcc.dg/cpp/warn-redefined.c: Likewise.
> > * gcc.dg/cpp/warn-unused-macros-2.c: Likewise.
> > * gcc.dg/cpp/warn-unused-macros.c: Likewise.
> > ---
> >
> > Notes:
> > Hello-
> >
> > The PR (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66290) was originally
> > about the entirely wrong location for -Wunused-macros in C++ mode, which
> > behavior was fixed by r13-1903, but before closing it out I wanted to also
> > address a second point brought up in the PR comments, namely that we do not
> > include column information when emitting diagnostics for macro names, such as
> > is done for -Wunused-macros. The attached patch updates the location stored in
> > the cpp_macro object so that it includes the column and range information for
> > the token comprising the macro name; previously, the location was just the
> > generic one pointing to the whole line.
> >
> > The change to libcpp is very small, the reason for all the testsuite 
> > changes is
> > that I have updated all tests explicitly looking for the columnless 
> > diagnostics
> > (with the "-:" syntax to dg-warning et al) so that they expect a column
> > instead. I also added a new test which verifies the expected range 
> > information
> > in diagnostics with carets.
> >
> > Bootstrap + regtest on x86-64 Linux looks good. Please let me know if 
> > it looks
> > OK? Thanks!
> >
> > -Lewis
> >
> >  libcpp/directives.cc  |  13 +-
> >  libcpp/internal.h |   2 +-
> >  libcpp/macro.cc   |  12 +-
> >  libcpp/pch.cc |   2 +-
> >  gcc/testsuite/c-c++-common/cpp/line-2.c   |   2 +-
> >  gcc/testsuite/c-c++-common/cpp/line-3.c   |   2 +-
> >  .../c-c++-common/cpp/macro-arg-count-1.c  |   4 +-
> >  gcc/testsuite/c-c++-common/cpp/macro-ranges.c |  52 ++
> >  gcc/testsuite/c-c++-common/cpp/pr58844-1.c|   4 +-
> >  gcc/testsuite/c-c++-common/cpp/pr58844-2.c|   4 +-
> >  .../c-c++-common/cpp/warning-zero

Re: [wwwdocs] porting_to: Two-stage overload resolution for implicit move removed

2022-10-12 Thread Jonathan Wakely via Gcc-patches
On Wed, 12 Oct 2022 at 23:24, Marek Polacek  wrote:
>
> On Wed, Oct 12, 2022 at 09:50:36PM +0100, Jonathan Wakely wrote:
> > On Wed, 12 Oct 2022 at 20:39, Marek Polacek  wrote:
> > >
> > > As I promised in
> > > ,
> > > I'd like to update our GCC 13 porting_to.html with the following note.
> > >
> > > Does this look OK to commit?  Thanks,
> > >
> > > diff --git a/htdocs/gcc-13/porting_to.html b/htdocs/gcc-13/porting_to.html
> > > index 84a00f21..243ed29d 100644
> > > --- a/htdocs/gcc-13/porting_to.html
> > > +++ b/htdocs/gcc-13/porting_to.html
> > > @@ -42,5 +42,57 @@ be included explicitly when compiled with GCC 13:
> > >  
> > >  
> > >
> > > +Two-stage overload resolution for implicit move 
> > > removed
> > > +
> > > +GCC 13 removed the two-stage overload resolution when performing
> > > +implicit move, whereby the compiler does two separate overload 
> > > resolutions:
> > > +one treating the operand as an rvalue, and then (if that resolution 
> > > fails)
> > > +another one treating the operand as an lvalue.  In the standard this was
> > > +introduced in C++11 and implemented in gcc in
> > > + > > href="https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=4ce8c5dea53d80736b9c0ba6faa7430ed65ed365";>
> > > +r251035.  In
> > > + > > href="https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=1722e2013f05f1f1f99379dbaa0c0df356da731f";>
> > > +r11-2412, the fallback overload resolution was disabled in C++20 (but
> > > +not in C++17).  Then C++23 https://wg21.link/p2266";>P2266
> > > +removed the fallback overload resolution, and changed the implicit move
> > > +rules once again.
> > > +
> > > +
> > > +The two overload resolutions approach was complicated and quirky, so 
> > > users
> > > +should transition to the newer model.  This change means that code that
> > > +previously didn't compile in C++17 will now compile, for example:
> > > +
> > > +
> > > +   struct S1 { S1(S1 &&); };
> > > +   struct S2 : S1 {};
> > > +
> > > +   S1
> > > +   f (S2 s)
> > > +   {
> > > + return s; // OK, derived-to-base, use S1::S1(S1&&)
> > > +   }
> > > +
> > > +
> > > +
> > > +And conversely, code that used to work in C++17 may not compile anymore:
> > > +
> > > +
> > > +
> > > +   struct W {
> > > + W();
> > > +   };
> > > +
> > > +   struct F {
> > > + F(W&);
> > > + F(W&&) = delete;
> > > +   };
> > > +
> > > +   F fn ()
> > > +   {
> > > + W w;
> > > + return w; // use w as rvalue -> use of deleted function F::F(W&&)
> >
> > Deleted move constructors are an abomination, and should never occur
> > in real code. I'm not sure using one even in an example like this
> > should be encouraged. The example added by P2266 to Annex D is more
> > realistic (and actually broke a libstdc++ test):
> >
> > X& foo(X&& x) { return x; }
>
> Right, but this code still compiles in C++17, it only fails to compile
> in C++23.  The previous example now doesn't compile even in C++17.  So
> how about this improved patch which makes it clear that code with
> deleted move constructors should never occur in practice, and adds a new
> note, specifically about P2266 and the code you showed?

Doh, I've just realised that F(W&&) isn't a move ctor at all. For some
reason I read the example as F(F&&).

I think your original example is fine, and the note would just be
confusing (because it's not a deleted move ctor!)


>
> Thanks for taking a look,
>
> diff --git a/htdocs/gcc-13/porting_to.html b/htdocs/gcc-13/porting_to.html
> index 84a00f21..a9991e8b 100644
> --- a/htdocs/gcc-13/porting_to.html
> +++ b/htdocs/gcc-13/porting_to.html
> @@ -42,5 +42,71 @@ be included explicitly when compiled with GCC 13:
>  
>  
>
> +Implicit move rules change
> +
> +GCC 13 implements C++23 https://wg21.link/p2266";>P2266 which
> +simplified the rules for implicit move.  As a consequence, valid C++20
> +code that relies on a returned id-expression's being an lvalue
> +may change behavior or fail to compile in C++23.  For example:
> +
> +
> +   decltype(auto) f(int&& x) { return (x); }  // returns int&&; previously 
> returned int&
> +   int& g(int&& x) { return x; }  // ill-formed; previously well-formed
> +
> +
> +Two-stage overload resolution for implicit move 
> removed
> +GCC 13 removed the two-stage overload resolution when performing
> +implicit move, whereby the compiler does two separate overload resolutions:
> +one treating the operand as an rvalue, and then (if that resolution fails)
> +another one treating the operand as an lvalue.  In the standard this was
> +introduced in C++11 and implemented in gcc in
> + href="https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=4ce8c5dea53d80736b9c0ba6faa7430ed65ed365";>
> +r251035.  In
> + href="https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=1722e2013f05f1f1f99379dbaa0c0df356da731f";>
> +r11-2412, the fallback overload resolution was disabled in C++20 (but
> +not in C++17).  Then C++23 https://wg21.link/p2266";>P2266
> +removed the fa

Re: [wwwdocs] porting_to: Two-stage overload resolution for implicit move removed

2022-10-12 Thread Marek Polacek via Gcc-patches
On Wed, Oct 12, 2022 at 11:38:01PM +0100, Jonathan Wakely wrote:
> On Wed, 12 Oct 2022 at 23:24, Marek Polacek  wrote:
> >
> > On Wed, Oct 12, 2022 at 09:50:36PM +0100, Jonathan Wakely wrote:
> > > On Wed, 12 Oct 2022 at 20:39, Marek Polacek  wrote:
> > > >
> > > > As I promised in
> > > > ,
> > > > I'd like to update our GCC 13 porting_to.html with the following note.
> > > >
> > > > Does this look OK to commit?  Thanks,
> > > >
> > > > diff --git a/htdocs/gcc-13/porting_to.html 
> > > > b/htdocs/gcc-13/porting_to.html
> > > > index 84a00f21..243ed29d 100644
> > > > --- a/htdocs/gcc-13/porting_to.html
> > > > +++ b/htdocs/gcc-13/porting_to.html
> > > > @@ -42,5 +42,57 @@ be included explicitly when compiled with GCC 13:
> > > >  
> > > >  
> > > >
> > > > +Two-stage overload resolution for implicit move 
> > > > removed
> > > > +
> > > > +GCC 13 removed the two-stage overload resolution when performing
> > > > +implicit move, whereby the compiler does two separate overload 
> > > > resolutions:
> > > > +one treating the operand as an rvalue, and then (if that resolution 
> > > > fails)
> > > > +another one treating the operand as an lvalue.  In the standard this 
> > > > was
> > > > +introduced in C++11 and implemented in gcc in
> > > > + > > > href="https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=4ce8c5dea53d80736b9c0ba6faa7430ed65ed365";>
> > > > +r251035.  In
> > > > + > > > href="https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=1722e2013f05f1f1f99379dbaa0c0df356da731f";>
> > > > +r11-2412, the fallback overload resolution was disabled in C++20 
> > > > (but
> > > > +not in C++17).  Then C++23 https://wg21.link/p2266";>P2266
> > > > +removed the fallback overload resolution, and changed the implicit move
> > > > +rules once again.
> > > > +
> > > > +
> > > > +The two overload resolutions approach was complicated and quirky, so 
> > > > users
> > > > +should transition to the newer model.  This change means that code that
> > > > +previously didn't compile in C++17 will now compile, for example:
> > > > +
> > > > +
> > > > +   struct S1 { S1(S1 &&); };
> > > > +   struct S2 : S1 {};
> > > > +
> > > > +   S1
> > > > +   f (S2 s)
> > > > +   {
> > > > + return s; // OK, derived-to-base, use S1::S1(S1&&)
> > > > +   }
> > > > +
> > > > +
> > > > +
> > > > +And conversely, code that used to work in C++17 may not compile 
> > > > anymore:
> > > > +
> > > > +
> > > > +
> > > > +   struct W {
> > > > + W();
> > > > +   };
> > > > +
> > > > +   struct F {
> > > > + F(W&);
> > > > + F(W&&) = delete;
> > > > +   };
> > > > +
> > > > +   F fn ()
> > > > +   {
> > > > + W w;
> > > > + return w; // use w as rvalue -> use of deleted function F::F(W&&)
> > >
> > > Deleted move constructors are an abomination, and should never occur
> > > in real code. I'm not sure using one even in an example like this
> > > should be encouraged. The example added by P2266 to Annex D is more
> > > realistic (and actually broke a libstdc++ test):
> > >
> > > X& foo(X&& x) { return x; }
> >
> > Right, but this code still compiles in C++17, it only fails to compile
> > in C++23.  The previous example now doesn't compile even in C++17.  So
> > how about this improved patch which makes it clear that code with
> > deleted move constructors should never occur in practice, and adds a new
> > note, specifically about P2266 and the code you showed?
> 
> Doh, I've just realised that F(W&&) isn't a move ctor at all. For some
> reason I read the example as F(F&&).

And so did I while adding the note :[.

> I think your original example is fine, and the note would just be
> confusing (because it's not a deleted move ctor!)

I think I'll go ahead with this, then (I've removed the NB).  Thanks!

diff --git a/htdocs/gcc-13/porting_to.html b/htdocs/gcc-13/porting_to.html
index 84a00f21..ccd3f08f 100644
--- a/htdocs/gcc-13/porting_to.html
+++ b/htdocs/gcc-13/porting_to.html
@@ -42,5 +42,69 @@ be included explicitly when compiled with GCC 13:
 
 
 
+Implicit move rules change
+
+GCC 13 implements C++23 <a href="https://wg21.link/p2266">P2266</a> which
+simplified the rules for implicit move.  As a consequence, valid C++20
+code that relies on a returned id-expression's being an lvalue
+may change behavior or fail to compile in C++23.  For example:
+
+
+   decltype(auto) f(int&& x) { return (x); }  // returns int&&; previously 
returned int&
+   int& g(int&& x) { return x; }  // ill-formed; previously well-formed
+
+
+Two-stage overload resolution for implicit move 
removed
+GCC 13 removed the two-stage overload resolution when performing
+implicit move, whereby the compiler does two separate overload resolutions:
+one treating the operand as an rvalue, and then (if that resolution fails)
+another one treating the operand as an lvalue.  In the standard this was
+introduced in C++11 and implemented in gcc in
+https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=4ce8c5dea53d8073

Re: [PATCH v2] c++: parser - Support for target address spaces in C++

2022-10-12 Thread Paul Iannetta via Gcc-patches
On Tue, Oct 11, 2022 at 09:49:43PM -0400, Jason Merrill wrote:
> 
> It surprises me that this is the only place we complain about an object with an
> address-space qualifier.  Shouldn't we also complain about e.g. automatic
> variables/parameters or non-static data members with address-space qualified
> type?
> 

Indeed, I was missing quite a few things here.  Thanks.
I used the draft as basis this time and imported from the C
implementation the relevant parts.  This time, the errors get properly
emitted when an address space is unduly specified; and assignments and
comparisons are taken care of.

There are quite a few things I would like to clarify concerning some
implementation details.
  - A variable with automatic storage (which is neither a pointer nor
a reference) cannot be qualified with an address space.  I detect
this by the combination of `sc_none' and `! toplevel_bindings_p ()',
but I've also seen the use of `at_function_scope' at other places.
And I'm unsure which one is appropriate here.
This detection happens at the very end of grokdeclarator because I
need to know that the type is a pointer, which is not known until
very late in the function.
  - I'm having some trouble deciding whether I should include those three
stub programs as tests, they all compile fine and clang accepts
them as well.

Ex1:
```
int __seg_fs * fs1;
int __seg_gs * gs1;

template <typename T> struct strip;
template <typename T> struct strip<__seg_fs T *> { typedef T type; };
template <typename T> struct strip<__seg_gs T *> { typedef T type; };

int
main ()
{
*(strip<decltype(fs1)>::type *) fs1 == *(strip<decltype(gs1)>::type *) gs1;
return 0;
}
```

Ex2:
```
int __seg_fs * fs1;
int __seg_fs * fs2;

template <typename T, typename U> auto f (T __seg_fs * a, U __seg_gs * b) {
return a; }
template <typename T, typename U> auto f (T __seg_gs * a, U __seg_fs * b) {
return a; }

int
main ()
{
f (fs1, gs1);
f (gs1, fs1);
return 0;
}
```

Ex3:
```
int __seg_fs * fs1;
int __seg_gs * gs1;

template <typename T, typename U>
auto f (T __seg_fs * a, U __seg_gs * b)
{
return *(T *) a == *(U *) b;
}

int
main ()
{
return f (fs1, gs1);
}
```


Add support for custom address spaces in C++

gcc/
* tree.h (ENCODE_QUAL_ADDR_SPACE): Missing parentheses.

gcc/c/
* c-decl.cc: Remove c_register_addr_space.

gcc/c-family/
* c-common.cc (c_register_addr_space): Imported from c-decl.cc
(addr_space_superset): Imported from gcc/c/c-typecheck.cc
* c-common.h: Remove the FIXME.
(addr_space_superset): New declaration.

gcc/cp/
* cp-tree.h (enum cp_decl_spec): Add addr_space support.
(struct cp_decl_specifier_seq): Likewise.
* decl.cc (get_type_quals): Likewise.
(check_tag_decl): Likewise.
(grokdeclarator): Likewise.
* parser.cc (cp_parser_type_specifier): Likewise.
(cp_parser_cv_qualifier_seq_opt): Likewise.
(cp_parser_postfix_expression): Likewise.
(cp_parser_type_specifier): Likewise.
(set_and_check_decl_spec_loc): Likewise.
* typeck.cc (composite_pointer_type): Likewise
(comp_ptr_ttypes_real): Likewise.
(same_type_ignoring_top_level_qualifiers_p): Likewise.
* pt.cc (check_cv_quals_for_unify): Likewise.
(unify): Likewise.
* tree.cc: Remove c_register_addr_space stub.
* mangle.cc (write_CV_qualifiers_for_type): Mangle address spaces
  using the extended qualifier notation.

gcc/doc
* extend.texi (Named Address Spaces): add a mention about C++
  support.

gcc/testsuite/
* g++.dg/abi/mangle-addr-space1.C: New test.
* g++.dg/abi/mangle-addr-space2.C: New test.
* g++.dg/parse/addr-space.C: New test.
* g++.dg/parse/addr-space1.C: New test.
* g++.dg/parse/addr-space2.C: New test.
* g++.dg/parse/template/spec-addr-space.C: New test.
* g++.dg/ext/addr-space-decl.C: New test.
* g++.dg/ext/addr-space-ref.C: New test.
* g++.dg/ext/addr-space-ops.C: New test.

# Please enter the commit message for your changes. Lines starting
# with '#' will be ignored, and an empty message aborts the commit.
#
# Date:  Sun Oct 9 16:02:22 2022 +0200
#
# On branch releases/gcc-12
# Your branch is ahead of 'origin/releases/gcc-12' by 2 commits.
#   (use "git push" to publish your local commits)
#
# Changes to be committed:
#   modified:   gcc/c-family/c-common.cc
#   modified:   gcc/c-family/c-common.h
#   modified:   gcc/c/c-decl.cc
#   modified:   gcc/c/c-typeck.cc
#   modified:   gcc/cp/cp-tree.h
#   modified:   gcc/cp/decl.cc
#   modified:   gcc/cp/mangle.cc
#   modified:   gcc/cp/parser.cc
#   modified:   gcc/cp/pt.cc
#   modified:   gcc/cp/tree.cc
#   modified:   gcc/cp/typeck.cc
#   modified:   gcc/doc/extend.texi
#   new file:   gcc/testsuite/g++.dg/abi/mangle-addr-space1.C
#   new file:   gcc/testsuite/g++.dg/abi/mangle-addr-space2.C
#   new file:   gcc/testsuite/g++.dg/ext/addr-space-decl.C
#   new file:  

[committed] c: Do not use *_IS_IEC_60559 == 2

2022-10-12 Thread Joseph Myers
A late change for C2x (addressing comments from the second round of
editorial review before the CD ballot, postdating the most recent
public working draft) removed the value 2 for *_IS_IEC_60559 (a new
<float.h> macro added in C2x).  Adjust the implementation accordingly
not to use this value.

Bootstrapped with no regressions for x86_64-pc-linux-gnu.

gcc/
* ginclude/float.h (FLT_IS_IEC_60559, DBL_IS_IEC_60559)
(LDBL_IS_IEC_60559): Update comment.

gcc/c-family/
* c-cppbuiltin.cc (builtin_define_float_constants): Do not use
value 2 for *_IS_IEC_60559.

gcc/testsuite/
* gcc.dg/c2x-float-10.c: Do not expect value 2 for *_IS_IEC_60559.

diff --git a/gcc/c-family/c-cppbuiltin.cc b/gcc/c-family/c-cppbuiltin.cc
index 4b8486c8879..2e39acb9239 100644
--- a/gcc/c-family/c-cppbuiltin.cc
+++ b/gcc/c-family/c-cppbuiltin.cc
@@ -319,14 +319,10 @@ builtin_define_float_constants (const char *name_prefix,
 }
 
   /* For C2x *_IS_IEC_60559.  0 means the type does not match an IEC
- 60559 format, 1 that it matches a format but not operations and 2
- that it matches a format and operations (but may not conform to
- Annex F; we take this as meaning exceptions and rounding modes
- need not be supported).  */
+ 60559 format, 1 that it matches a format but not necessarily
+ operations.  */
   sprintf (name, "__%s_IS_IEC_60559__", name_prefix);
-  builtin_define_with_int_value (name,
-(fmt->ieee_bits == 0
- ? 0 : (fmt->round_towards_zero ? 1 : 2)));
+  builtin_define_with_int_value (name, fmt->ieee_bits != 0);
 }
 
 /* Define __DECx__ constants for TYPE using NAME_PREFIX and SUFFIX. */
diff --git a/gcc/ginclude/float.h b/gcc/ginclude/float.h
index afe4a712878..bc5439d664f 100644
--- a/gcc/ginclude/float.h
+++ b/gcc/ginclude/float.h
@@ -248,8 +248,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
If not, see
 #define DBL_NORM_MAX   __DBL_NORM_MAX__
 #define LDBL_NORM_MAX  __LDBL_NORM_MAX__
 
-/* Whether each type matches an IEC 60559 format (1 for format, 2 for
-   format and operations).  */
+/* Whether each type matches an IEC 60559 format.  */
 #undef FLT_IS_IEC_60559
 #undef DBL_IS_IEC_60559
 #undef LDBL_IS_IEC_60559
diff --git a/gcc/testsuite/gcc.dg/c2x-float-10.c 
b/gcc/testsuite/gcc.dg/c2x-float-10.c
index 7b53a6ab050..7206921db87 100644
--- a/gcc/testsuite/gcc.dg/c2x-float-10.c
+++ b/gcc/testsuite/gcc.dg/c2x-float-10.c
@@ -21,13 +21,13 @@ _Static_assert (FLT_IS_IEC_60559 == 0);
 _Static_assert (DBL_IS_IEC_60559 == 0);
 _Static_assert (LDBL_IS_IEC_60559 == 0);
 #else
-_Static_assert (FLT_IS_IEC_60559 == 2);
-_Static_assert (DBL_IS_IEC_60559 == 2);
+_Static_assert (FLT_IS_IEC_60559 == 1);
+_Static_assert (DBL_IS_IEC_60559 == 1);
 #if LDBL_MANT_DIG == 106 || LDBL_MIN_EXP == -16382
 /* IBM long double and m68k extended format do not meet the definition
of an IEC 60559 interchange or extended format.  */
 _Static_assert (LDBL_IS_IEC_60559 == 0);
 #else
-_Static_assert (LDBL_IS_IEC_60559 == 2);
+_Static_assert (LDBL_IS_IEC_60559 == 1);
 #endif
 #endif

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH] Optimize identical permutation in my last r13-3212-gb88adba751da63

2022-10-12 Thread Liwei Xu via Gcc-patches
Add extra index check when merging VEC_CST, this handles the case when exactly
op1 needs to be returned.

This fixes:
FAIL: gcc.dg/tree-ssa/forwprop-19.c scan-tree-dump-not forwprop1 
"VEC_PERM_EXPR"

gcc/ChangeLog:

PR target/107220
* match.pd: Check the index of VEC_CST and return the op1 if needed.
---
 gcc/match.pd | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 3550c16aaa6..1efdc3abb5d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8106,6 +8106,7 @@ and,
 vec_perm_builder builder0;
 vec_perm_builder builder1;
 vec_perm_builder builder2 (nelts, nelts, 1);
+bool ident_to_1 = true;
 
 if (!tree_to_vec_perm_builder (&builder0, @3)
|| !tree_to_vec_perm_builder (&builder1, @4))
@@ -8115,7 +8116,15 @@ and,
 vec_perm_indices sel1 (builder1, 1, nelts);
 
 for (int i = 0; i < nelts; i++)
-  builder2.quick_push (sel0[sel1[i].to_constant ()]);
+  {
+int tmp_index = sel0[sel1[i].to_constant ()].to_constant ();
+builder2.quick_push (sel0[sel1[i].to_constant ()]);
+if ( i != tmp_index)
+ ident_to_1 = false;
+  }
+
+if (ident_to_1)
+  return @1;
 
 vec_perm_indices sel2 (builder2, 2, nelts);
 
-- 
2.18.2



[PATCH] Optimize identical permutation in my last r13-3212-gb88adba751da63

2022-10-12 Thread Liwei Xu via Gcc-patches
Add extra index check when merging VEC_CST, this handles the case when exactly
op1 needs to be returned.

This fixes:
FAIL: gcc.dg/tree-ssa/forwprop-19.c scan-tree-dump-not forwprop1 
"VEC_PERM_EXPR"

gcc/ChangeLog:

PR target/107220
* match.pd: Check the index of VEC_CST and return the op1 if needed.
---
 gcc/match.pd | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 3550c16aaa6..1efdc3abb5d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8106,6 +8106,7 @@ and,
 vec_perm_builder builder0;
 vec_perm_builder builder1;
 vec_perm_builder builder2 (nelts, nelts, 1);
+bool ident_to_1 = true;
 
 if (!tree_to_vec_perm_builder (&builder0, @3)
|| !tree_to_vec_perm_builder (&builder1, @4))
@@ -8115,7 +8116,15 @@ and,
 vec_perm_indices sel1 (builder1, 1, nelts);
 
 for (int i = 0; i < nelts; i++)
-  builder2.quick_push (sel0[sel1[i].to_constant ()]);
+  {
+int tmp_index = sel0[sel1[i].to_constant ()].to_constant ();
+builder2.quick_push (sel0[sel1[i].to_constant ()]);
+if ( i != tmp_index)
+ ident_to_1 = false;
+  }
+
+if (ident_to_1)
+  return @1;
 
 vec_perm_indices sel2 (builder2, 2, nelts);
 
-- 
2.18.2



Re: [PATCH v2] c++: parser - Support for target address spaces in C++

2022-10-12 Thread Jakub Jelinek via Gcc-patches
On Thu, Oct 13, 2022 at 02:52:59AM +0200, Paul Iannetta via Gcc-patches wrote:
> + if (type != error_mark_node
> + && !ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (type))
> + && current_function_decl)
> +   {
> + error
> +   ("compound literal qualified by address-space qualifier");
> + type = error_mark_node;

Can you please write this as:
error ("compound literal qualified by address-space "
   "qualifier");
?  That is how diagnostics that don't fit on one line are usually written.

> @@ -23812,6 +23830,11 @@ cp_parser_cv_qualifier_seq_opt (cp_parser* parser)
> break;
>   }
>  
> +  if (RID_FIRST_ADDR_SPACE <= token->keyword &&

&& should never go at the end of line.

> +   token->keyword <= RID_LAST_ADDR_SPACE)
> + cv_qualifier =

and similarly = (except for aggregate initializers).

> +   ENCODE_QUAL_ADDR_SPACE (token->keyword - RID_FIRST_ADDR_SPACE);

So:

  if (RID_FIRST_ADDR_SPACE <= token->keyword
  && token->keyword <= RID_LAST_ADDR_SPACE)
cv_qualifier
  = ENCODE_QUAL_ADDR_SPACE (token->keyword - RID_FIRST_ADDR_SPACE);

> +   int unified_cv =
> + CLEAR_QUAL_ADDR_SPACE (arg_cv_quals & ~parm_cv_quals)
> + | ENCODE_QUAL_ADDR_SPACE (as_common);

Similarly (but this time with ()s added to ensure correct formatting in
some editors).

  int unified_cv
= (CLEAR_QUAL_ADDR_SPACE (arg_cv_quals & ~parm_cv_quals)
   | ENCODE_QUAL_ADDR_SPACE (as_common));

>result_type
>   = cp_build_qualified_type (void_type_node,
> -(cp_type_quals (TREE_TYPE (t1))
> - | cp_type_quals (TREE_TYPE (t2))));
> +(CLEAR_QUAL_ADDR_SPACE (cp_type_quals 
> (TREE_TYPE (t1)))
> + | CLEAR_QUAL_ADDR_SPACE (cp_type_quals 
> (TREE_TYPE (t2)))

The above 2 lines are way too long.
I'd suggest to use temporaries, say
  int quals1 = cp_type_quals (TREE_TYPE (t1));
  int quals2 = cp_type_quals (TREE_TYPE (t2));
and use those.

Jakub



Re: [PATCH] Optimize nested permutation to single VEC_PERM_EXPR [PR54346]

2022-10-12 Thread Levy
Hi RuoYao

It’s probably because loongarch64 doesn’t support 
can_vec_perm_const_p(result_mode, op_mode, sel2, false)

I’m not sure whether loongarch will support it, or whether I should just limit
the test target for pr54346.c.

Best Regards
Levy

> On 12 Oct 2022, at 9:51 pm, Xi Ruoyao  wrote:
> 
> pr54346.



Re: [PATCH] Optimize nested permutation to single VEC_PERM_EXPR [PR54346]

2022-10-12 Thread Xi Ruoyao via Gcc-patches
On Thu, 2022-10-13 at 14:15 +0800, Levy wrote:
> Hi RuoYao
> 
> It’s probably because loongarch64 doesn’t support 
> can_vec_perm_const_p(result_mode, op_mode, sel2, false)
> 
> I’m not sure whether if loongarch will support it or should I just
> limit the test target for pr54346.c?

I'm not sure if we can add TARGET_VECTORIZE_VEC_PERM_CONST when we don't
actually support vector.  (LoongArch has SIMD instructions but the
support in GCC won't be added in the very near future.)

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


[PATCH] middle-end, v3: IFN_ASSUME support [PR106654]

2022-10-12 Thread Jakub Jelinek via Gcc-patches
On Wed, Oct 12, 2022 at 11:48:27AM -0400, Jason Merrill wrote:
> > --- gcc/cp/pt.cc.jj 2022-10-10 09:31:21.947480379 +0200
> > +++ gcc/cp/pt.cc2022-10-10 09:59:49.299646482 +0200
> > @@ -21105,6 +21105,8 @@ tsubst_copy_and_build (tree t,
> >   ret = error_mark_node;
> >   break;
> > }
> > + if (!processing_template_decl)
> > +   arg = fold_build_cleanup_point_expr (TREE_TYPE (arg), arg);
> >   ret = build_call_expr_internal_loc (EXPR_LOCATION (t),
> >   IFN_ASSUME,
> >   void_type_node, 1,
> 
> This starts to seem worth factoring out a build_assume_call function.

Ok, below is an updated version of the patch that does that.
Bootstrapped/regtested on x86_64-linux and i686-linux.
> 
> I'll leave the middle-end review to others.

Ok.

2022-10-13  Jakub Jelinek  

PR c++/106654
gcc/
* function.h (struct function): Add assume_function bitfield.
* gimplify.cc (gimplify_call_expr): If the assumption isn't
simple enough, expand it into IFN_ASSUME guarded block or
for -O0 drop it.
* gimple-low.cc (create_assumption_fn): New function.
(struct lower_assumption_data): New type.
(find_assumption_locals_r, assumption_copy_decl,
adjust_assumption_stmt_r, adjust_assumption_stmt_op,
lower_assumption): New functions.
(lower_stmt): Handle IFN_ASSUME guarded block.
* tree-ssa-ccp.cc (pass_fold_builtins::execute): Remove
IFN_ASSUME calls.
* lto-streamer-out.cc (output_struct_function_base): Pack
assume_function bit.
* lto-streamer-in.cc (input_struct_function_base): And unpack it.
* cgraphunit.cc (cgraph_node::expand): Don't verify assume_function
has TREE_ASM_WRITTEN set and don't release its body.
* cfgexpand.cc (pass_expand::execute): Don't expand assume_function
into RTL, just destroy loops and exit.
* internal-fn.cc (expand_ASSUME): Remove gcc_unreachable.
* passes.cc (pass_rest_of_compilation::gate): Return false also for
fun->assume_function.
* tree-vectorizer.cc (pass_vectorize::gate,
pass_slp_vectorize::gate): Likewise.
* ipa-icf.cc (sem_function::parse): Punt for func->assume_function.
gcc/cp/
* cp-tree.h (build_assume_call): Declare.
* parser.cc (cp_parser_omp_assumption_clauses): Use build_assume_call.
* cp-gimplify.cc (build_assume_call): New function.
(process_stmt_assume_attribute): Use build_assume_call.
* pt.cc (tsubst_copy_and_build): Likewise.
gcc/testsuite/
* g++.dg/cpp23/attr-assume5.C: New test.
* g++.dg/cpp23/attr-assume6.C: New test.
* g++.dg/cpp23/attr-assume7.C: New test.

--- gcc/function.h.jj   2022-10-10 11:57:40.163722972 +0200
+++ gcc/function.h  2022-10-12 19:48:28.887554771 +0200
@@ -438,6 +438,10 @@ struct GTY(()) function {
 
   /* Set if there are any OMP_TARGET regions in the function.  */
   unsigned int has_omp_target : 1;
+
+  /* Set for artificial function created for [[assume (cond)]].
+ These should be GIMPLE optimized, but not expanded to RTL.  */
+  unsigned int assume_function : 1;
 };
 
 /* Add the decl D to the local_decls list of FUN.  */
--- gcc/gimplify.cc.jj  2022-10-10 11:57:40.165722944 +0200
+++ gcc/gimplify.cc 2022-10-12 19:48:28.890554730 +0200
@@ -3569,7 +3569,52 @@ gimplify_call_expr (tree *expr_p, gimple
 fndecl, 0));
  return GS_OK;
}
- /* FIXME: Otherwise expand it specially.  */
+ /* If not optimizing, ignore the assumptions.  */
+ if (!optimize)
+   {
+ *expr_p = NULL_TREE;
+ return GS_ALL_DONE;
+   }
+ /* Temporarily, until gimple lowering, transform
+.ASSUME (cond);
+into:
+guard = .ASSUME ();
+if (guard) goto label_true; else label_false;
+label_true:;
+{
+  guard = cond;
+}
+label_false:;
+.ASSUME (guard);
+such that gimple lowering can outline the condition into
+a separate function easily.  */
+ tree guard = create_tmp_var (boolean_type_node);
+ gcall *call = gimple_build_call_internal (ifn, 0);
+ gimple_call_set_nothrow (call, TREE_NOTHROW (*expr_p));
+ gimple_set_location (call, loc);
+ gimple_call_set_lhs (call, guard);
+ gimple_seq_add_stmt (pre_p, call);
+ *expr_p = build2 (MODIFY_EXPR, void_type_node, guard,
+   CALL_EXPR_ARG (*expr_p, 0));
+ *expr_p = build3 (BIND_EXPR, void_type_node, NULL, *expr_p, NULL);
+ tree label_false = create_artificial_label (UNKNOWN_LOCATION);
+ tree label_true = create_artifi