[Bug tree-optimization/108816] ICE in operator[], at vec.h:889

2023-02-16 Thread rguenth at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108816

Richard Biener  changed:

   What|Removed |Added

 Ever confirmed|0   |1
   Last reconfirmed||2023-02-17
   Assignee|unassigned at gcc dot gnu.org  |rguenth at gcc dot gnu.org
 Status|UNCONFIRMED |ASSIGNED

--- Comment #1 from Richard Biener  ---
I will have a look.

[Bug c++/105224] [modules] g++.dg/modules/virt-2_a.C: inline key methods: c++ modules and arm aapcs clash

2023-02-16 Thread aoliva at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105224

Alexandre Oliva  changed:

   What|Removed |Added

URL|https://gcc.gnu.org/piperma |https://gcc.gnu.org/piperma
   |il/gcc-patches/2022-April/5 |il/gcc-patches/2023-Februar
   |92763.html  |y/612175.html

--- Comment #1 from Alexandre Oliva  ---
https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612175.html has a
refreshed and retested (xfail) patch.

[PATCH v7] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-02-16 Thread Takayuki 'January June' Suwa via Gcc-patches
In the case of the CALL0 ABI, values that must be retained before and
after function calls are placed in the callee-saved registers (A12
through A15) and referenced later.  However, it is often the case that
the save and the reference each occur only once and each is a simple
register-register move.  In that case the use of the callee-saved
register can be eliminated, with two exceptions: i. the register saved
to/restored from is the stack pointer, ii. the function needs an
additional stack pointer adjustment to grow the stack.

e.g. in the following example, if there are no other occurrences of
register A14:

;; before
; prologue {
  ...
s32i.n  a14, sp, 16
  ...   ;; no frame pointer needed
;; no additional stack growth
; } prologue
  ...
mov.n   a14, a6 ;; A6 is not SP
  ...
call0   foo
  ...
mov.n   a8, a14 ;; A8 is not SP
  ...
; epilogue {
  ...
l32i.n  a14, sp, 16
  ...
; } epilogue

It can be transformed like this:

;; after
; prologue {
  ...
(no save needed)
  ...
; } prologue
  ...
s32i.n  a6, sp, 16  ;; replaced with A14's slot
  ...
call0   foo
  ...
l32i.n  a8, sp, 16  ;; through SP
  ...
; epilogue {
  ...
(no restoration needed)
  ...
; } epilogue

This patch adds the abovementioned logic to the function prologue/epilogue
RTL expander code.

gcc/ChangeLog:

* config/xtensa/xtensa.cc (machine_function): Add new member
'eliminated_callee_saved_regs'.
(xtensa_can_eliminate_callee_saved_reg_p): New function to
determine whether the register can be eliminated or not.
(xtensa_expand_prologue): Invoke the above function and
eliminate the use of the callee-saved register by using its stack
slot through the stack pointer (or the frame pointer if needed)
directly.
(xtensa_expand_epilogue): Modify to not emit register restoration
insn from its stack slot if the register is already eliminated.

gcc/testsuite/ChangeLog:

* gcc.target/xtensa/elim_callee_saved.c: New.
---
 gcc/config/xtensa/xtensa.cc   | 134 ++
 .../gcc.target/xtensa/elim_callee_saved.c |  37 +
 2 files changed, 146 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 3e2e22d4cbe..d987f1dfede 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -105,6 +105,7 @@ struct GTY(()) machine_function
   bool epilogue_done;
   bool inhibit_logues_a1_adjusts;
   rtx last_logues_a9_content;
+  bitmap eliminated_callee_saved_regs;
 };
 
 static void xtensa_option_override (void);
@@ -3343,6 +3344,65 @@ xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int 
flags)
 cfun->machine->last_logues_a9_content = GEN_INT (offset);
 }
 
+static bool
+xtensa_can_eliminate_callee_saved_reg_p (unsigned int regno,
+rtx_insn **p_insnS,
+rtx_insn **p_insnR)
+{
+  df_ref ref;
+  rtx_insn *insn, *insnS = NULL, *insnR = NULL;
+  rtx pattern;
+
+  if (!optimize || !df || call_used_or_fixed_reg_p (regno)
+  || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM))
+return false;
+
+  for (ref = DF_REG_DEF_CHAIN (regno);
+   ref; ref = DF_REF_NEXT_REG (ref))
+if (DF_REF_CLASS (ref) != DF_REF_REGULAR
+   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
+  continue;
+else if (GET_CODE (pattern = PATTERN (insn)) == SET
+&& REG_P (SET_DEST (pattern))
+&& REGNO (SET_DEST (pattern)) == regno
+&& REG_NREGS (SET_DEST (pattern)) == 1
+&& REG_P (SET_SRC (pattern)))
+  {
+   if (insnS)
+ return false;
+   insnS = insn;
+   continue;
+  }
+else
+  return false;
+
+  for (ref = DF_REG_USE_CHAIN (regno);
+   ref; ref = DF_REF_NEXT_REG (ref))
+if (DF_REF_CLASS (ref) != DF_REF_REGULAR
+   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
+  continue;
+else if (GET_CODE (pattern = PATTERN (insn)) == SET
+&& REG_P (SET_SRC (pattern))
+&& REGNO (SET_SRC (pattern)) == regno
+&& REG_NREGS (SET_SRC (pattern)) == 1
+&& REG_P (SET_DEST (pattern)))
+  {
+   if (insnR)
+ return false;
+   insnR = insn;
+   continue;
+  }
+else
+  return false;
+
+  if (!insnS || !insnR)
+return false;
+
+  *p_insnS = insnS, *p_insnR = insnR;
+
+  return true;
+}
+
 /* minimum frame = reg save area (4 words) plus static chain (1 word)
and the total number of words must be a multiple of 128 bits.  */
 #define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
@@ -3382,6 +3442,7 @@ xtensa_expand_prologue (void)
   df_ref ref;
   bool stack_pointer_needed = frame_pointer_needed

[Bug libstdc++/77760] get_time needs to set tm_wday and tm_yday

2023-02-16 Thread aoliva at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77760

--- Comment #8 from Alexandre Oliva  ---
https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612198.html has a
simple-minded implementation, that should make it clear what I mean by scratch:
get() pays no regard to the incoming bits in tm, it initializes them with a
zeroed-out state.

Now, I realize that do_get, if called by a derived class with an uninitialized
tm, might do weird things, because it would take some of those bits as state. 
Is this something of concern?  As in, how internal and reserved for the
implementation is the intermediate state of tm between get and do_get?

Re: [PATCH] PR tree-optimization/108697 - Create a lazy ssa_cache

2023-02-16 Thread Richard Biener via Gcc-patches
On Thu, Feb 16, 2023 at 3:34 PM Andrew MacLeod  wrote:
>
>
> On 2/16/23 02:55, Richard Biener wrote:
> > On Wed, Feb 15, 2023 at 6:07 PM Andrew MacLeod via Gcc-patches
> >  wrote:
> >> This patch implements the suggestion that we have an alternative
> >> ssa-cache which does not zero memory, and instead uses a bitmap to track
> >> whether a value is currently set or not.  It roughly mimics what
> >> path_range_query was doing internally.
> >>
> > >> For sparsely used cases, especially in large programs, this is more
> > >> efficient.  I changed path_range_query to use this, and removed its old
> > >> bitmap (and a hack or two around PHI calculations), and also utilized
> > >> this in the assume_query class.
> >>
> >> Performance wise, the patch doesn't affect VRP (since that still uses
> >> the original version).  Switching to the lazy version caused a slowdown
> >> of 2.5% across VRP.
> >>
> > >> There was a noticeable improvement elsewhere: across 230 GCC source
> > >> files, threading ran over 12% faster!  Overall compilation improved by
> > >> 0.3%.  Not sure it makes much difference in compiler.i, but it shouldn't
> > >> hurt.
> >>
> >> bootstraps on x86_64-pc-linux-gnu with no regressions.   OK for trunk?
> >> or do you want to wait for the next release...
> > I see
> >
> > @@ -365,16 +335,8 @@ path_range_query::compute_ranges_in_phis (basic_block 
> > bb)
> >
> > Value_Range r (TREE_TYPE (name));
> > if (range_defined_in_block (r, name, bb))
> > -   {
> > - unsigned v = SSA_NAME_VERSION (name);
> > - set_cache (r, name);
> > - bitmap_set_bit (phi_set, v);
> > - // Pretend we don't have a cache entry for this name until
> > - // we're done with all PHIs.
> > - bitmap_clear_bit (m_has_cache_entry, v);
> > -   }
> > +   m_cache.set_global_range (name, r);
> >   }
> > -  bitmap_ior_into (m_has_cache_entry, phi_set);
> >   }
> >
> >   // Return TRUE if relations may be invalidated after crossing edge E.
> >
> > which I think is not correct - if we have
> >
> >   # _1 = PHI <..., _2>
> >   # _2 = PHI <..., _1>
> >
> > then their effects are supposed to be executed in parallel, that is,
> > both PHI argument _2 and _1 are supposed to see the "old" version.
> > The previous code tried to make sure the range of the new _1 doesn't
> > get seen when processing the argument _1 in the definition of _2.
> >
> > The new version drops this, possibly resulting in wrong-code.
>
> This is dropped because it is actually handled properly in
> range_defined_in_block now.  (which I think Aldy was describing).
>
> It didn't make sense to me why it was handled here like this, so I traced
> through the call chain to find out if it was still actually needed and
> discussed it with Aldy.  I think it was mostly a leftover wart.

Ah, thanks for checking.

> >
> > While I think it's appropriate to sort out compile-time issues like this
> > during stage4 at least the above makes me think it should be deferred
> > to next stage1.
>
> I am happy to defer it since it's a marginal increase anyway.

Sure - thus OK for stage1.

Thanks,
Richard.

>
> Andrew
>
>


[Bug target/94649] 16-byte aligned atomic_compare_exchange does not generate cmpxchg16b on x86_64

2023-02-16 Thread balder at yahooinc dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94649

--- Comment #7 from Henning Baldersheim  ---
Thanks, perhaps add 104688 to the see also list.

Re: [PATCH] -Wdangling-pointer: don't mark SSA lhs sets as stores

2023-02-16 Thread Richard Biener via Gcc-patches
On Fri, Feb 17, 2023 at 8:09 AM Alexandre Oliva via Gcc-patches
 wrote:
>
>
> check_dangling_stores has some weirdnesses that cause its behavior to
> change when the target ABI requires C++ ctors to return this: while
> scanning stmts backwards in e.g. the AS ctor on a target that returns
> this in ctors, the scan first encounters a copy of this to the SSA
> name used to hold the return value.  m_ptr_query.get_ref resolves lhs
> (the return SSA name) to the rhs (the default SSA name for this), does
> not skip it because auto_var_p is false for SSA_NAMEs, and proceeds to
> add it to stores, which seems to prevent later attempts to add stores
> into *this from succeeding, which disables warnings that should have
> triggered.
>
> This is also the case when the backwards search finds unrelated stores
> to other fields of *this before it reaches stores that IMHO should be
> warned about.  The store found first disables checking of other
> stores, as if the store appearing later in the code would necessarily
> overwrite the store that should be warned about.  I've added an
> xfailed variant of the existing test (struct An) that triggers this
> problem, but I'm not sure how to go about fixing it.
>
> Meanwhile, this patch prevents assignments with SSA_NAMEs in the lhs
> from being regarded as stores, which is enough to remove the
> undesirable side effect on -Wdangling-pointer of ABI-mandated ctors'
> returning this.  Another variant of the existing test (struct Al)
> demonstrates the problem regardless of this aspect of the ABI, and
> gets the desired warning with the proposed patch, but not
> without it.
>
> Curiously, this fix exposes yet another problem in
> Wdangling-pointer-5.c: it is the return stmt of the unrelated pointer
> p, not the store into possibly-overlapping *vpp2, that caused the
> warning to not be issued for the store in *vpp1.  I'm not sure whether
> we should or should not warn in that case, but this patch adjusts the
> test to reflect the behavior change.
>
> Regstrapped on x86_64-linux-gnu.
> Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

It seems the case should run into

  else if (TREE_CODE (lhs_ref.ref) == SSA_NAME)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (lhs_ref.ref);
  if (!gimple_nop_p (def_stmt))
/* Avoid looking at or before stores into unknown objects.  */
return;

  tree var = SSA_NAME_VAR (lhs_ref.ref);
  if (TREE_CODE (var) == PARM_DECL && DECL_BY_REFERENCE (var))
/* Avoid by-value arguments transformed into by-reference.  */
continue;

and what your patch tried to avoid is running into

  if (stores.add (lhs_ref.ref))
continue;

?  I wonder what the circumstances are that we want the latter to happen if
the former condition is true?

> for  gcc/ChangeLog
>
> * gimple-ssa-warn-access.cc
> (pass_waccess::check_dangling_stores): Skip non-stores.
>
> for  gcc/testsuite/ChangeLog
>
> * g++.dg/warn/Wdangling-pointer.C (warn_init_ref_member): Add
> two new variants, one fixed, one xfailed.
> * c-c++-common/Wdangling-pointer-5.c
> (nowarn_store_arg_store_arg): Add now-expected warnings.
> ---
>  gcc/gimple-ssa-warn-access.cc|3 ++
>  gcc/testsuite/c-c++-common/Wdangling-pointer-5.c |4 ++-
>  gcc/testsuite/g++.dg/warn/Wdangling-pointer.C|   29 
> +-
>  3 files changed, 32 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc
> index 2eab1d59abd05..c0efb3fdb4e52 100644
> --- a/gcc/gimple-ssa-warn-access.cc
> +++ b/gcc/gimple-ssa-warn-access.cc
> @@ -4511,7 +4511,8 @@ pass_waccess::check_dangling_stores (basic_block bb,
>use the escaped locals.  */
> return;
>
> -  if (!is_gimple_assign (stmt) || gimple_clobber_p (stmt))
> +  if (!is_gimple_assign (stmt) || gimple_clobber_p (stmt)
> + || !gimple_store_p (stmt))
> continue;
>
>access_ref lhs_ref;
> diff --git a/gcc/testsuite/c-c++-common/Wdangling-pointer-5.c 
> b/gcc/testsuite/c-c++-common/Wdangling-pointer-5.c
> index 2a165cea76768..cb6da9e86394d 100644
> --- a/gcc/testsuite/c-c++-common/Wdangling-pointer-5.c
> +++ b/gcc/testsuite/c-c++-common/Wdangling-pointer-5.c
> @@ -75,9 +75,9 @@ void nowarn_store_arg_store (void **vpp)
>
>  void* nowarn_store_arg_store_arg (void **vpp1, void **vpp2)
>  {
> -  int x;
> +  int x;  // { dg-message "'x' declared here" }
>void **p = (void**)sink (0);
> -  *vpp1 =  // warn here?
> +  *vpp1 =  // { dg-warning "storing the address of local variable 
> 'x' in '\\*vpp1'" }
>*vpp2 = 0;  // might overwrite *vpp1
>return p;
>  }
> diff --git a/gcc/testsuite/g++.dg/warn/Wdangling-pointer.C 
> b/gcc/testsuite/g++.dg/warn/Wdangling-pointer.C
> index 22c559e4adafe..a94477a647666 100644
> --- 

[PATCH] [PR77760] [libstdc++] encode __time_get_state in tm

2023-02-16 Thread Alexandre Oliva via Gcc-patches


On platforms that fail the ptrtomemfn-cast-to-pfn hack, such as
arm-*-vxworks*, time_get fails with %I and %p because the state is not
preserved across do_get calls.

This patch introduces an alternate hack, that encodes the state in
unused bits of struct tm before calling do_get, extracts them in
do_get, does the processing, and encodes it back, so that get extracts
it.

The finalizer is adjusted for idempotence, because both do_get and get
may call it.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  libstdc++-v3/ChangeLog

PR libstdc++/77760
* include/bits/locale_facets_nonio.h (__time_get_state): Add
_M_state_tm, _M_save_to and _M_restore_from.
* include/bits/locale_facets_nonio.tcc (time_get::get): Drop
do_get-overriding hack.  Use state unconditionally, and encode
it in tm around do_get.
(time_get::do_get): Extract state from tm, and encode it back,
around parsing and finalizing.
* src/c++98/locale_facets.cc
(__time_get_state::_M_finalize_state): Make tm_hour and
tm_year idempotent.
---
 libstdc++-v3/include/bits/locale_facets_nonio.h   |   80 +
 libstdc++-v3/include/bits/locale_facets_nonio.tcc |   43 ++-
 libstdc++-v3/src/c++98/locale_facets.cc   |8 ++
 3 files changed, 93 insertions(+), 38 deletions(-)

diff --git a/libstdc++-v3/include/bits/locale_facets_nonio.h 
b/libstdc++-v3/include/bits/locale_facets_nonio.h
index 372cf0429501d..711bede158427 100644
--- a/libstdc++-v3/include/bits/locale_facets_nonio.h
+++ b/libstdc++-v3/include/bits/locale_facets_nonio.h
@@ -361,6 +361,86 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 void
 _M_finalize_state(tm* __tm);
 
+  private:
+void
+_M_state_tm(tm* __tm, bool __totm)
+{
+  // Check we don't invade the in-range tm bits, even if int is
+  // 16-bits wide.
+#define _M_min_shift_tm_sec 6
+#define _M_min_shift_tm_min 6
+#define _M_min_shift_tm_hour 5
+#define _M_min_shift_tm_mday 5
+#define _M_min_shift_tm_mon 4
+#define _M_min_shift_tm_year 16 // 14, but signed, so avoid it.
+#define _M_min_shift_tm_wday 3
+#define _M_min_shift_tm_yday 9
+#define _M_min_shift_tm_isdst 1
+  // Represent __STF in __WDT bits of __TMF up to the __MSB bit.
+  // In __MSB, 0 stands for the most significant bit of __TMF,
+  // 1 the bit next to it, and so on.
+#define _M_time_get_state_bitfield_inout(__tmf, __msb, __wdt, __stf)   \
+  do   \
+  {\
+const unsigned __shift = (sizeof (__tm->__tmf) * __CHAR_BIT__  \
+ - (__msb) - (__wdt)); \
+static char __attribute__ ((__unused__))   \
+  __check_parms_##__tmf[(__msb) >= 0 && (__wdt) > 0
\
+   && __shift >= (_M_min_shift_##__tmf \
+  + (sizeof (__tm->__tmf)  \
+ * __CHAR_BIT__) - 16) \
+   ? 1 : -1];  \
+const unsigned __mask = ((1 << (__wdt)) - 1) << __shift;   \
+if (!__totm)   \
+  this->__stf = (__tm->__tmf & __mask) >> __shift; \
+__tm->__tmf &= ~__mask;\
+if (__totm)
\
+  __tm->__tmf |= ((unsigned)this->__stf << __shift) & __mask;  \
+}  \
+  while (0)
+
+  _M_time_get_state_bitfield_inout (tm_hour,  0, 1, _M_have_I);
+  _M_time_get_state_bitfield_inout (tm_wday,  0, 1, _M_have_wday);
+  _M_time_get_state_bitfield_inout (tm_yday,  0, 1, _M_have_yday);
+  _M_time_get_state_bitfield_inout (tm_mon,   0, 1, _M_have_mon);
+  _M_time_get_state_bitfield_inout (tm_mday,  0, 1, _M_have_mday);
+  _M_time_get_state_bitfield_inout (tm_yday,  1, 1, _M_have_uweek);
+  _M_time_get_state_bitfield_inout (tm_yday,  2, 1, _M_have_wweek);
+  _M_time_get_state_bitfield_inout (tm_isdst, 0, 1, _M_have_century);
+  _M_time_get_state_bitfield_inout (tm_hour,  1, 1, _M_is_pm);
+  _M_time_get_state_bitfield_inout (tm_isdst, 1, 1, _M_want_century);
+  _M_time_get_state_bitfield_inout (tm_yday,  3, 1, _M_want_xday);
+  // _M_pad1
+  _M_time_get_state_bitfield_inout (tm_wday,  1, 6, _M_week_no);
+  // _M_pad2
+  _M_time_get_state_bitfield_inout (tm_mon,   1, 8, _M_century);
+  // _M_pad3
+
+#undef _M_min_shift_tm_hour
+#undef _M_min_shift_tm_sec
+#undef _M_min_shift_tm_min
+#undef _M_min_shift_tm_hour
+#undef _M_min_shift_tm_mday
+#undef _M_min_shift_tm_mon
+#undef 

[PATCH] [libstdc++] ensure mutex_pool survives _Safe_sequence_base

2023-02-16 Thread Alexandre Oliva via Gcc-patches


On vxworks, after destroying the semaphore used to implement a mutex,
__gthread_mutex_lock fails and __gnu_cxx::__mutex::lock calls
__throw_concurrence_lock_error.  Nothing ensures the mutex_pool
mutexes survive init-once objects containing _Safe_sequence_base.  If
such an object completes construction before mutex_pool
initialization, it will be registered for atexit destruction after the
mutex_pool mutexes, so the _M_detach_all() call in the
_Safe_sequence_base dtor will use already-destructed mutexes, and
basic_string/requirements/citerators_cc fails calling terminate.

This patch fixes this problem by ensuring the mutex pool completes
construction before any _Safe_sequence_base-containing object, so that
the mutex pool survives them all.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  libstdc++-v3/ChangeLog

* include/debug/safe_base.h (_Safe_sequence_base): Ensure
the mutex pool survives *this.
---
 libstdc++-v3/include/debug/safe_base.h |   10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/debug/safe_base.h 
b/libstdc++-v3/include/debug/safe_base.h
index 1dfa9f68b65b5..d4ba404cdac6e 100644
--- a/libstdc++-v3/include/debug/safe_base.h
+++ b/libstdc++-v3/include/debug/safe_base.h
@@ -203,7 +203,15 @@ namespace __gnu_debug
 // Initialize with a version number of 1 and no iterators
 _Safe_sequence_base() _GLIBCXX_NOEXCEPT
 : _M_iterators(0), _M_const_iterators(0), _M_version(1)
-{ }
+{
+  // Make sure the mutex_pool machinery is initialized before any
+  // full object containing a _Safe_sequence_base completes
+  // construction, so that any local static mutexes in the mutex
+  // pool won't be destructed before our destructor runs;
+  // _M_detach_all could fail otherwise, on targets whose mutexes
+  // stop working after being destroyed.
+  (void)this->_M_get_mutex();
+}
 
 #if __cplusplus >= 201103L
 _Safe_sequence_base(const _Safe_sequence_base&) noexcept

-- 
Alexandre Oliva, happy hacker    https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PATCH] [arm] [vxworks] xfail fp-double-convert-float-1.c

2023-02-16 Thread Alexandre Oliva via Gcc-patches


Even with vcvt.f32.f64, the FE_UPWARD test rounds down and fails.  I'm
not sure whether this opcode disregards the rounding mode (it looks
like it should take it into account) or it is a qemu bug, but it does
not look like GCC is doing anything wrong, and the test fails, so I'm
marking the fail as expected on arm-*-vxworks*.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

* gcc.dg/torture/fp-double-convert-float-1.c: XFAIL on
arm-*-vxworks*.
---
 .../gcc.dg/torture/fp-double-convert-float-1.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/torture/fp-double-convert-float-1.c 
b/gcc/testsuite/gcc.dg/torture/fp-double-convert-float-1.c
index 1c28a9e101eb7..c3ca69d64bbc6 100644
--- a/gcc/testsuite/gcc.dg/torture/fp-double-convert-float-1.c
+++ b/gcc/testsuite/gcc.dg/torture/fp-double-convert-float-1.c
@@ -1,5 +1,5 @@
 /* PR57245 */
-/* { dg-do run } */
+/* { dg-do run { xfail { arm-*-vxworks* } } } */
 /* { dg-require-effective-target fenv } */
 /* { dg-require-effective-target hard_float } */
 /* { dg-additional-options "-frounding-math" } */

-- 
Alexandre Oliva, happy hacker    https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PATCH] [libstdc++] xfail noreplace tests on vxworks

2023-02-16 Thread Alexandre Oliva via Gcc-patches


vxworks ignores O_EXCL in open, so noreplace open succeeds when it is
expected to fail.  xfail the tests.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  libstdc++-v3/ChangeLog

* testsuite/27_io/basic_ofstream/open/char/noreplace.cc: xfail
on vxworks.
* testsuite/27_io/basic_ofstream/open/wchar_t/noreplace.cc:
Likewise.
---
 .../27_io/basic_ofstream/open/char/noreplace.cc|2 +-
 .../27_io/basic_ofstream/open/wchar_t/noreplace.cc |2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/testsuite/27_io/basic_ofstream/open/char/noreplace.cc 
b/libstdc++-v3/testsuite/27_io/basic_ofstream/open/char/noreplace.cc
index 56ff2d7cead3c..2e99707df86d0 100644
--- a/libstdc++-v3/testsuite/27_io/basic_ofstream/open/char/noreplace.cc
+++ b/libstdc++-v3/testsuite/27_io/basic_ofstream/open/char/noreplace.cc
@@ -1,4 +1,4 @@
-// { dg-do run }
+// { dg-do run { xfail *-*-vxworks* } }
 
 #include 
 
diff --git 
a/libstdc++-v3/testsuite/27_io/basic_ofstream/open/wchar_t/noreplace.cc 
b/libstdc++-v3/testsuite/27_io/basic_ofstream/open/wchar_t/noreplace.cc
index f0425cdab3d23..ddb7fd691608c 100644
--- a/libstdc++-v3/testsuite/27_io/basic_ofstream/open/wchar_t/noreplace.cc
+++ b/libstdc++-v3/testsuite/27_io/basic_ofstream/open/wchar_t/noreplace.cc
@@ -1,4 +1,4 @@
-// { dg-do run }
+// { dg-do run { xfail *-*-vxworks* } }
 
 #include 
 

-- 
Alexandre Oliva, happy hacker    https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PATCH] [PR104882] [arm] require mve hw for mve run test

2023-02-16 Thread Alexandre Oliva via Gcc-patches


The pr104882.c test is an execution test, but arm_v8_1m_mve_ok only
tests for compile-time support.  Add a requirement for mve hardware.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

PR target/104882
* gcc.target/arm/simd/pr104882.c: Require mve hardware.
---
 gcc/testsuite/gcc.target/arm/simd/pr104882.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/gcc.target/arm/simd/pr104882.c 
b/gcc/testsuite/gcc.target/arm/simd/pr104882.c
index ae9709af42f22..1ea7a14836f54 100644
--- a/gcc/testsuite/gcc.target/arm/simd/pr104882.c
+++ b/gcc/testsuite/gcc.target/arm/simd/pr104882.c
@@ -1,4 +1,5 @@
 /* { dg-do run } */
+/* { dg-require-effective-target arm_mve_hw } */
 /* { dg-require-effective-target arm_v8_1m_mve_ok } */
 /* { dg-add-options arm_v8_1m_mve } */
 /* { dg-additional-options "-O2" } */

-- 
Alexandre Oliva, happy hacker    https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


Re: [PATCH] simplify-rtx: Fix VOIDmode operand handling in simplify_subreg [PR108805]

2023-02-16 Thread Richard Biener via Gcc-patches
On Thu, 16 Feb 2023, Uros Bizjak wrote:

> simplify_subreg can return VOIDmode const_int operand and will
> cause ICE in simplify_gen_subreg when this operand is passed to it.
> 
> The patch prevents VOIDmode temporary from entering simplify_gen_subreg.
> We can't process const_int operand any further, since outermode
> is not an integer mode here.

But if it's a CONST_INT then we know it's of int_outermode, no? That is,
doesn't simplify_subreg (mode, ...) always return something in 'mode'
and thus we can always pass just 'mode' as third argument to the
following simplify_gen_subreg call?

Richard.

> 2023-02-16  Uroš Bizjak  
> 
> gcc/ChangeLog:
> 
> PR target/108805
> * simplify_rtx.cc (simplify_context::simplify_subreg): Prevent
> VOIDmode const_int result from simplify_subreg from entering
> simplify_gen_subreg.
> 
> gcc/testsuite/ChangeLog:
> 
> PR target/108805
> * gcc.dg/pr108805.c: New test.
> 
> Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
> 
> OK for master and release branches?
> 
> Uros.
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)


[PATCH] [arm] complete vmsr/vmrs blank and case adjustments

2023-02-16 Thread Alexandre Oliva via Gcc-patches


Back in September last year, some of the vmsr and vmrs patterns had an
extraneous blank removed, and the case of register names lowered, but
another instance remained, and so did a few testcases.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/ChangeLog

* config/arm/vfp.md (*thumb2_movsi_vfp): Drop blank after tab
after vmsr and vmrs, and lower the case of P0.

for  gcc/testsuite/ChangeLog

* gcc.target/arm/acle/cde-mve-full-assembly.c: Drop blank
after tab after vmsr, and lower the case of P0.
---
 gcc/config/arm/vfp.md  |4 
 .../gcc.target/arm/acle/cde-mve-full-assembly.c|  264 ++--
 2 files changed, 134 insertions(+), 134 deletions(-)

diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index f34f35e1185e2..60e7ba35d8b25 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -312,9 +312,9 @@ (define_insn "*thumb2_movsi_vfp"
 case 12: case 13:
   return output_move_vfp (operands);
 case 14:
-  return \"vmsr\\t P0, %1\";
+  return \"vmsr\\tp0, %1\";
 case 15:
-  return \"vmrs\\t %0, P0\";
+  return \"vmrs\\t%0, p0\";
 case 16:
   return \"mcr\\tp10, 7, %1, cr1, cr0, 0\\t @SET_FPSCR\";
 case 17:
diff --git a/gcc/testsuite/gcc.target/arm/acle/cde-mve-full-assembly.c 
b/gcc/testsuite/gcc.target/arm/acle/cde-mve-full-assembly.c
index d025c3391fbe5..72f330185944a 100644
--- a/gcc/testsuite/gcc.target/arm/acle/cde-mve-full-assembly.c
+++ b/gcc/testsuite/gcc.target/arm/acle/cde-mve-full-assembly.c
@@ -534,80 +534,80 @@
contain back references).  */
 /*
 ** test_cde_vcx1q_mfloat16x8_tintint:
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
 ** vpst
 ** vcx1t   p0, q0, #32
 ** bx  lr
 */
 /*
 ** test_cde_vcx1q_mfloat32x4_tintint:
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
 ** vpst
 ** vcx1t   p0, q0, #32
 ** bx  lr
 */
 /*
 ** test_cde_vcx1q_muint8x16_tintint:
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
 ** vpst
 ** vcx1t   p0, q0, #32
 ** bx  lr
 */
 /*
 ** test_cde_vcx1q_muint16x8_tintint:
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
 ** vpst
 ** vcx1t   p0, q0, #32
 ** bx  lr
 */
 /*
 ** test_cde_vcx1q_muint32x4_tintint:
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
 ** vpst
 ** vcx1t   p0, q0, #32
 ** bx  lr
 */
 /*
 ** test_cde_vcx1q_muint64x2_tintint:
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
 ** vpst
 ** vcx1t   p0, q0, #32
 ** bx  lr
 */
 /*
 ** test_cde_vcx1q_mint8x16_tintint:
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
 ** vpst
 ** vcx1t   p0, q0, #32
 ** bx  lr
 */
 /*
 ** test_cde_vcx1q_mint16x8_tintint:
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
 ** vpst
 ** vcx1t   p0, q0, #32
 ** bx  lr
 */
 /*
 ** test_cde_vcx1q_mint32x4_tintint:
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
 ** vpst
 ** vcx1t   p0, q0, #32
 ** bx  lr
 */
 /*
 ** 

Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment

2023-02-16 Thread Richard Biener via Gcc-patches
On Thu, 16 Feb 2023, juzhe.zhong wrote:

> Thanks for the great work to fix this issue for rvv. Hi, Richard. This is the
> patch to differentiate mask mode of same bytesize. Adjust the precision
> correctly according to rvv isa. Would you mind helping us with this patch?
> Since it's very important for rvv support in gcc 

If adjusting the precision works fine then I suppose the patch looks
reasonable.  I'll defer to Richard S. though since he's the one knowing
the mode stuff better.  I'd have integrated the precision adjustment
with the ADJUST_NITER hook since that is also documented to adjust
the precision btw.

Richard.

> Thanks. 
>  Replied Message 
> From
> incarnation.p@outlook.com
> Date
> 02/16/2023 23:12
> To
> gcc-patches@gcc.gnu.org
> Cc
> juzhe.zh...@rivai.ai,
> kito.ch...@sifive.com,
> rguent...@suse.de,
> pan2...@intel.com
> Subject
> [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment
> From: Pan Li 
> 
>    Fix the bug of the rvv bool mode precision with the adjustment.
>    The bits size of vbool*_t will be adjusted to
>    [1, 2, 4, 8, 16, 32, 64] according to the rvv spec 1.0 isa. The
>    adjusted mode precision of vbool*_t will help underlying pass to
>    make the right decision for both the correctness and optimization.
> 
>    Given below sample code:
>    void test_1(int8_t * restrict in, int8_t * restrict out)
>    {
>      vbool8_t v2 = *(vbool8_t*)in;
>      vbool16_t v5 = *(vbool16_t*)in;
>      *(vbool16_t*)(out + 200) = v5;
>      *(vbool8_t*)(out + 100) = v2;
>    }
> 
>    Before the precision adjustment:
>    addi    a4,a1,100
>    vsetvli a5,zero,e8,m1,ta,ma
>    addi    a1,a1,200
>    vlm.v   v24,0(a0)
>    vsm.v   v24,0(a4)
>    // Need one vsetvli and vlm.v for correctness here.
>    vsm.v   v24,0(a1)
> 
>    After the precision adjustment:
>    csrr    t0,vlenb
>    slli    t1,t0,1
>    csrr    a3,vlenb
>    sub sp,sp,t1
>    slli    a4,a3,1
>    add a4,a4,sp
>    sub a3,a4,a3
>    vsetvli a5,zero,e8,m1,ta,ma
>    addi    a2,a1,200
>    vlm.v   v24,0(a0)
>    vsm.v   v24,0(a3)
>    addi    a1,a1,100
>    vsetvli a4,zero,e8,mf2,ta,ma
>    csrr    t0,vlenb
>    vlm.v   v25,0(a3)
>    vsm.v   v25,0(a2)
>    slli    t1,t0,1
>    vsetvli a5,zero,e8,m1,ta,ma
>    vsm.v   v24,0(a1)
>    add sp,sp,t1
>    jr  ra
> 
>    However, there may be some optimization opportunities after
>    the mode precision adjustment. It can be taken care of in
>    the RISC-V backend in the underlying separate PR(s).
> 
>    PR 108185
>    PR 108654
> 
> gcc/ChangeLog:
> 
>    * config/riscv/riscv-modes.def (ADJUST_PRECISION):
>    * config/riscv/riscv.cc (riscv_v_adjust_precision):
>    * config/riscv/riscv.h (riscv_v_adjust_precision):
>    * genmodes.cc (ADJUST_PRECISION):
>    (emit_mode_adjustments):
> 
> gcc/testsuite/ChangeLog:
> 
>    * gcc.target/riscv/pr108185-1.c: New test.
>    * gcc.target/riscv/pr108185-2.c: New test.
>    * gcc.target/riscv/pr108185-3.c: New test.
>    * gcc.target/riscv/pr108185-4.c: New test.
>    * gcc.target/riscv/pr108185-5.c: New test.
>    * gcc.target/riscv/pr108185-6.c: New test.
>    * gcc.target/riscv/pr108185-7.c: New test.
>    * gcc.target/riscv/pr108185-8.c: New test.
> 
> Signed-off-by: Pan Li 
> ---
> gcc/config/riscv/riscv-modes.def    |  8 +++
> gcc/config/riscv/riscv.cc   | 12 
> gcc/config/riscv/riscv.h    |  1 +
> gcc/genmodes.cc | 25 ++-
> gcc/testsuite/gcc.target/riscv/pr108185-1.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-2.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-3.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-4.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-5.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-6.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-7.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-8.c | 77 +
> 12 files changed, 598 insertions(+), 1 deletion(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-1.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-2.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-3.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-4.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-5.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-6.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-7.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-8.c
> 
> diff --git a/gcc/config/riscv/riscv-modes.def
> b/gcc/config/riscv/riscv-modes.def
> index d5305efa8a6..110bddce851 100644
> --- a/gcc/config/riscv/riscv-modes.def
> +++ b/gcc/config/riscv/riscv-modes.def
> @@ -72,6 +72,14 @@ ADJUST_BYTESIZE (VNx16BI, riscv_vector_chunks *
> riscv_bytes_per_vector_chunk);
> ADJUST_BYTESIZE (VNx32BI, riscv_vector_chunks *
> 

[Bug middle-end/107411] trivial-auto-var-init=zero invalid uninitialized variable warning

2023-02-16 Thread rguenther at suse dot de via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107411

--- Comment #13 from rguenther at suse dot de  ---
On Thu, 16 Feb 2023, qing.zhao at oracle dot com wrote:

> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107411
> 
> --- Comment #8 from Qing Zhao  ---
> > On Feb 16, 2023, at 2:35 AM, rguenther at suse dot de 
> >  wrote:
> > 
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107411
> > 
> > --- Comment #7 from rguenther at suse dot de  ---
> > On Wed, 15 Feb 2023, qinzhao at gcc dot gnu.org wrote:
> > 
> > 
> > Hmm, I don't think so.  So this is indeed expected behavior since the
> > frontend IL doesn't have variable definitions with initializers but
> > instead just (immediately following) assignments.
> 
> Then, if that’s the case, it also is correct to add the .DEFERRED_INIT to them
> during gimplification?

Yes.

[PATCH] [arm] adjust expectations for armv8_2-fp16-move-[12].c

2023-02-16 Thread Alexandre Oliva via Gcc-patches


Commit 3a7ba8fd0cda387809e4902328af2473662b6a4a, a patch for
tree-ssa-sink, enabled the removal of basic blocks in ways that
affected the generated code for both of these tests, deviating from
the expectations of the tests.

The simplest case is that of -2, in which the edge unsplitting ends up
enabling a conditional return rather than a conditional branch to a
set-and-return block.  That looks like an improvement to me, but since
the condition in which the branch or the return takes place can be
reasonably reversed (and, with the current code, it is), I've relaxed
the pattern in the test so as to accept reversed and unreversed
conditions applied to return or branch opcodes.

The situation in -1 is a little more elaborate: conditional branches
based on FP compares in test_select_[78] are initially expanded with
CCFPE compare-and-cbranch on G{T,E}, but when ce2 turns those into a
cmove, because now we have a different fallthrough block, the
condition is reversed, and that lands us with a compare-and-cmove
sequence that needs CCFP for UNL{E,T}.  The insn output reverses the
condition and swaps the cmove input operands, so the vcmp and vsel
insns come out the same except for the missing 'e' (for the compare
mode) in vcmp, so, since such reversals could have happened to any of
the tests depending on legitimate basic block layout, I've combined
the vcmp and vcmpe counts.

I see room for improving cmove sequence generation, e.g. trying direct
and reversed conditions and selecting the cheapest one (which would
require CCFP conditions to be modeled as more expensive than CCFPE),
or for some other machine-specific (peephole2?) optimization to turn
CCFP-requiring compare and cmove into CCFPE compare and swapped-inputs
cmove, but I haven't tried that.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

* gcc.target/arm/armv8_2-fp16-move-1.c: Combine vcmp and vcmpe
expected counts into a single pattern.
* gcc.target/arm/armv8_2-fp16-move-2.c: Accept conditional
return and reversed conditions.
---
 gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c |3 +--
 gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-2.c |2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c 
b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c
index 009bb8d1575a4..444c4a3353555 100644
--- a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c
+++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c
@@ -196,5 +196,4 @@ test_compare_5 (__fp16 a, __fp16 b)
 /* { dg-final { scan-assembler-not {vcmp\.f16} } }  */
 /* { dg-final { scan-assembler-not {vcmpe\.f16} } }  */
 
-/* { dg-final { scan-assembler-times {vcmp\.f32} 4 } }  */
-/* { dg-final { scan-assembler-times {vcmpe\.f32} 8 } }  */
+/* { dg-final { scan-assembler-times {vcmpe?\.f32} 12 } }  */
diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-2.c 
b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-2.c
index fcb857f29ff15..dff57ac8147c2 100644
--- a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-2.c
+++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-2.c
@@ -8,4 +8,4 @@ test_select (__fp16 a, __fp16 b, __fp16 c)
 {
   return (a < b) ? b : c;
 }
-/* { dg-final { scan-assembler "bmi" } } */
+/* { dg-final { scan-assembler "bx?(mi|pl)" } } */

-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PATCH] [PR51534] [arm] split out pr51534 test for softfp

2023-02-16 Thread Alexandre Oliva via Gcc-patches


The test uses arm_hard_ok and arm_softfp_ok as if they were mutually
exclusive, but they test whether the corresponding -mfloat-abi= flag
is usable, not whether it is in effect, so it is possible for both to
pass, and then the test comes out with incorrect expectations
whichever the default float-abi is.

Separate the test into hard and softfp variants, and extend the softfp
variant to accept both ARM and Thumb opcodes; it unwarrantedly assumed
the latter.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

PR target/51534
* gcc.target/arm/pr51534.c: Split softfp variant into...
* gcc.target/arm/pr51534s.c: ... this, and support ARM too.
---
 gcc/testsuite/gcc.target/arm/pr51534.c  |9 ++--
 gcc/testsuite/gcc.target/arm/pr51534s.c |   72 +++
 2 files changed, 76 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/pr51534s.c

diff --git a/gcc/testsuite/gcc.target/arm/pr51534.c 
b/gcc/testsuite/gcc.target/arm/pr51534.c
index 5e121f5fb9946..ba21259bee554 100644
--- a/gcc/testsuite/gcc.target/arm/pr51534.c
+++ b/gcc/testsuite/gcc.target/arm/pr51534.c
@@ -1,9 +1,9 @@
 /* Test the vector comparison intrinsics when comparing to immediate zero.
*/
 
-/* { dg-do assemble } */
+/* { dg-do assemble { target { arm_hard_ok } } } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O3" } */
+/* { dg-options "-save-temps -mfloat-abi=hard -O3" } */
 /* { dg-add-options arm_neon } */
 
 #include <arm_neon.h>
@@ -64,9 +64,8 @@ GEN_COND_TESTS(vceq)
 /* { dg-final { scan-assembler-times "vceq\.i8\[   \]+\[qQ\]\[0-9\]+, 
\[qQ\]\[0-9\]+, #0" 4 } } */
 /* { dg-final { scan-assembler-times "vceq\.i16\[  \]+\[qQ\]\[0-9\]+, 
\[qQ\]\[0-9\]+, #0" 4 } } */
 /* { dg-final { scan-assembler-times "vceq\.i32\[  \]+\[qQ\]\[0-9\]+, 
\[qQ\]\[0-9\]+, #0" 4 } } */
-/* { dg-final { scan-assembler-times "vmov\.i32\[  \]+\[dD\]\[0-9\]+, 
#0x" 3 { target { arm_hard_ok } } } } */
-/* { dg-final { scan-assembler-times "vmov\.i32\[  \]+\[qQ\]\[0-9\]+, 
#4294967295" 3 { target { arm_hard_ok } } } } */
-/* { dg-final { scan-assembler-times "mov\[\]+r\[0-9\]+, #-1" 6 { target { 
arm_softfp_ok } } } } */
+/* { dg-final { scan-assembler-times "vmov\.i32\[  \]+\[dD\]\[0-9\]+, 
#0x" 3 } } */
+/* { dg-final { scan-assembler-times "vmov\.i32\[  \]+\[qQ\]\[0-9\]+, 
#4294967295" 3 } } */
 
 /* And ensure we don't have unexpected output too.  */
 /* { dg-final { scan-assembler-not "vc\[gl\]\[te\]\.u\[0-9\]+\[
\]+\[qQdD\]\[0-9\]+, \[qQdD\]\[0-9\]+, #0" } } */
diff --git a/gcc/testsuite/gcc.target/arm/pr51534s.c 
b/gcc/testsuite/gcc.target/arm/pr51534s.c
new file mode 100644
index 0..b1638919c2f75
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/pr51534s.c
@@ -0,0 +1,72 @@
+/* Test the vector comparison intrinsics when comparing to immediate zero.
+   */
+
+/* { dg-do assemble { target { arm_softfp_ok } } } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -mfloat-abi=softfp -O3" } */
+/* { dg-add-options arm_neon } */
+
+#include <arm_neon.h>
+
+#define GEN_TEST(T, D, C, R) \
+  R test_##C##_##T (T a) { return C (a, D (0)); }
+
+#define GEN_DOUBLE_TESTS(S, T, C) \
+  GEN_TEST (T, vdup_n_s##S, C##_s##S, u##T) \
+  GEN_TEST (u##T, vdup_n_u##S, C##_u##S, u##T) 
+
+#define GEN_QUAD_TESTS(S, T, C) \
+  GEN_TEST (T, vdupq_n_s##S, C##q_s##S, u##T) \
+  GEN_TEST (u##T, vdupq_n_u##S, C##q_u##S, u##T) 
+
+#define GEN_COND_TESTS(C) \
+  GEN_DOUBLE_TESTS (8, int8x8_t, C) \
+  GEN_DOUBLE_TESTS (16, int16x4_t, C) \
+  GEN_DOUBLE_TESTS (32, int32x2_t, C) \
+  GEN_QUAD_TESTS (8, int8x16_t, C) \
+  GEN_QUAD_TESTS (16, int16x8_t, C) \
+  GEN_QUAD_TESTS (32, int32x4_t, C)
+
+GEN_COND_TESTS(vcgt)
+GEN_COND_TESTS(vcge)
+GEN_COND_TESTS(vclt)
+GEN_COND_TESTS(vcle)
+GEN_COND_TESTS(vceq)
+
+/* Scan for expected outputs.  */
+/* { dg-final { scan-assembler "vcgt\.s8\[ \]+\[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler "vcgt\.s16\[\]+\[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler "vcgt\.s32\[\]+\[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler "vcgt\.s8\[ \]+\[qQ\]\[0-9\]+, 
\[qQ\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler "vcgt\.s16\[\]+\[qQ\]\[0-9\]+, 
\[qQ\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler "vcgt\.s32\[\]+\[qQ\]\[0-9\]+, 
\[qQ\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler "vcge\.s8\[ \]+\[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler "vcge\.s16\[\]+\[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler "vcge\.s32\[\]+\[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler "vcge\.s8\[ \]+\[qQ\]\[0-9\]+, 
\[qQ\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler "vcge\.s16\[

[PATCH] [arm] adjust tests for quotes around +cdecp

2023-02-16 Thread Alexandre Oliva via Gcc-patches


Back when quotes were added around "+cdecp" in the "coproc must be
a constant immediate" error in arm-builtins.cc, tests for that message
lagged behind.  Fixed thusly.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

* gcc.target/arm/acle/cde-errors.c: Adjust messages for quote
around +cdecp.
* gcc.target/arm/acle/cde-mve-error-2.c: Likewise.
---
 gcc/testsuite/gcc.target/arm/acle/cde-errors.c |   52 ++---
 .../gcc.target/arm/acle/cde-mve-error-2.c  |   82 ++--
 2 files changed, 67 insertions(+), 67 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/acle/cde-errors.c 
b/gcc/testsuite/gcc.target/arm/acle/cde-errors.c
index 85a91666cd5ef..f38514848677e 100644
--- a/gcc/testsuite/gcc.target/arm/acle/cde-errors.c
+++ b/gcc/testsuite/gcc.target/arm/acle/cde-errors.c
@@ -47,19 +47,19 @@ uint64_t test_cde (uint32_t n, uint32_t m)
   accum += __arm_cx3da (7, accum, n, m,   0); /* { dg-error 
{coprocessor 7 is not enabled with \+cdecp7} } */
 
   /* `coproc` out of range.  */
-  accum += __arm_cx1   (8,0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-  accum += __arm_cx1a  (8, (uint32_t)accum,   0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-  accum += __arm_cx2   (8, n, 0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-  accum += __arm_cx2a  (8, (uint32_t)accum, n,0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-  accum += __arm_cx3   (8, n, m,  0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-  accum += __arm_cx3a  (8, (uint32_t)accum, n, m, 0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-
-  accum += __arm_cx1d  (8,0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-  accum += __arm_cx1da (8, accum, 0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-  accum += __arm_cx2d  (8, n, 0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-  accum += __arm_cx2da (8, accum, n,  0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-  accum += __arm_cx3d  (8, n, m,  0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-  accum += __arm_cx3da (8, accum, n, m,   0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
+  accum += __arm_cx1   (8,0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+  accum += __arm_cx1a  (8, (uint32_t)accum,   0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+  accum += __arm_cx2   (8, n, 0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+  accum += __arm_cx2a  (8, (uint32_t)accum, n,0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+  accum += __arm_cx3   (8, n, m,  0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+  accum += __arm_cx3a  (8, (uint32_t)accum, n, m, 0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+
+  accum += __arm_cx1d  (8,0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+  accum += __arm_cx1da (8, accum, 0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+  accum += __arm_cx2d  (8, n, 0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+  accum += __arm_cx2da (8, accum, n,  0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+  accum += __arm_cx3d  (8, n, m,  0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+  accum += __arm_cx3da (8, accum, n, m,   0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
 
   /* `imm` out of range.  */
   accum += __arm_cx1   (0,8192); /* { dg-error 
{argument 2 to '__builtin_arm_cx1si' must be a constant immediate in range 
\[0-8191\]} } */
@@ 

RE: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment

2023-02-16 Thread Li, Pan2 via Gcc-patches
Thank you all.

Hi Richard,

Could you please help to review the precision adjustment related change when 
you are free? I am looking forward to your opinion on this issue from an 
expert's perspective!

Pan

From: juzhe.zhong 
Sent: Thursday, February 16, 2023 11:23 PM
To: incarnation.p@outlook.com
Cc: gcc-patches@gcc.gnu.org; kito.ch...@sifive.com; rguent...@suse.de; Li, Pan2 

Subject: Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment

Thanks for the great work to fix this issue for rvv.
Hi, Richard. This is the patch to differentiate mask mode of same bytesize. 
Adjust the precision correctly according to rvv isa. Would you mind helping us 
with this patch? Since it's very important for rvv support in gcc

Thanks.
 Replied Message 
From
incarnation.p@outlook.com
Date
02/16/2023 23:12
To
gcc-patches@gcc.gnu.org
Cc
juzhe.zh...@rivai.ai,
kito.ch...@sifive.com,
rguent...@suse.de,
pan2...@intel.com
Subject
[PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment
From: Pan Li mailto:pan2...@intel.com>>

   Fix the bug of the rvv bool mode precision with the adjustment.
   The bits size of vbool*_t will be adjusted to
   [1, 2, 4, 8, 16, 32, 64] according to the rvv spec 1.0 isa. The
   adjusted mode precision of vbool*_t will help underlying pass to
   make the right decision for both the correctness and optimization.

   Given below sample code:
   void test_1(int8_t * restrict in, int8_t * restrict out)
   {
 vbool8_t v2 = *(vbool8_t*)in;
 vbool16_t v5 = *(vbool16_t*)in;
 *(vbool16_t*)(out + 200) = v5;
 *(vbool8_t*)(out + 100) = v2;
   }

   Before the precision adjustment:
   addia4,a1,100
   vsetvli a5,zero,e8,m1,ta,ma
   addia1,a1,200
   vlm.v   v24,0(a0)
   vsm.v   v24,0(a4)
   // Need one vsetvli and vlm.v for correctness here.
   vsm.v   v24,0(a1)

   After the precision adjustment:
   csrrt0,vlenb
   sllit1,t0,1
   csrra3,vlenb
   sub sp,sp,t1
   sllia4,a3,1
   add a4,a4,sp
   sub a3,a4,a3
   vsetvli a5,zero,e8,m1,ta,ma
   addia2,a1,200
   vlm.v   v24,0(a0)
   vsm.v   v24,0(a3)
   addia1,a1,100
   vsetvli a4,zero,e8,mf2,ta,ma
   csrrt0,vlenb
   vlm.v   v25,0(a3)
   vsm.v   v25,0(a2)
   sllit1,t0,1
   vsetvli a5,zero,e8,m1,ta,ma
   vsm.v   v24,0(a1)
   add sp,sp,t1
   jr  ra

   However, there may be some optimization opportunities after
   the mode precision adjustment. It can be taken care of in
   the RISC-V backend in the underlying separate PR(s).

   PR 108185
   PR 108654

gcc/ChangeLog:

   * config/riscv/riscv-modes.def (ADJUST_PRECISION):
   * config/riscv/riscv.cc (riscv_v_adjust_precision):
   * config/riscv/riscv.h (riscv_v_adjust_precision):
   * genmodes.cc (ADJUST_PRECISION):
   (emit_mode_adjustments):

gcc/testsuite/ChangeLog:

   * gcc.target/riscv/pr108185-1.c: New test.
   * gcc.target/riscv/pr108185-2.c: New test.
   * gcc.target/riscv/pr108185-3.c: New test.
   * gcc.target/riscv/pr108185-4.c: New test.
   * gcc.target/riscv/pr108185-5.c: New test.
   * gcc.target/riscv/pr108185-6.c: New test.
   * gcc.target/riscv/pr108185-7.c: New test.
   * gcc.target/riscv/pr108185-8.c: New test.

Signed-off-by: Pan Li mailto:pan2...@intel.com>>
---
gcc/config/riscv/riscv-modes.def|  8 +++
gcc/config/riscv/riscv.cc   | 12 
gcc/config/riscv/riscv.h|  1 +
gcc/genmodes.cc | 25 ++-
gcc/testsuite/gcc.target/riscv/pr108185-1.c | 68 ++
gcc/testsuite/gcc.target/riscv/pr108185-2.c | 68 ++
gcc/testsuite/gcc.target/riscv/pr108185-3.c | 68 ++
gcc/testsuite/gcc.target/riscv/pr108185-4.c | 68 ++
gcc/testsuite/gcc.target/riscv/pr108185-5.c | 68 ++
gcc/testsuite/gcc.target/riscv/pr108185-6.c | 68 ++
gcc/testsuite/gcc.target/riscv/pr108185-7.c | 68 ++
gcc/testsuite/gcc.target/riscv/pr108185-8.c | 77 +
12 files changed, 598 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-6.c
create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-7.c
create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-8.c

diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def
index d5305efa8a6..110bddce851 100644
--- a/gcc/config/riscv/riscv-modes.def
+++ b/gcc/config/riscv/riscv-modes.def
@@ -72,6 +72,14 @@ ADJUST_BYTESIZE (VNx16BI, riscv_vector_chunks * 

[PATCH] -Wdangling-pointer: don't mark SSA lhs sets as stores

2023-02-16 Thread Alexandre Oliva via Gcc-patches


check_dangling_stores has some weirdnesses that cause its behavior to
change when the target ABI requires C++ ctors to return this: while
scanning stmts backwards in e.g. the AS ctor on a target that returns
this in ctors, the scan first encounters a copy of this to the SSA
name used to hold the return value.  m_ptr_query.get_ref resolves lhs
(the return SSA name) to the rhs (the default SSA name for this), does
not skip it because auto_var_p is false for SSA_NAMEs, and proceeds to
add it to stores, which seems to prevent later attempts to add stores
into *this from succeeding, which disables warnings that should have
triggered.

This is also the case when the backwards search finds unrelated stores
to other fields of *this before it reaches stores that IMHO should be
warned about.  The store found first disables checking of other
stores, as if the store appearing later in the code would necessarily
overwrite the store that should be warned about.  I've added an
xfailed variant of the existing test (struct An) that triggers this
problem, but I'm not sure how to go about fixing it.

Meanwhile, this patch prevents assignments with SSA_NAMEs in the lhs
from being regarded as stores, which is enough to remove the
undesirable side effect on -Wdangling-pointer of ABI-mandated ctors'
returning this.  I've also added another variant of the existing test
(struct Al) that demonstrates the problem regardless of this aspect of
the ABI, and that gets the desired warning with the proposed patch, but
not without.

Curiously, this fix exposes yet another problem in
Wdangling-pointer-5.c: it is the return stmt of the unrelated pointer
p, not the store into possibly-overlapping *vpp2, that caused the
warning to not be issued for the store in *vpp1.  I'm not sure whether
we should or should not warn in that case, but this patch adjusts the
test to reflect the behavior change.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/ChangeLog

* gimple-ssa-warn-access.cc
(pass_waccess::check_dangling_stores): Skip non-stores.

for  gcc/testsuite/ChangeLog

* g++.dg/warn/Wdangling-pointer.C (warn_init_ref_member): Add
two new variants, one fixed, one xfailed.
* c-c++-common/Wdangling-pointer-5.c
(nowarn_store_arg_store_arg): Add now-expected warnings.
---
 gcc/gimple-ssa-warn-access.cc|3 ++
 gcc/testsuite/c-c++-common/Wdangling-pointer-5.c |4 ++-
 gcc/testsuite/g++.dg/warn/Wdangling-pointer.C|   29 +-
 3 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc
index 2eab1d59abd05..c0efb3fdb4e52 100644
--- a/gcc/gimple-ssa-warn-access.cc
+++ b/gcc/gimple-ssa-warn-access.cc
@@ -4511,7 +4511,8 @@ pass_waccess::check_dangling_stores (basic_block bb,
   use the escaped locals.  */
return;
 
-  if (!is_gimple_assign (stmt) || gimple_clobber_p (stmt))
+  if (!is_gimple_assign (stmt) || gimple_clobber_p (stmt)
+ || !gimple_store_p (stmt))
continue;
 
   access_ref lhs_ref;
diff --git a/gcc/testsuite/c-c++-common/Wdangling-pointer-5.c 
b/gcc/testsuite/c-c++-common/Wdangling-pointer-5.c
index 2a165cea76768..cb6da9e86394d 100644
--- a/gcc/testsuite/c-c++-common/Wdangling-pointer-5.c
+++ b/gcc/testsuite/c-c++-common/Wdangling-pointer-5.c
@@ -75,9 +75,9 @@ void nowarn_store_arg_store (void **vpp)
 
 void* nowarn_store_arg_store_arg (void **vpp1, void **vpp2)
 {
-  int x;
+  int x;  // { dg-message "'x' declared here" }
   void **p = (void**)sink (0);
-  *vpp1 = &x;  // warn here?
+  *vpp1 = &x;  // { dg-warning "storing the address of local variable 
'x' in '\\*vpp1'" }
   *vpp2 = 0;  // might overwrite *vpp1
   return p;
 }
diff --git a/gcc/testsuite/g++.dg/warn/Wdangling-pointer.C 
b/gcc/testsuite/g++.dg/warn/Wdangling-pointer.C
index 22c559e4adafe..a94477a647666 100644
--- a/gcc/testsuite/g++.dg/warn/Wdangling-pointer.C
+++ b/gcc/testsuite/g++.dg/warn/Wdangling-pointer.C
@@ -35,7 +35,34 @@ void warn_init_ref_member ()
 { }
   } ai;
 
-  sink (, );
+  struct Al
+  {
+const S 
+Al ():
+  // The temporary S object is destroyed when Al::Al() returns.
+  sref (S ())  // { dg-warning "storing the address" }
+{
+  // Copying this to an SSA_NAME used to disable the warning:
+  Al *ptr = this;
+  asm ("" : "+r" (ptr));
+}
+  } al;
+
+  struct An
+  {
+An *next;
+const S 
+An ():
+  next (0),
+  // The temporary S object is destroyed when An::An() returns.
+  sref (S ())  // { dg-warning "storing the address" "" { xfail *-*-* } }
+{
+  // ??? Writing to another part of *this disables the warning:
+  next = 0;
+}
+  } an;
+
+  sink (, , , );
 }
 
 

-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain 

[PATCH] [arm] disable aes-1742098 mitigation for a72 combine tests

2023-02-16 Thread Alexandre Oliva via Gcc-patches


The expected asm output for aes-fuse-[12].c does not correspond to
that which is generated when -mfix-cortex-a57-aes-1742098 is enabled.
It was introduced after the test, and enabled by default for the
selected processor.  Disabling the option restores the circumstance
that was tested for.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

* gcc.target/arm/aes-fuse-1.c: Add
-mno-fix-cortex-a57-aes-1742098.
* gcc.target/arm/aes-fuse-2.c: Likewise.
---
 gcc/testsuite/gcc.target/arm/aes-fuse-1.c |4 
 gcc/testsuite/gcc.target/arm/aes-fuse-2.c |4 
 2 files changed, 8 insertions(+)

diff --git a/gcc/testsuite/gcc.target/arm/aes-fuse-1.c 
b/gcc/testsuite/gcc.target/arm/aes-fuse-1.c
index 27b08aeef7ba7..6ffb4991cca69 100644
--- a/gcc/testsuite/gcc.target/arm/aes-fuse-1.c
+++ b/gcc/testsuite/gcc.target/arm/aes-fuse-1.c
@@ -2,6 +2,10 @@
 /* { dg-require-effective-target arm_crypto_ok } */
 /* { dg-add-options arm_crypto } */
 /* { dg-additional-options "-mcpu=cortex-a72 -O3 -dp" } */
+/* The mitigation applies to a72 by default, and protects the CRYPTO_AES
+   inputs, such as the explicit xor ops, from being combined like test used to
+   expect.  */
+/* { dg-additional-options "-mno-fix-cortex-a57-aes-1742098" } */
 
 #include 
 
diff --git a/gcc/testsuite/gcc.target/arm/aes-fuse-2.c 
b/gcc/testsuite/gcc.target/arm/aes-fuse-2.c
index 1266a28753169..b72479c0e5726 100644
--- a/gcc/testsuite/gcc.target/arm/aes-fuse-2.c
+++ b/gcc/testsuite/gcc.target/arm/aes-fuse-2.c
@@ -2,6 +2,10 @@
 /* { dg-require-effective-target arm_crypto_ok } */
 /* { dg-add-options arm_crypto } */
 /* { dg-additional-options "-mcpu=cortex-a72 -O3 -dp" } */
+/* The mitigation applies to a72 by default, and protects the CRYPTO_AES
+   inputs, such as the explicit xor ops, from being combined like test used to
+   expect.  */
+/* { dg-additional-options "-mno-fix-cortex-a57-aes-1742098" } */
 
 #include 
 

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PATCH] [vxworks] make wint_t and wchar_t the same distinct type

2023-02-16 Thread Alexandre Oliva via Gcc-patches


We used to define WINT_TYPE to WCHAR_TYPE, so that both wint_t and
wchar_t mapped to the same underlying type, but this caused a glitch
in Wstringop-overflow-6.C: on vxworks, wint_t is typedef'ed to
wchar_t, headers got included in the test that declared functions that
take wint_t parameters, and those conflicted with the builtin
declarations that had wint_t mapped to the underlying integral type.

The problem is that, in C++, wchar_t is a distinct type.  Having
wint_t be a typedef to wchar_t in the headers, but a typedef to
wchar_t's underlying integral type in builtins, makes for mismatches
between the declarations.

This patch defines WINT_TYPE to "wchar_t" for vxworks, and adjusts the
fallout, namely:

- since wchar_t may not have been defined yet when
  c_common_nodes_and_builtins runs, use the node already reserved for
  wchar_t for wint_t when WINT_TYPE is defined to wchar_t.

- for the same reason, when WINT_TYPE is wchar_t and we're not
  compiling C++ where wchar_t is a compiler built-in, define
  __WINT_TYPE__ to WCHAR_TYPE rather than WINT_TYPE, because wchar_t
  may not even be defined in the translation unit.

- recognize and handle wchar_type_node when type_suffix is called for
  wint_type_node.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/ChangeLog

* config/vx-common.h (WINT_TYPE): Alias to "wchar_t".

for  gcc/c-family/ChangeLog

* c-common.cc (c_common_nodes_and_builtins): Take
wchar_type_node for wint_type_node when aliased.
(c_stddef_cpp_builtins): Define __WINT_TYPE__, when aliased to
wchar_t, to the underlying type rather than wchar_t in
non-C++.
* c-cppbuiltin.cc (type_suffix): Handle wchar_type_node.
---
 gcc/c-family/c-common.cc |   16 +---
 gcc/c-family/c-cppbuiltin.cc |2 ++
 gcc/config/vx-common.h   |2 +-
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index ae92cd5adaf5e..a92597c2f544f 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -4576,8 +4576,11 @@ c_common_nodes_and_builtins (void)
   char32_array_type_node
 = build_array_type (char32_type_node, array_domain_type);
 
-  wint_type_node =
-TREE_TYPE (identifier_global_value (get_identifier (WINT_TYPE)));
+  if (strcmp (WINT_TYPE, "wchar_t") == 0)
+wint_type_node = wchar_type_node;
+  else
+wint_type_node =
+  TREE_TYPE (identifier_global_value (get_identifier (WINT_TYPE)));
 
   intmax_type_node =
 TREE_TYPE (identifier_global_value (get_identifier (INTMAX_TYPE)));
@@ -5359,7 +5362,14 @@ c_stddef_cpp_builtins(void)
   builtin_define_with_value ("__SIZE_TYPE__", SIZE_TYPE, 0);
   builtin_define_with_value ("__PTRDIFF_TYPE__", PTRDIFF_TYPE, 0);
   builtin_define_with_value ("__WCHAR_TYPE__", MODIFIED_WCHAR_TYPE, 0);
-  builtin_define_with_value ("__WINT_TYPE__", WINT_TYPE, 0);
+  /* C++ has wchar_t as a builtin type, C doesn't, so if WINT_TYPE
+ maps to wchar_t, define it to the underlying WCHAR_TYPE in C, and
+ to wchar_t in C++, so the desired type equivalence holds.  */
+  if (!c_dialect_cxx ()
+  && strcmp (WINT_TYPE, "wchar_t") == 0)
+builtin_define_with_value ("__WINT_TYPE__", WCHAR_TYPE, 0);
+  else
+builtin_define_with_value ("__WINT_TYPE__", WINT_TYPE, 0);
   builtin_define_with_value ("__INTMAX_TYPE__", INTMAX_TYPE, 0);
   builtin_define_with_value ("__UINTMAX_TYPE__", UINTMAX_TYPE, 0);
   if (flag_char8_t)
diff --git a/gcc/c-family/c-cppbuiltin.cc b/gcc/c-family/c-cppbuiltin.cc
index b333f97fd3237..98f5aef2af95d 100644
--- a/gcc/c-family/c-cppbuiltin.cc
+++ b/gcc/c-family/c-cppbuiltin.cc
@@ -1903,6 +1903,8 @@ type_suffix (tree type)
  systems use it anyway.  */
   || type == char_type_node)
 is_long = 0;
+  else if (type == wchar_type_node)
+return type_suffix (underlying_wchar_type_node);
   else
 gcc_unreachable ();
 
diff --git a/gcc/config/vx-common.h b/gcc/config/vx-common.h
index 83580d0dec288..9733c90fe4c6f 100644
--- a/gcc/config/vx-common.h
+++ b/gcc/config/vx-common.h
@@ -69,7 +69,7 @@ along with GCC; see the file COPYING3.  If not see
 #undef WINT_TYPE_SIZE
 #define WINT_TYPE_SIZE WCHAR_TYPE_SIZE
 #undef WINT_TYPE
-#define WINT_TYPE WCHAR_TYPE
+#define WINT_TYPE "wchar_t"
 
 /* -- Debug and unwind info formats --  */
 

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PATCH] Accept pmf-vbit-in-delta extra warning

2023-02-16 Thread Alexandre Oliva via Gcc-patches


cp_build_binary_op, that issues -Waddress warnings, issues an extra
warning on arm targets, that g++.dg/warn/Waddress-5.C does not expect
when comparing a pointer-to-member-function literal with null.

The reason for the extra warning is that, on arm targets,
TARGET_PTRMEMFUNC_VBIT_LOCATION == ptrmemfunc_vbit_in_delta, which
causes a different path to be taken, that extracts the
pointer-to-function and the delta fields (minus the vbit) and compares
each one with zero.  It's when comparing this pointer-to-function with
zero, in a recursive cp_build_binary_op, that another warning is
issued.

I suppose there should be a way to skip the warning in this recursive
call, without disabling other warnings that might be issued there, but
this patch only arranges for the test to tolerate the extra warning.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

* g++.dg/warn/Waddress-5.C: Tolerate extra -Waddress warning.
---
 gcc/testsuite/g++.dg/warn/Waddress-5.C |6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/warn/Waddress-5.C 
b/gcc/testsuite/g++.dg/warn/Waddress-5.C
index b1287b2fac316..1de88076f7767 100644
--- a/gcc/testsuite/g++.dg/warn/Waddress-5.C
+++ b/gcc/testsuite/g++.dg/warn/Waddress-5.C
@@ -23,7 +23,11 @@ void T (bool);
 void warn_memptr_if ()
 {
   // Exercise warnings for addresses of nonstatic member functions.
-  if (::f == 0) // { dg-warning "the address '::f'" }
+  // On targets with TARGET_PTRMEMFUNC_VBIT_LOCATION ==
+  // ptrmemfunc_vbit_in_delta, cp_build_binary_op recurses to compare
+  // the pfn from the ptrmemfunc with null, so we get two warnings.
+  // This matches both.  ??? Should we disable one of them?
+  if (::f == 0) // { dg-warning "A::f" }
 T (0);
 
   if (::vf) // { dg-warning "-Waddress" }

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PATCH] Drop need for constant I in ctf test

2023-02-16 Thread Alexandre Oliva via Gcc-patches


Though I is supposed to be a constant expression, this is not the case
on vxworks, but this is not what this debug information format test is
testing for, so use real constants to initialize complex variables.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

* gcc.dg/debug/ctf/ctf-complex-1.c: Do not test whether I is
usable in initializers.
---
 gcc/testsuite/gcc.dg/debug/ctf/ctf-complex-1.c |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/debug/ctf/ctf-complex-1.c 
b/gcc/testsuite/gcc.dg/debug/ctf/ctf-complex-1.c
index a36dd9b6b90a9..e6c3199f913d7 100644
--- a/gcc/testsuite/gcc.dg/debug/ctf/ctf-complex-1.c
+++ b/gcc/testsuite/gcc.dg/debug/ctf/ctf-complex-1.c
@@ -14,8 +14,8 @@
 
 #include 
 
-double complex z1 = I * I;
+double complex z1 = -1;
 
-const long double complex z2 = I * I;
+const long double complex z2 = -1;
 
-float complex z4 = 1+2.11*I;
+float complex z4 = 1;

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PATCH] Skip module_cmi_p and related unsupported module test

2023-02-16 Thread Alexandre Oliva via Gcc-patches


When a multi-source module is found to be unsupported, we fail
module_cmi_p and subsequent sources.  Override proc unsupported to
mark the result in module_do, and test it to skip module_cmi_p and
subsequent related tests.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

* g++.dg/modules/modules.exp: Override unsupported to update
module_do, and test it after dg-test.
---
 gcc/testsuite/g++.dg/modules/modules.exp |   14 ++
 1 file changed, 14 insertions(+)

diff --git a/gcc/testsuite/g++.dg/modules/modules.exp 
b/gcc/testsuite/g++.dg/modules/modules.exp
index 61994b059457b..ba1287427bf05 100644
--- a/gcc/testsuite/g++.dg/modules/modules.exp
+++ b/gcc/testsuite/g++.dg/modules/modules.exp
@@ -315,6 +315,14 @@ proc module-check-requirements { tests } {
 # cleanup any detritus from previous run
 cleanup_module_files [find $DEFAULT_REPO *.gcm]
 
+set module_do {"compile" "P"}
+rename unsupported saved-unsupported
+proc unsupported { args } {
+global module_do
+lset module_do 1 "N"
+return [saved-unsupported $args]
+}
+
 # not grouped tests, sadly tcl doesn't have negated glob
 foreach test [prune [lsort [find $srcdir/$subdir {*.[CH]}]] \
  "$srcdir/$subdir/*_?.\[CH\]"] {
@@ -327,6 +335,9 @@ foreach test [prune [lsort [find $srcdir/$subdir {*.[CH]}]] 
\
set module_cmis {}
verbose "Testing $nshort $std" 1
dg-test $test "$std" $DEFAULT_MODFLAGS
+   if { [lindex $module_do 1] == "N" } {
+   continue
+   }
set testcase [string range $test [string length "$srcdir/"] end]
cleanup_module_files [module_cmi_p $testcase $module_cmis]
}
@@ -372,6 +383,9 @@ foreach src [lsort [find $srcdir/$subdir {*_a.[CHX}]] {
}
}
dg-test -keep-output $test "$std" $DEFAULT_MODFLAGS
+   if { [lindex $module_do 1] == "N" } {
+   break
+   }
set testcase [string range $test [string length "$srcdir/"] 
end]
lappend mod_files [module_cmi_p $testcase $module_cmis]
}

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PR100127] Test for coroutine header in clang-compatible tests

2023-02-16 Thread Alexandre Oliva via Gcc-patches


The test is compatible with clang as well as gcc, but ISTM that
testing for the __clang__ macro is potentially just as error-prone,
since macros that used to be GCC-specific are now defined in compilers
that aim for GCC compatibility.  Use a __has_include feature test
instead.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

PR c++/100127
* g++.dg/coroutines/pr100127.C: Test for header rather than
compiler macro.
---
 gcc/testsuite/g++.dg/coroutines/pr100127.C   |2 +-
 gcc/testsuite/g++.dg/coroutines/pr100772-a.C |2 +-
 gcc/testsuite/g++.dg/coroutines/pr100772-b.C |2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/g++.dg/coroutines/pr100127.C 
b/gcc/testsuite/g++.dg/coroutines/pr100127.C
index 374cd710077af..1eaa72ff0acdd 100644
--- a/gcc/testsuite/g++.dg/coroutines/pr100127.C
+++ b/gcc/testsuite/g++.dg/coroutines/pr100127.C
@@ -1,4 +1,4 @@
-#ifdef __clang__
+#if __has_include(<experimental/coroutine>) // for __clang__
 #include <experimental/coroutine>
 namespace std {
   using namespace std::experimental;
diff --git a/gcc/testsuite/g++.dg/coroutines/pr100772-a.C 
b/gcc/testsuite/g++.dg/coroutines/pr100772-a.C
index a325d384fc390..724c377c82e5b 100644
--- a/gcc/testsuite/g++.dg/coroutines/pr100772-a.C
+++ b/gcc/testsuite/g++.dg/coroutines/pr100772-a.C
@@ -1,5 +1,5 @@
 //  { dg-additional-options "-fsyntax-only " }
-#ifdef __clang__
+#if __has_include(<experimental/coroutine>) // for __clang__
 #include <experimental/coroutine>
 namespace std {
   using namespace std::experimental;
diff --git a/gcc/testsuite/g++.dg/coroutines/pr100772-b.C 
b/gcc/testsuite/g++.dg/coroutines/pr100772-b.C
index 6cdf8d1e529e5..4cf31e5f9e0c2 100644
--- a/gcc/testsuite/g++.dg/coroutines/pr100772-b.C
+++ b/gcc/testsuite/g++.dg/coroutines/pr100772-b.C
@@ -1,4 +1,4 @@
-#ifdef __clang__
+#if __has_include(<experimental/coroutine>) // for __clang__
 #include <experimental/coroutine>
 namespace std {
   using namespace std::experimental;

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[libstdc++] Use __gthread_join in jthread/95989

2023-02-16 Thread Alexandre Oliva via Gcc-patches


Ref: https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570617.html

Bernd Edlinger  reported that the 95989.cc
test fails without pthread_join at the end of main, but pthread_join
is no good for a test that doesn't require pthreads.

This patch adds a __gthread_join call instead.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  libstdc++-v3/ChangeLog

* testsuite/30_threads/jthread/95989.cc (main): Call
__gthread_join at the end.
---
 libstdc++-v3/testsuite/30_threads/jthread/95989.cc |1 +
 1 file changed, 1 insertion(+)

diff --git a/libstdc++-v3/testsuite/30_threads/jthread/95989.cc 
b/libstdc++-v3/testsuite/30_threads/jthread/95989.cc
index e98836d094531..407b52748438c 100644
--- a/libstdc++-v3/testsuite/30_threads/jthread/95989.cc
+++ b/libstdc++-v3/testsuite/30_threads/jthread/95989.cc
@@ -52,4 +52,5 @@ main()
   test01();
   test02();
   test03();
+  __gthread_join(0, NULL);
 }

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PATCH] [arm] xfail fp-uint64-convert-double-* on all arm targets

2023-02-16 Thread Alexandre Oliva via Gcc-patches


It wasn't long ago that I xfailed these tests on arm-*-eabi, but the
fail is expected on all other arm targets: even when hard float is
available, conversions between 64-bit integers and double are always
emulated on ARM, and the emulation disregards rounding modes.  So,
bump the xfail to all of arm-*-*.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

* gcc.dg/torture/fp-uint64-convert-double-1.c: XFAIL on all of
arm-*-*.
* gcc.dg/torture/fp-uint64-convert-double-2.c: Likewise.
---
 .../gcc.dg/torture/fp-uint64-convert-double-1.c|2 +-
 .../gcc.dg/torture/fp-uint64-convert-double-2.c|2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/torture/fp-uint64-convert-double-1.c 
b/gcc/testsuite/gcc.dg/torture/fp-uint64-convert-double-1.c
index 61cfa96374631..8f437e2efb1db 100644
--- a/gcc/testsuite/gcc.dg/torture/fp-uint64-convert-double-1.c
+++ b/gcc/testsuite/gcc.dg/torture/fp-uint64-convert-double-1.c
@@ -1,5 +1,5 @@
 /* PR84407 */
-/* { dg-do run { xfail { arm-*-eabi* } } } */
+/* { dg-do run { xfail { arm-*-* } } } */
 /* { dg-require-effective-target fenv } */
 /* { dg-require-effective-target hard_float } */
 /* { dg-additional-options "-frounding-math -fexcess-precision=standard" } */
diff --git a/gcc/testsuite/gcc.dg/torture/fp-uint64-convert-double-2.c 
b/gcc/testsuite/gcc.dg/torture/fp-uint64-convert-double-2.c
index b32b28a329580..fd3f4cbfb830f 100644
--- a/gcc/testsuite/gcc.dg/torture/fp-uint64-convert-double-2.c
+++ b/gcc/testsuite/gcc.dg/torture/fp-uint64-convert-double-2.c
@@ -1,5 +1,5 @@
 /* PR84407 */
-/* { dg-do run { xfail { arm-*-eabi* } } } */
+/* { dg-do run { xfail { arm-*-* } } } */
 /* { dg-require-effective-target fenv } */
 /* { dg-require-effective-target hard_float } */
 /* { dg-additional-options "-frounding-math" } */


-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[libstdc++] [testsuite] intro/names.cc: undef func on vxw7krn

2023-02-16 Thread Alexandre Oliva via Gcc-patches


The '#define func' added in 2021, to test that system headers don't
violate the user namespace, exposes such a bug in the vxworks sysLib.h
header, so add yet another such annotated workaround.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  libstdc++-v3/ChangeLog

* testsuite/17_intro/names.cc: Undef func on vxworks >= 7 in
kernel mode.
---
 libstdc++-v3/testsuite/17_intro/names.cc |2 ++
 1 file changed, 2 insertions(+)

diff --git a/libstdc++-v3/testsuite/17_intro/names.cc 
b/libstdc++-v3/testsuite/17_intro/names.cc
index d3e0db9bab6b9..c2d67ebe01276 100644
--- a/libstdc++-v3/testsuite/17_intro/names.cc
+++ b/libstdc++-v3/testsuite/17_intro/names.cc
@@ -329,6 +329,8 @@
 #undef d
 #undef e
 #undef f
+// in sysLib.h, func appears as a formal parameter name
+#undef func
 #endif // __RTP__
 
 #endif // VxWorks Major >= 7

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[arm] [testsuite] asm-flag-4.c: match quotes in expected message

2023-02-16 Thread Alexandre Oliva via Gcc-patches


Quotes were added around the "asm" keyword in the message expected by
the test, so the test needs adjusting.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).
Ok to install?


for  gcc/testsuite/ChangeLog

* gcc.target/arm/asm-flag-4.c: Match quotes around "asm" in
message.
---
 gcc/testsuite/gcc.target/arm/asm-flag-4.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/arm/asm-flag-4.c 
b/gcc/testsuite/gcc.target/arm/asm-flag-4.c
index 75378cc89b092..6841b6ea1e272 100644
--- a/gcc/testsuite/gcc.target/arm/asm-flag-4.c
+++ b/gcc/testsuite/gcc.target/arm/asm-flag-4.c
@@ -11,5 +11,5 @@ void __attribute__((target("arm"))) f(char *out)
 
 void __attribute__((target("thumb"))) g(char *out)
 {
-  asm("" : "=@ccne"(out[0]));  /* { dg-message "asm flags not supported" } */
+  asm("" : "=@ccne"(out[0]));  /* { dg-message ".asm. flags not supported" } */
 }


-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[Bug testsuite/108810] gcc.target/powerpc/fold-vec-extract-double.p9.c fails on power 9 BE

2023-02-16 Thread linkw at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108810

Kewen Lin  changed:

   What|Removed |Added

   Last reconfirmed||2023-02-17
 Status|UNCONFIRMED |NEW
  Component|target  |testsuite
 CC||linkw at gcc dot gnu.org
 Ever confirmed|0   |1

--- Comment #1 from Kewen Lin  ---
This is a test case issue:

double
testd_cst (vector double vd2)
{
  return vec_extract (vd2, 1);
}

got xxlor on LE but xxpermdi on BE.

The scan insn xxlor is for the high order element, the index is 0 on BE and 1
on LE.

The below diff can fix it:

diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-double.p9.c
b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-double.p9.c
index 6c515035d1a..100f680fd02 100644
--- a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-double.p9.c
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-double.p9.c
@@ -18,9 +18,15 @@ testd_var (vector double vd2, signed int si)
   return vec_extract (vd2, si);
 }

+#ifdef __BIG_ENDIAN__
+#define HIGH_ORDER_ELEMENT_INDEX 0
+#else
+#define HIGH_ORDER_ELEMENT_INDEX 1
+#endif
+
 double
 testd_cst (vector double vd2)
 {
-  return vec_extract (vd2, 1);
+  return vec_extract (vd2, HIGH_ORDER_ELEMENT_INDEX);
 }

Re: C++ modules and AAPCS/ARM EABI clash on inline key methods

2023-02-16 Thread Alexandre Oliva via Gcc-patches
On Apr  5, 2022, Alexandre Oliva  wrote:

> Would something like this be acceptable/desirable?  It's overreaching,
> in that not all arm platforms are expected to fail, but the result on
> them will be an unexpected pass, which is not quite as bad as the
> unexpected fail we get on most arm variants now.

Ping?
https://gcc.gnu.org/pipermail/gcc-patches/2022-April/592763.html

[PR105224] C++ modules and AAPCS/ARM EABI clash on inline key methods

g++.dg/modules/virt-2_a.C fails on arm-eabi and many other arm targets
that use the AAPCS variant.  ARM is the only target that overrides
TARGET_CXX_KEY_METHOD_MAY_BE_INLINE.  It's not clear to me which way
the clash between AAPCS and C++ Modules design should be resolved, but
currently it favors AAPCS and thus the test fails.

Skipping the test or conditionally dropping the inline keyword breaks
subsequent tests, so I'm XFAILing the expectation that vtable and rtti
symbols are output on arm*-*-*.

Retested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?


for  gcc/testsuite/ChangeLog

PR c++/105224
* g++.dg/modules/virt-2_a.C: XFAIL syms on arm*-*-*.
---
 gcc/testsuite/g++.dg/modules/virt-2_a.C |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/g++.dg/modules/virt-2_a.C 
b/gcc/testsuite/g++.dg/modules/virt-2_a.C
index 580552be5a0d8..b265515e2c7fd 100644
--- a/gcc/testsuite/g++.dg/modules/virt-2_a.C
+++ b/gcc/testsuite/g++.dg/modules/virt-2_a.C
@@ -22,6 +22,6 @@ export int Visit (Visitor *v)
 }
 
 // Emit here
-// { dg-final { scan-assembler {_ZTVW3foo7Visitor:} } }
-// { dg-final { scan-assembler {_ZTIW3foo7Visitor:} } }
-// { dg-final { scan-assembler {_ZTSW3foo7Visitor:} } }
+// { dg-final { scan-assembler {_ZTVW3foo7Visitor:} { xfail arm*-*-* } } }
+// { dg-final { scan-assembler {_ZTIW3foo7Visitor:} { xfail arm*-*-* } } }
+// { dg-final { scan-assembler {_ZTSW3foo7Visitor:} { xfail arm*-*-* } } }


-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[Bug target/108814] gcc.target/powerpc/pr79251-run.p9.c fails on power 9 BE

2023-02-16 Thread linkw at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108814

Kewen Lin  changed:

   What|Removed |Added

 Resolution|--- |DUPLICATE
 Status|UNCONFIRMED |RESOLVED
 CC||linkw at gcc dot gnu.org

--- Comment #1 from Kewen Lin  ---
Dup.

*** This bug has been marked as a duplicate of bug 108807 ***

[Bug target/108807] [11/12/13 regression] gcc.target/powerpc/vsx-builtin-10d.c fails after r11-6857-gb29225597584b6 on power 9 BE

2023-02-16 Thread linkw at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108807

--- Comment #3 from Kewen Lin  ---
*** Bug 108814 has been marked as a duplicate of this bug. ***

Re: [PING 2] [PATCH] swap: Fix incorrect lane extraction by vec_extract() [PR106770]

2023-02-16 Thread Surya Kumari Jangala via Gcc-patches
Ping. Please review the patch.

On 12/01/23 10:21 pm, Surya Kumari Jangala via Gcc-patches wrote:
> Ping
> 
> On 04/01/23 1:58 pm, Surya Kumari Jangala via Gcc-patches wrote:
>> swap: Fix incorrect lane extraction by vec_extract() [PR106770]
>>
>> In the routine rs6000_analyze_swaps(), special handling of swappable
>> instructions is done even if the webs that contain the swappable
>> instructions are not optimized, i.e., the webs do not contain any
>> permuting load/store instructions along with the associated register
>> swap instructions. Doing special handling in such webs will result in
>> the extracted lane being adjusted unnecessarily for vec_extract.
>>
>> Modifying swappable instructions is also incorrect in webs where
>> loads/stores on quad word aligned addresses are changed to lvx/stvx.
>> Similarly, in webs where swap(load(vector constant)) instructions are
>> replaced with load(swapped vector constant), the swappable
>> instructions should not be modified.
>>
>> 2023-01-04  Surya Kumari Jangala  
>>
>> gcc/
>>  PR rtl-optimization/106770
>>  * rs6000-p8swap.cc (rs6000_analyze_swaps): .
>>
>> gcc/testsuite/
>>  PR rtl-optimization/106770
>>  * gcc.target/powerpc/pr106770.c: New test.
>> ---
>>
>> diff --git a/gcc/config/rs6000/rs6000-p8swap.cc 
>> b/gcc/config/rs6000/rs6000-p8swap.cc
>> index 19fbbfb67dc..7ed39251df9 100644
>> --- a/gcc/config/rs6000/rs6000-p8swap.cc
>> +++ b/gcc/config/rs6000/rs6000-p8swap.cc
>> @@ -179,6 +179,9 @@ class swap_web_entry : public web_entry_base
>>unsigned int special_handling : 4;
>>/* Set if the web represented by this entry cannot be optimized.  */
>>unsigned int web_not_optimizable : 1;
>> +  /* Set if the web represented by this entry has been optimized, ie,
>> + register swaps of permuting loads/stores have been removed.  */
>> +  unsigned int web_is_optimized : 1;
>>/* Set if this insn should be deleted.  */
>>unsigned int will_delete : 1;
>>  };
>> @@ -2627,22 +2630,43 @@ rs6000_analyze_swaps (function *fun)
>>/* For each load and store in an optimizable web (which implies
>>   the loads and stores are permuting), find the associated
>>   register swaps and mark them for removal.  Due to various
>> - optimizations we may mark the same swap more than once.  Also
>> - perform special handling for swappable insns that require it.  */
>> + optimizations we may mark the same swap more than once. Fix up
>> + the non-permuting loads and stores by converting them into
>> + permuting ones.  */
>>for (i = 0; i < e; ++i)
>>  if ((insn_entry[i].is_load || insn_entry[i].is_store)
>>  && insn_entry[i].is_swap)
>>{
>>  swap_web_entry* root_entry
>>= (swap_web_entry*)((_entry[i])->unionfind_root ());
>> -if (!root_entry->web_not_optimizable)
>> +if (!root_entry->web_not_optimizable) {
>>mark_swaps_for_removal (insn_entry, i);
>> +  root_entry->web_is_optimized = true;
>> +}
>>}
>> -else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
>> +else if (insn_entry[i].is_swappable
>> + && (insn_entry[i].special_handling == SH_NOSWAP_LD ||
>> + insn_entry[i].special_handling == SH_NOSWAP_ST))
>> +  {
>> +swap_web_entry* root_entry
>> +  = (swap_web_entry*)((_entry[i])->unionfind_root ());
>> +if (!root_entry->web_not_optimizable) {
>> +  handle_special_swappables (insn_entry, i);
>> +  root_entry->web_is_optimized = true;
>> +}
>> +  }
>> +
>> +  /* Perform special handling for swappable insns that require it. 
>> + Note that special handling should be done only for those 
>> + swappable insns that are present in webs optimized above.  */
>> +  for (i = 0; i < e; ++i)
>> +if (insn_entry[i].is_swappable && insn_entry[i].special_handling &&
>> +!(insn_entry[i].special_handling == SH_NOSWAP_LD || 
>> +  insn_entry[i].special_handling == SH_NOSWAP_ST))
>>{
>>  swap_web_entry* root_entry
>>= (swap_web_entry*)((_entry[i])->unionfind_root ());
>> -if (!root_entry->web_not_optimizable)
>> +if (root_entry->web_is_optimized)
>>handle_special_swappables (insn_entry, i);
>>}
>>  
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106770.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr106770.c
>> new file mode 100644
>> index 000..84e9aead975
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr106770.c
>> @@ -0,0 +1,20 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target powerpc_p8vector_ok } */
>> +/* { dg-options "-mdejagnu-cpu=power8 -O3 " } */
>> +/* { dg-final { scan-assembler-times "xxpermdi" 2 } } */
>> +
>> +/* Test case to resolve PR106770  */
>> +
>> +#include 
>> +
>> +int cmp2(double a, double b)
>> +{
>> +vector double va = vec_promote(a, 1);
>> +vector double vb = vec_promote(b, 1);
>> +vector long long vlt = (vector 

Re: [PATCH v6] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-02-16 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2023/02/16 7:18, Max Filippov wrote:
> Hi Suwa-san,

Hi!

> 
> On Thu, Jan 26, 2023 at 7:17 PM Takayuki 'January June' Suwa
>  wrote:
>>
>> In the case of the CALL0 ABI, values that must be retained before and
>> after function calls are placed in the callee-saved registers (A12
>> through A15) and referenced later.  However, it is often the case that
>> the save and the reference are each only once and a simple register-
>> register move (with two exceptions; i. the register saved to/restored
>> from is the stack pointer, ii. the function needs an additional stack
>> pointer adjustment to grow the stack).
>>
>> e.g. in the following example, if there are no other occurrences of
>> register A14:
>>
>> ;; before
>> ; prologue {
>>   ...
>> s32i.n  a14, sp, 16
>>   ...   ;; no frame pointer needed
>> ;; no additional stack growth
>> ; } prologue
>>   ...
>> mov.n   a14, a6 ;; A6 is not SP
>>   ...
>> call0   foo
>>   ...
>> mov.n   a8, a14 ;; A8 is not SP
>>   ...
>> ; epilogue {
>>   ...
>> l32i.n  a14, sp, 16
>>   ...
>> ; } epilogue
>>
>> It can be possible like this:
>>
>> ;; after
>> ; prologue {
>>   ...
>> (no save needed)
>>   ...
>> ; } prologue
>>   ...
>> s32i.n  a6, sp, 16  ;; replaced with A14's slot
>>   ...
>> call0   foo
>>   ...
>> l32i.n  a8, sp, 16  ;; through SP
>>   ...
>> ; epilogue {
>>   ...
>> (no restoration needed)
>>   ...
>> ; } epilogue
>>
>> This patch adds the abovementioned logic to the function prologue/epilogue
>> RTL expander code.
>>
>> gcc/ChangeLog:
>>
>> * config/xtensa/xtensa.cc (machine_function): Add new member
>> 'eliminated_callee_saved_bmp'.
>> (xtensa_can_eliminate_callee_saved_reg_p): New function to
>> determine whether the register can be eliminated or not.
>> (xtensa_expand_prologue): Add invoking the above function and
>> eliminate the use of callee-saved register by using its stack
>> slot through the stack pointer (or the frame pointer if needed)
>> directly.
>> (xtensa_expand_epilogue): Modify to not emit register restoration
>> insn from its stack slot if the register is already eliminated.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.target/xtensa/elim_callee_saved.c: New.
>> ---
>>  gcc/config/xtensa/xtensa.cc   | 132 ++
>>  .../gcc.target/xtensa/elim_callee_saved.c |  38 +
>>  2 files changed, 145 insertions(+), 25 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c
> 
> This version passes regression tests, but I still have a couple questions.
> 
>> diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
>> index 3e2e22d4cbe..ff59c933d4d 100644
>> --- a/gcc/config/xtensa/xtensa.cc
>> +++ b/gcc/config/xtensa/xtensa.cc
>> @@ -105,6 +105,7 @@ struct GTY(()) machine_function
>>bool epilogue_done;
>>bool inhibit_logues_a1_adjusts;
>>rtx last_logues_a9_content;
>> +  HOST_WIDE_INT eliminated_callee_saved_bmp;
>>  };
>>
>>  static void xtensa_option_override (void);
>> @@ -3343,6 +3344,66 @@ xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, 
>> int flags)
>>  cfun->machine->last_logues_a9_content = GEN_INT (offset);
>>  }
>>
>> +static bool
>> +xtensa_can_eliminate_callee_saved_reg_p (unsigned int regno,
>> +rtx_insn **p_insnS,
>> +rtx_insn **p_insnR)
>> +{
>> +  df_ref ref;
>> +  rtx_insn *insn, *insnS = NULL, *insnR = NULL;
>> +  rtx pattern;
>> +
>> +  if (!optimize || !df || call_used_or_fixed_reg_p (regno))
>> +return false;
>> +
>> +  for (ref = DF_REG_DEF_CHAIN (regno);
>> +   ref; ref = DF_REF_NEXT_REG (ref))
>> +if (DF_REF_CLASS (ref) != DF_REF_REGULAR
>> +   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
>> +  continue;
>> +else if (GET_CODE (pattern = PATTERN (insn)) == SET
>> +&& REG_P (SET_DEST (pattern))
>> +&& REGNO (SET_DEST (pattern)) == regno
>> +&& REG_NREGS (SET_DEST (pattern)) == 1
>> +&& REG_P (SET_SRC (pattern))
>> +&& REGNO (SET_SRC (pattern)) != A1_REG)
> 
> Do I understand correctly that the check for A1 here and below is
> for the case when regno is a hard frame pointer and the function
> needs the frame pointer? If so, wouldn't it be better to check
> for it explicitly in the beginning?

I see.  But I can't be sure that the body of the function never saves and 
restores the stack pointer to another register if the function doesn't need the 
frame pointer.
Therefore, I think that the validity depends on the regtest.

> 
>> +  {
>> +   if (insnS)
>> + return false;
>> +   insnS = insn;
>> +   continue;
>> +  }
>> +else
>> +  

[Bug target/108807] [11/12/13 regression] gcc.target/powerpc/vsx-builtin-10d.c fails after r11-6857-gb29225597584b6 on power 9 BE

2023-02-16 Thread linkw at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108807

--- Comment #2 from Kewen Lin  ---
Created attachment 54478
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=54478&action=edit
untested patch

The lvsr and lvsl for generating permutation control vectors only work for LE
as the element ordering is different on LE and BE. The proposed patch is to fix
the gen function for generating permutation control vectors by considering
endianness.

It can fix the exposed failures on vsx-builtin-{9,10,11,14,16,18}d.c, and
full testing is ongoing.

Re: Missed warning (-Wuse-after-free)

2023-02-16 Thread Siddhesh Poyarekar

On 2023-02-16 10:15, David Malcolm via Gcc wrote:

I'm not convinced that it's useful to the end-user to warn about the
"use of q itself" case.


FWIW, -Wuse-after-free=3 already should do this:

At level 3, the warning also diagnoses uses of indeterminate pointers in 
equality expressions.  All uses of indeterminate pointers are undefined 
but equality tests sometimes appear after
calls to "realloc" as an attempt to determine whether the call resulted 
in relocating the object to a different address.  They are diagnosed at 
a separate level to aid legacy code gradually
transition to safe alternatives.  For example, the equality test in the 
function below is diagnosed at this level:


Jakub and I had discussed this in the context of _FORTIFY_SOURCE=3 
(which is anal about this and can break things) and we got pr#105217, 
but that is also a best-effort thing, not really a guarantee.


IMO the analyzer should go that extra mile and warn for the use of q 
itself and maybe deprecate -Wuse-after-free=3 in its favour.


Sid


[PATCHES, Committed] As obvious

2023-02-16 Thread Jerry DeLisle via Gcc-patches
Committed as obvious:

commit 061b13ed014ba0b6891800a5c7f852bf58e4d856
Author: Jerry DeLisle 
Date:   Thu Feb 16 18:13:56 2023 -0800

 Fortran Tests: Allow passing on mingw.

 gcc/testsuite/ChangeLog:

 * gfortran.dg/bind_c_array_params_2.f90: Add *-*-ming* to
dg-final.

and

commit f978585c2939691176ad8d3fa9c2e4e91ed18bf4 (HEAD -> master,
origin/master, origin/HEAD)
Author: Jerry DeLisle 
Date:   Thu Feb 16 19:29:44 2023 -0800

 Fortran test: Modify test cases to pass on mingw.

 gcc/testsuite/ChangeLog:

 * gfortran.dg/ISO_Fortran_binding_14.f90: Change example
function to
 CLOCK which is available on mingw as well as other platforms.
 * gfortran.dg/pr96486.f90: Change variable to PATH likewise.




Re: [PATCH] rs6000: Fix vector parity support [PR108699]

2023-02-16 Thread Kewen.Lin via Gcc-patches
Hi Segher,

Thanks for the comments!

on 2023/2/16 23:10, Segher Boessenkool wrote:
> Hi!
> 
> On Thu, Feb 16, 2023 at 08:06:02PM +0800, Kewen.Lin wrote:
>> on 2023/2/16 19:14, Segher Boessenkool wrote:
>>> On Thu, Feb 16, 2023 at 05:23:40PM +0800, Kewen.Lin wrote:
 This patch is to fix the handling with one more pre-insn
 vpopcntb.  It also fixes an oversight having V8HI in VEC_IP,
 replaces VParity with VEC_IP, and adjusts the existing
 UNSPEC_PARITY to a more meaningful name UNSPEC_PARITYB.
>>>
>>> Please don't do that.  UNSPEC_PARITYB is worse than UNSPEC_PARITY,
>>> even more so for the prtyw etc. instructions.
>>
>> I thought the scalar insns prty[wd] also operate on byte
>> (especially on the least significant bit in each byte),
>> PARITYB(yte) seems better ...
> 
> The scalar instruction does not include a "b" in the mnemonic, and it
> says nothing "byte" or "bit" in the instruction name either.  The
> existing name is simpler, less confusing, simply better.
> 
>>> You might want to express the vector parity insns separately, but then
>>> *do that*, don't rename the normal stuff as well, and use a more obvious
>>> name like UNSPEC_VPARITY please.
>>
>> I'll update for vector only.  Maybe it's better with UNSPEC_VPARITY*B*?
>> since the mnemonic has "b"(yte).
> 
> No, you are right that the semantics are pretty much the same.  Please
> just keep UNSPEC_PARITY everywhere.

OK, since it has UNSPEC, I would hope the reader can realize it's
different from RTL opcode parity and mainly operating on byte.  :)

> 
const vsll __builtin_altivec_vprtybd (vsll);
 -VPRTYBD parityv2di2 {}
 +VPRTYBD p9v_paritybv2di2 {}
>>>
>>> Why this?  Please keep the simpler names if at all possible.
>>
>> The bif would like to map with the vector parity byte insns
>> directly, the parity<mode>2 can't work here any more.
> 
> Ah, because it cannot use the expander here, it has to be a define_insn?

No, the above statement seems to cause some misunderstanding, let me clarify:
first, the built-in functions __builtin_altivec_vprtyb[wdq] require to be
mapped to hardware insns vprtyb[wdq] directly as the functions name show.
Before this patch, the standard pattern name parity<mode>2 expands to those
insns directly (wrongly), so it's fine to use those expanders here.  After
this patch, those expands get fixed to get parity for each vector element
(vpopcntb + vprtyb*), they are not valid to be used for expanding these
built-in functions (not 1-1 map any more), so this patch fixes it with
the correct name which maps to vprtyb*.

> Why is that?
> 
>> The name is updated from previous *p9v_parity<mode>2 (becoming
>> to a named define_insn), I noticed there are some names with
>> p8v_, p9v_, meant to keep it consistent with the context.
>> You want this to be simplified as parity*b*v2di2?
> 
> Without the "b".  But that would be better then, yes.  This is a great
> example why p9v_ in the name is not good: most users do not care at all
> what ISA version this insn first appeared in.

The name without "b" is standard pattern name, whose semantic doesn't align
with what these insns provide and we already have the matched expander with
it ("parity<mode>2"), so we can't use the name here :(.  As you felt a name
with "b" is better than "p9v_*", I'll go with "parityb" then.  :)

>>> Later patches can do all other things (also, not do this expand for
>>> TImode at all, ho hum).
>>
>> OK, I guess all the others are for next stage1. :)
> 
> Yes exactly.  And one (small, self-contained) thing per patch please.

Got it, thanks again!

BR,
Kewen


Re: Re: [PATCH V2 0/5] RISC-V: Implement Scalar Cryptography Extension

2023-02-16 Thread shihua
OK, I will send another one which removes riscv_scalar_crypto.h and updates 
testcases with __builtin_riscv_XX


 -原始邮件-
 发件人: "Kito Cheng" 
 发送时间: 2023-02-16 21:28:34 (星期四)
 收件人: "Liao Shihua" 
 抄送: gcc-patches@gcc.gnu.org, jia...@iscas.ac.cn, m...@iki.fi, 
pal...@dabbelt.com, shiyul...@iscas.ac.cn, ben.marsh...@pqshield.com, 
christoph.muell...@vrull.eu
 主题: Re: [PATCH V2 0/5] RISC-V: Implement Scalar Cryptography Extension
 
 Hi Shihua:
 
 Thanks for your patches! This patch set is generally in good shape,
 but I would prefer to remove riscv_scalar_crypto.h at this moment
 since it's NOT standardized yet.
 
 Do you mind sending a new version of this patch set which does not
 include that and also update the testcases?
 
 
 
 On Thu, Feb 16, 2023 at 3:52 PM Liao Shihua  wrote:
 
  This series adds basic support for the Scalar Cryptography extensions:
  * Zbkb
  * Zbkc
  * Zbkx
  * Zknd
  * Zkne
  * Zknh
  * Zksed
  * Zksh
 
  The implementation follows the version Scalar Cryptography v1.0.0 of 
the specification,
  and the intrinsic of Scalar Cryptography extensions follows 
riscv-c-api
  which can be found here:
  https://github.com/riscv/riscv-crypto/releases/tag/v1.0.0-scalar
  https://github.com/riscv-non-isa/riscv-c-api-doc/pull/31
 
  This is work by Wu Siyu and Liao Shihua.
 
  Liao Shihua (5):
Add prototypes for RISC-V Crypto built-in functions
Implement ZBKB, ZBKC and ZBKX extensions
Implement ZKND and ZKNE extensions
Implement ZKNH extensions
Implement ZKSH and ZKSED extensions
 
   gcc/config.gcc|   2 +-
   gcc/config/riscv/bitmanip.md  |  20 +-
   gcc/config/riscv/constraints.md   |   8 +
   gcc/config/riscv/crypto.md| 435 
++
   gcc/config/riscv/riscv-builtins.cc|  26 ++
   gcc/config/riscv/riscv-crypto.def |  94 
   gcc/config/riscv/riscv-ftypes.def |  10 +
   gcc/config/riscv/riscv.md |   4 +-
   gcc/config/riscv/riscv_scalar_crypto.h| 218 +
   gcc/testsuite/gcc.target/riscv/zbkb32.c   |  36 ++
   gcc/testsuite/gcc.target/riscv/zbkb64.c   |  28 ++
   gcc/testsuite/gcc.target/riscv/zbkc32.c   |  17 +
   gcc/testsuite/gcc.target/riscv/zbkc64.c   |  17 +
   gcc/testsuite/gcc.target/riscv/zbkx32.c   |  18 +
   gcc/testsuite/gcc.target/riscv/zbkx64.c   |  18 +
   gcc/testsuite/gcc.target/riscv/zknd32.c   |  18 +
   gcc/testsuite/gcc.target/riscv/zknd64.c   |  36 ++
   gcc/testsuite/gcc.target/riscv/zkne32.c   |  18 +
   gcc/testsuite/gcc.target/riscv/zkne64.c   |  30 ++
   gcc/testsuite/gcc.target/riscv/zknh-sha256.c  |  29 ++
   .../gcc.target/riscv/zknh-sha512-32.c |  43 ++
   .../gcc.target/riscv/zknh-sha512-64.c |  31 ++
   gcc/testsuite/gcc.target/riscv/zksed.c|  20 +
   gcc/testsuite/gcc.target/riscv/zksh.c |  19 +
   24 files changed, 1183 insertions(+), 12 deletions(-)
   create mode 100644 gcc/config/riscv/crypto.md
   create mode 100644 gcc/config/riscv/riscv-crypto.def
   create mode 100644 gcc/config/riscv/riscv_scalar_crypto.h
   create mode 100644 gcc/testsuite/gcc.target/riscv/zbkb32.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zbkb64.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zbkc32.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zbkc64.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zbkx32.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zbkx64.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zknd32.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zknd64.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zkne32.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zkne64.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha256.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha512-32.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha512-64.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zksed.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zksh.c
 
  --
  2.38.1.windows.1
 


[Bug tree-optimization/90838] Detect table-based ctz implementation

2023-02-16 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90838

--- Comment #13 from Andrew Pinski  ---
(In reply to Gabriel Ravier from comment #12)
> It appears this new optimization is non-functional on trunk with x86-64...
> specifically on x86-64, too, on AArch64 it works just fine. So does that
> mean this bug should be re-opened or should a new bug be opened for that ?

It does work with -mbmi where the instruction which is used has a defined value
at 0.
You can open a new issue for improvement of the case for not supplying -mbmi .
That is CTZ_DEFINED_VALUE_AT_ZERO is non-2.

[Bug tree-optimization/90838] Detect table-based ctz implementation

2023-02-16 Thread gabravier at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90838

Gabriel Ravier  changed:

   What|Removed |Added

 CC||gabravier at gmail dot com

--- Comment #12 from Gabriel Ravier  ---
It appears this new optimization is non-functional on trunk with x86-64...
specifically on x86-64, too, on AArch64 it works just fine. So does that mean
this bug should be re-opened or should a new bug be opened for that ?

Re: Missed warning (-Wuse-after-free)

2023-02-16 Thread Sam James via Gcc


> On 17 Feb 2023, at 01:05, Alejandro Colomar via Gcc  wrote:
> 
> On 2/17/23 02:04, Alejandro Colomar wrote:
>> [CC: Added those who contributed to the discussion in linux-man@,
>> and also the authors of N2861 for C2x]
> 
> [...]
> 
>> 
>> There was a discussion in linux-man@ some years ago, which now I realize it
>> didn't end up being applied (I thought we had applied a patch, but it seems 
>> we
>> didn't).  I'll check if we still need such a patch (and I guess we do, since
>> we're having this conversation).
> 
> I forgot to link:
> 
> 

See also 
https://siddhesh.in/posts/that-is-not-a-number-that-is-a-freed-object.html.


signature.asc
Description: Message signed with OpenPGP


Re: [PATCH] LoongArch: Fix multiarch tuple canonization

2023-02-16 Thread Lulu Cheng

Hi,

在 2023/2/15 下午6:42, WANG Xuerui 写道:

Hi,

On 2023/2/13 18:38, Xi Ruoyao wrote:

Multiarch tuple will be coded in file or directory names in
multiarch-aware distros, so one ABI should have only one multiarch
tuple.  For example, "--target=loongarch64-linux-gnu --with-abi=lp64s"
and "--target=loongarch64-linux-gnusf" should both set multiarch tuple
to "loongarch64-linux-gnusf".  Before this commit,
"--target=loongarch64-linux-gnu --with-abi=lp64s --disable-multilib"
will produce wrong result (loongarch64-linux-gnu).

A recent LoongArch psABI revision mandates "loongarch64-linux-gnu" to be
used for -mabi=lp64d (instead of "loongarch64-linux-gnuf64") for some
non-technical reason [1].  Note that we cannot make
"loongarch64-linux-gnuf64" an alias for "loongarch64-linux-gnu" because
to implement such an alias, we must create thousands of symlinks in the
distro and doing so would be completely unpractical.  This commit also
aligns GCC with the revision.

Tested by building cross compilers with --enable-multiarch and multiple
combinations of --target=loongarch64-linux-gnu*, --with-abi=lp64{s,f,d},
and --{enable,disable}-multilib; and run "xgcc --print-multiarch" then
manually verify the result with eyesight.

Ok for trunk and backport to releases/gcc-12?

[1]: https://github.com/loongson/LoongArch-Documentation/pull/80

gcc/ChangeLog:

* config.gcc (triplet_abi): Set its value based on $with_abi,
instead of $target.
(la_canonical_triplet): Set it after $triplet_abi is set
correctly.
* config/loongarch/t-linux (MULTILIB_OSDIRNAMES): Make the
multiarch tuple for lp64d "loongarch64-linux-gnu" (without
"f64" suffix).
---
  gcc/config.gcc   | 14 +++---
  gcc/config/loongarch/t-linux |  2 +-
  2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 067720ac795..c070e6ecd2e 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4889,20 +4889,16 @@ case "${target}" in
  case ${target} in
  loongarch64-*-*-*f64)
  abi_pattern="lp64d"
-    triplet_abi="f64"
  ;;
  loongarch64-*-*-*f32)
  abi_pattern="lp64f"
-    triplet_abi="f32"
  ;;
  loongarch64-*-*-*sf)
  abi_pattern="lp64s"
-    triplet_abi="sf"
  ;;
  loongarch64-*-*-*)
  abi_pattern="lp64[dfs]"
  abi_default="lp64d"
-    triplet_abi=""
  ;;
  *)
  echo "Unsupported target ${target}." 1>&2
@@ -4923,9 +4919,6 @@ case "${target}" in
    ;;
  esac
  - la_canonical_triplet="loongarch64-${triplet_os}${triplet_abi}"
-
-
  # Perform initial sanity checks on --with-* options.
  case ${with_arch} in
  "" | loongarch64 | la464) ;; # OK, append here.
@@ -4996,6 +4989,13 @@ case "${target}" in
  ;;
  esac
  +    case ${with_abi} in
+  "lp64d") triplet_abi="";;
+  "lp64f") triplet_abi="f32";;
+  "lp64s") triplet_abi="sf";;
+    esac
+ la_canonical_triplet="loongarch64-${triplet_os}${triplet_abi}"
+
  # Set default value for with_abiext (internal)
  case ${with_abiext} in
  "")
diff --git a/gcc/config/loongarch/t-linux b/gcc/config/loongarch/t-linux
index 131c45fdced..e40da179203 100644
--- a/gcc/config/loongarch/t-linux
+++ b/gcc/config/loongarch/t-linux
@@ -40,7 +40,7 @@ ifeq ($(filter LA_DISABLE_MULTILIB,$(tm_defines)),)
    MULTILIB_OSDIRNAMES = \
    mabi.lp64d=../lib64$\
-  $(call if_multiarch,:loongarch64-linux-gnuf64)
+  $(call if_multiarch,:loongarch64-linux-gnu)
    MULTILIB_OSDIRNAMES += \
    mabi.lp64f=../lib64/f32$\


Thanks for the quick patch; however Revy told me offline yesterday 
that this might conflict with things Debian side once this gets 
merged. He may have more details to share.


Adding him to CC -- you could keep him CC-ed on future changes that 
may impact distro packaging.


Thank you for your feedback.

This modification plan is determined by the operating system group. If 
there is any problem, you can describe it clearly.


If there is no problem, we will combine this patch.

Thanks!



[PATCH] rs6000: Enhance lowpart/highpart DI->SF by mtvsrws/mtvsrd

2023-02-16 Thread Jiufu Guo via Gcc-patches
Hi,

Compared with the previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611823.html
this patch does not define a new insn for mtvsrws, but uses the existing one.

As mentioned in PR108338, on p9, we could use mtvsrws to implement
the bitcast from SI#0 to SF (or lowpart DI to SF).

For code:
  *(long long*)buff = di;
  float f = *(float*)(buff);

We generate "sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" instead of
"mtvsrws 1,3 ; xscvspdpn 1,1".

This patch updates this, and also enhances the bitcast from highpart
DI to SF.

Bootstrap and regtests pass on ppc64{,le}.
Is this ok for trunk?

BR,
Jeff (Jiufu)


PR target/108338

gcc/ChangeLog:

* config/rs6000/predicates.md (lowpart_subreg_operator): New
define_predicate.
* config/rs6000/rs6000.md (any_rshift): New code_iterator.
(movsf_from_si): Update to generate mtvsrws.
(movsf_from_si2): Rename to...
(movsf_from_si2_<code>): ... this.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/pr108338.c: New test.

---
 gcc/config/rs6000/predicates.md |  5 +++
 gcc/config/rs6000/rs6000.md | 34 +++--
 gcc/testsuite/gcc.target/powerpc/pr108338.c | 42 +
 3 files changed, 70 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108338.c

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 52c65534e51..e57c9d99c6b 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -2064,3 +2064,8 @@ (define_predicate "macho_pic_address"
   else
 return false;
 })
+
+(define_predicate "lowpart_subreg_operator"
+  (and (match_code "subreg")
+   (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op)))
+   == SUBREG_BYTE (op)")))
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4a7812fa592..74b1c9cee6a 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -8200,13 +8200,24 @@ (define_insn_and_split "movsf_from_si"
 {
   rtx op0 = operands[0];
   rtx op1 = operands[1];
-  rtx op2 = operands[2];
-  rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
 
-  /* Move SF value to upper 32-bits for xscvspdpn.  */
-  emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
-  emit_insn (gen_p8_mtvsrd_sf (op0, op2));
-  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+  if (TARGET_P9_VECTOR)
+{
+  rtx op0_v = gen_rtx_REG (V4SImode, REGNO (op0));
+  emit_insn (gen_vsx_splat_v4si (op0_v, op1));
+  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+}
+  else
+{
+  rtx op2 = operands[2];
+  rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
+
+  /* Move SF value to upper 32-bits for xscvspdpn.  */
+  emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
+  emit_insn (gen_p8_mtvsrd_sf (op0, op2));
+  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+}
+
   DONE;
 }
   [(set_attr "length"
@@ -8219,18 +8230,19 @@ (define_insn_and_split "movsf_from_si"
"*,  *, p9v,   p8v,   *, *,
 p8v,p8v,   p8v,   *")])
 
+(define_code_iterator any_rshift [ashiftrt lshiftrt])
+
 ;; For extracting high part element from DImode register like:
 ;; {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
 ;; split it before reload with "and mask" to avoid generating shift right
 ;; 32 bit then shift left 32 bit.
-(define_insn_and_split "movsf_from_si2"
+(define_insn_and_split "movsf_from_si2_<code>"
   [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
(unspec:SF
-[(subreg:SI
-  (ashiftrt:DI
+[(match_operator:SI 3 "lowpart_subreg_operator"
+  [(any_rshift:DI
(match_operand:DI 1 "input_operand" "r")
-   (const_int 32))
-  0)]
+   (const_int 32))])]
 UNSPEC_SF_FROM_SI))
   (clobber (match_scratch:DI 2 "=r"))]
   "TARGET_NO_SF_SUBREG"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c 
b/gcc/testsuite/gcc.target/powerpc/pr108338.c
new file mode 100644
index 000..2438dc13f41
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c
@@ -0,0 +1,42 @@
+// { dg-do run }
+// { dg-options "-O2 -save-temps" }
+
+float __attribute__ ((noipa)) sf_from_di_off0 (long long l)
+{
+  char buff[16];
+  *(long long*)buff = l;
+  float f = *(float*)(buff);
+  return f;
+}
+
+float  __attribute__ ((noipa)) sf_from_di_off4 (long long l)
+{
+  char buff[16];
+  *(long long*)buff = l;
+  float f = *(float*)(buff + 4);
+  return f; 
+}
+
+/* Under lp64, 'l' is in one DI reg, then check sub DI to SF. */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && 
has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && 
has_arch_pwr8 } } } } */
+
+/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { 
has_arch_pwr8 && { 

[wwwdocs] testing: Tweak the link to upstream FTensor (was: Anyone using FTensor to test GCC (or otherwise)?)

2023-02-16 Thread Gerald Pfeifer
On Tue, 14 Feb 2023, NightStrike wrote:
>> Alas http://www.wlandry.net/Projects/FTensor has been down for a while,
>> and there does not appear to be a new location?
> https://wlandry.net/Projects/FTensor/ works

Ah, indeed. Thank you! Somehow that must have been the one combination I 
did not try.

I pushed the little patch below.

Gerald

commit b74309c36e59105ef0d8e0d91a85a5bfa884e175
Author: Gerald Pfeifer 
Date:   Fri Feb 17 02:19:19 2023 +0100

Tweak the link to upstream FTensor.

diff --git a/htdocs/testing/testing-ftensor.html 
b/htdocs/testing/testing-ftensor.html
index 2e67b4d8..7b1f4675 100644
--- a/htdocs/testing/testing-ftensor.html
+++ b/htdocs/testing/testing-ftensor.html
@@ -11,7 +11,7 @@
 FTensor build and test guide
 
 This page is a guide to running the testing and timing programs for the
-<a href="http://www.wlandry.net/Projects/FTensor">FTensor</a>
+<a href="https://wlandry.net/Projects/FTensor">FTensor</a>
 tensor class library as part of GCC integration testing.
 
 Resource usage


[wwwdocs] testing: Tweak the link to upstream FTensor (was: Anyone using FTensor to test GCC (or otherwise)?)

2023-02-16 Thread Gerald Pfeifer
On Tue, 14 Feb 2023, NightStrike wrote:
>> Alas http://www.wlandry.net/Projects/FTensor has been down for a while,
>> and there does not appear to be a new location?
> https://wlandry.net/Projects/FTensor/ works

Ah, indeed. Thank you! Somehow that must have been the one combination I 
did not try.

I pushed the little patch below.

Gerald

commit b74309c36e59105ef0d8e0d91a85a5bfa884e175
Author: Gerald Pfeifer 
Date:   Fri Feb 17 02:19:19 2023 +0100

Tweak the link to upstream FTensor.

diff --git a/htdocs/testing/testing-ftensor.html 
b/htdocs/testing/testing-ftensor.html
index 2e67b4d8..7b1f4675 100644
--- a/htdocs/testing/testing-ftensor.html
+++ b/htdocs/testing/testing-ftensor.html
@@ -11,7 +11,7 @@
 FTensor build and test guide
 
 This page is a guide to running the testing and timing programs for the
-<a href="http://www.wlandry.net/Projects/FTensor">FTensor</a>
+<a href="https://wlandry.net/Projects/FTensor">FTensor</a>
 tensor class library as part of GCC integration testing.
 
 Resource usage


Re: Missed warning (-Wuse-after-free)

2023-02-16 Thread Alejandro Colomar via Gcc
On 2/17/23 02:04, Alejandro Colomar wrote:
> [CC: Added those who contributed to the discussion in linux-man@,
>  and also the authors of N2861 for C2x]

[...]

> 
> There was a discussion in linux-man@ some years ago, which now I realize it
> didn't end up being applied (I thought we had applied a patch, but it seems we
> didn't).  I'll check if we still need such a patch (and I guess we do, since
> we're having this conversation).

I forgot to link:


-- 

GPG key fingerprint: A9348594CE31283A826FBDD8D57633D441E25BB5


OpenPGP_signature
Description: OpenPGP digital signature


Re: Missed warning (-Wuse-after-free)

2023-02-16 Thread Alejandro Colomar via Gcc
[CC: Added those who contributed to the discussion in linux-man@,
 and also the authors of N2861 for C2x]

Hi David,

On 2/16/23 16:15, David Malcolm wrote:
> On Thu, 2023-02-16 at 15:35 +0100, Alejandro Colomar via Gcc wrote:
>> Hi!
>>
>> I was preparing an example program of a use-after-realloc bug,
>> when I found that GCC doesn't warn in a case where it should.
>>
>>
>> alx@debian:~/tmp$ cat realloc.c
>> #include <stdint.h>
>> #include <stdio.h>
>> #include <stdlib.h>
>> #include <string.h>
>> #include <unistd.h>
>>
>> static inline char *
>> xstrdup(const char *s)
>> {
>> char  *p;
>>
>> p = strdup(s);
>> if (p == NULL)
>> exit(EXIT_FAILURE);
>> return p;
>> }
>>
>> static inline char *
>> strnul(const char *s)
>> {
>> return (char *) s + strlen(s);
>> }
>>
>> int
>> main(void)
>> {
>> char  *p, *q;
>>
>> p = xstrdup("");
>> q = strnul(p);
>>
>> if (p == q)
>> puts("equal before");
>> else
>> exit(EXIT_FAILURE); // It's an empty string; this
>> won't happen
>>
>> printf("p = %p; q = %p\n", p, q);
>>
>> p = realloc(p, UINT16_MAX);
>> if (p == NULL)
>> exit(EXIT_FAILURE);
>> puts("realloc()");
>>
>> if (p == q) {  // Use after realloc.  I'd expect a warning
>> here.
>> puts("equal after");
>> } else {
>> /* Can we get here?
>>    Let's see the options:
>>
>> - realloc(3) fails:
>> We exit immediately.  We don't arrive
>> here.
>>
>> - realloc(3) doesn't move the memory:
>> p == q, as before
>>
>> - realloc(3) moved the memory:
>> p is guaranteed to be a unique
>> pointer,
>> and q is now an invalid pointer.  It
>> is
>> Undefined Behavior to read `q`, so `p
>> == q`
>> is UB.
>>
>>    As we see, there's no _defined_ path where this
>> can happen
>>  */
>> printf("PID = %i\n", (int) getpid());
>> }
>>
>> printf("p = %p; q = %p\n", p, q);
>> }
>> alx@debian:~/tmp$ cc -Wall -Wextra realloc.c -O3 -fanalyzer
>> realloc.c: In function ‘main’:
>> realloc.c:67:9: warning: pointer ‘p’ may be used after ‘realloc’ [-
>> Wuse-after-free]
>>    67 | printf("p = %p; q = %p\n", p, q);
>>   | ^~~~
>> realloc.c:39:13: note: call to ‘realloc’ here
>>    39 | p = realloc(p, UINT16_MAX);
>>   | ^~
>> alx@debian:~/tmp$ ./a.out 
>> equal before
>> p = 0x55bff80802a0; q = 0x55bff80802a0
>> realloc()
>> PID = 25222
>> p = 0x55bff80806d0; q = 0x55bff80802a0
>>
>>
>> Did I miss anything?
> 
> GCC's -fanalyzer will warn if you dereference q, so e.g. adding:
>  printf("*q = %i\n", *q);
> gives a warning:
>   https://godbolt.org/z/6qx4afb3E
> 
> <source>: In function 'main':
> <source>:65:29: warning: use after 'free' of 'q' [CWE-416] 
> [-Wanalyzer-use-after-free]
>65 | printf("*q = %i\n", *q);
>   | ^~

[...]

> 
> I'm not convinced that it's useful to the end-user to warn about the
> "use of q itself" case.

I didn't quote the standard because I couldn't find it.  I was searching in C11,
and it seems that it was only implicitly Undefined Behavior, without explicit
spelling (the value of the pointer was indeterminate, according to C11).
Now C23 will better clarify that reading such a pointer value (not even
dereferencing) is Undefined Behavior.

There was a discussion in linux-man@ some years ago, which now I realize it
didn't end up being applied (I thought we had applied a patch, but it seems we
didn't).  I'll check if we still need such a patch (and I guess we do, since
we're having this conversation).

Using the pointer is _wrong_.  And by wrong, I mean that it's Undefined 
Behavior.
I think that alone is enough to issue a warning.  Especially, since the compiler
already has that information; otherwise, it couldn't have warned about line 67
of my example program.  I could understand if due to optimizations the compiler
lost that information, so it couldn't warn, but in this case, there's no excuse.

The benefit for users?  They'll realize that the code they wrote is bad.  Not 
even
suspicious, as some warnings warn about suspicious code.  This case is
uncontroversially wrong.  That code has no valid reason to be written that way,
under ISO C.

Cheers,

Alex

> 
> Dave

-- 

GPG key fingerprint: A9348594CE31283A826FBDD8D57633D441E25BB5


OpenPGP_signature
Description: OpenPGP digital signature


[pushed] doc: Reword how to get possible values of a parameter (was: Document all param values and remove defaults (PR middle-end/86078))

2023-02-16 Thread Gerald Pfeifer
On Mon, 24 Sep 2018, Martin Liška wrote:
> As mentioned in the PR we miss defaults for quite some param option.
> I agreed with Richi that easiest way how to fix that would be to remove
> that from documentation and use rather --help=param. It's done in the 
> patch.

And here is a little follow-up patch after that, ahem, little while...

Pushed.

Gerald


gcc/ChangeLog:

* doc/invoke.texi (Optimize Options): Reword the explanation
getting minimal, maximal and default values of a parameter.
---
 gcc/doc/invoke.texi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 6def1fd631e..7b308cd3c31 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -14678,8 +14678,8 @@ The names of specific parameters, and the meaning of 
the values, are
 tied to the internals of the compiler, and are subject to change
 without notice in future releases.
 
-In order to get minimal, maximal and default value of a parameter,
-one can use @option{--help=param -Q} options.
+In order to get the minimal, maximal and default values of a parameter,
+use the @option{--help=param -Q} options.
 
 In each case, the @var{value} is an integer.  The following choices
 of @var{name} are recognized for all targets:
-- 
2.39.1


[Bug analyzer/108830] New: Excess warnings from -Wanalyzer-null-dereference

2023-02-16 Thread dmalcolm at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108830

Bug ID: 108830
   Summary: Excess warnings from -Wanalyzer-null-dereference
   Product: gcc
   Version: 13.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: analyzer
  Assignee: dmalcolm at gcc dot gnu.org
  Reporter: dmalcolm at gcc dot gnu.org
Blocks: 108562
  Target Milestone: ---

Created attachment 54477
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=54477&action=edit
Reproducer

I see lots of (probable) false positives from the attached on GCC 11 through
13.

Trunk:    https://godbolt.org/z/nzYreY1zx
GCC 12.2: https://godbolt.org/z/zjod5768f
GCC 11.3: https://godbolt.org/z/aeevhssG4

After the initial warning:
  <source>:77:24: warning: dereference of NULL 'new_vals' [CWE-476]
[-Wanalyzer-null-dereference]

...we emit 4 further almost identical warnings.

I think they're all false positives, due to invariants we can't know about, but
presumably we should only emit the first warning: once we've determined that
we're derefing NULL 'new_vals', it doesn't make sense to repeatedly complain
each time through the loop (which is what I think is happening).

There are also a huge number of spammy "'new_vals' is NULL" messages.


Referenced Bugs:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108562
[Bug 108562] [meta-bug] tracker bug for issues with -Wanalyzer-null-dereference

[Bug libstdc++/108827] [C++23] Implement P2387R3, Pipe support for user-defined range adaptors

2023-02-16 Thread redi at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108827

Jonathan Wakely  changed:

   What|Removed |Added

 Status|UNCONFIRMED |NEW
 Ever confirmed|0   |1
   Last reconfirmed||2023-02-16

[Bug analyzer/108806] -Wanalyzer-null-dereference false positives due to not handling bitmasks

2023-02-16 Thread dmalcolm at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108806

David Malcolm  changed:

   What|Removed |Added

 Ever confirmed|0   |1
   Last reconfirmed||2023-02-16
 Status|UNCONFIRMED |ASSIGNED

--- Comment #2 from David Malcolm  ---
Should be fixed on trunk for GCC 13 by the above commit.

Keeping this open to track backporting the fix to GCC 12.
I rewrote switch-handling in GCC 12, so I don't think this is going to be
backportable to 11 or 10.

[pushed] analyzer: respect some conditions from bit masks [PR108806]

2023-02-16 Thread David Malcolm via Gcc-patches
PR analyzer/108806 reports false +ves seen from -fanalyzer on code like this
in qemu-7.2.0's hw/intc/omap_intc.c:

  [...snip...]
  struct omap_intr_handler_bank_s* bank = NULL;
  if ((offset & 0xf80) == 0x80) {
[...set "bank" to non-NULL...]
  }
  switch (offset) {
[...snip various cases that don't deref "bank"...]
case 0x80:
  return bank->inputs;
case 0x84:
  return bank->mask;
[...etc...]
   }

where the analyzer falsely complains about execution paths in which
"(offset & 0xf80) == 0x80" was false (leaving "bank" as NULL), but then
in which "switch (offset)" goes to a case for which
"(offset & 0xf80) == 0x80" is true and dereferences NULL "bank", i.e.
paths in which "(offset & 0xf80) == 0x80" is both true *and* false.

This patch adds enough logic to constraint_manager for -fanalyzer to
reject such execution paths as impossible, fixing the false +ves.

Integration testing shows this eliminates 20 probable false positives:

Comparison: 9.08% -> 9.34% GOOD: 66 BAD: 661 -> 641 (-20)

where the affected warnings/projects are:

  -Wanalyzer-null-dereference: 0.00% GOOD: 0 BAD: 279 -> 269 (-10)
qemu-7.2.0: 175 -> 165 (-10)

  -Wanalyzer-use-of-uninitialized-value: 0.00% GOOD: 0 BAD: 153 -> 143 (-10)
 coreutils-9.1:  18 ->  14 (-4)
qemu-7.2.0:  54 ->  48 (-6)

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r13-6101-g4d3b7be281e73e.

gcc/analyzer/ChangeLog:
PR analyzer/108806
* constraint-manager.cc (bounded_range::dump_to_pp): Use
bounded_range::singleton_p.
(constraint_manager::add_bounded_ranges): Handle singleton ranges
by adding an EQ_EXPR constraint.
(constraint_manager::impossible_derived_conditions_p): New.
(constraint_manager::eval_condition): Reject EQ_EXPR when it would
imply impossible derived conditions.
(selftest::test_bits): New.
(selftest::run_constraint_manager_tests): Run it.
* constraint-manager.h (bounded_range::singleton_p): New.
(constraint_manager::impossible_derived_conditions_p): New decl.
* region-model.cc (region_model::get_rvalue_1): Handle
BIT_AND_EXPR, BIT_IOR_EXPR, and BIT_XOR_EXPR.

gcc/testsuite/ChangeLog:
PR analyzer/108806
* gcc.dg/analyzer/null-deref-pr108806-qemu.c: New test.
* gcc.dg/analyzer/pr103217.c: Add -Wno-analyzer-too-complex.
* gcc.dg/analyzer/switch.c (test_bitmask_1): New.
(test_bitmask_2): New.
* gcc.dg/analyzer/uninit-pr108806-qemu.c: New test.

Signed-off-by: David Malcolm 
---
 gcc/analyzer/constraint-manager.cc| 166 +-
 gcc/analyzer/constraint-manager.h |   7 +
 gcc/analyzer/region-model.cc  |   3 +
 .../analyzer/null-deref-pr108806-qemu.c   | 105 +++
 gcc/testsuite/gcc.dg/analyzer/pr103217.c  |   2 +
 gcc/testsuite/gcc.dg/analyzer/switch.c|  76 
 .../gcc.dg/analyzer/uninit-pr108806-qemu.c| 108 
 7 files changed, 466 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/null-deref-pr108806-qemu.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/uninit-pr108806-qemu.c

diff --git a/gcc/analyzer/constraint-manager.cc 
b/gcc/analyzer/constraint-manager.cc
index 5a859c6c0f7..2c9c435527e 100644
--- a/gcc/analyzer/constraint-manager.cc
+++ b/gcc/analyzer/constraint-manager.cc
@@ -421,7 +421,7 @@ dump_cst (pretty_printer *pp, tree cst, bool show_types)
 void
 bounded_range::dump_to_pp (pretty_printer *pp, bool show_types) const
 {
-  if (tree_int_cst_equal (m_lower, m_upper))
+  if (singleton_p ())
 dump_cst (pp, m_lower, show_types);
   else
 {
@@ -2118,6 +2118,17 @@ bool
 constraint_manager::add_bounded_ranges (const svalue *sval,
const bounded_ranges *ranges)
 {
+  /* If RANGES is just a singleton, convert this to adding the constraint:
+ "SVAL == {the singleton}".  */
+  if (ranges->get_count () == 1
+  && ranges->get_range (0).singleton_p ())
+{
+  tree range_cst = ranges->get_range (0).m_lower;
+  const svalue *range_sval
+   = m_mgr->get_or_create_constant_svalue (range_cst);
+  return add_constraint (sval, EQ_EXPR, range_sval);
+}
+
   sval = sval->unwrap_any_unmergeable ();
 
   /* Nothing can be known about unknown/poisoned values.  */
@@ -2466,6 +2477,66 @@ constraint_manager::eval_condition (equiv_class_id lhs_ec,
   return tristate::unknown ();
 }
 
+/* Return true iff "LHS == RHS" is known to be impossible due to
+   derived conditions.
+
+   Look for an EC containing an EC_VAL of the form (LHS OP CST).
+   If found, see if (LHS OP CST) == EC_VAL is false.
+   If so, we know this condition is false.
+
+   For example, if we already know that
+ (X & CST_MASK) == Y
+   and we're evaluating X == Z, we can test to see if
+ (Z & CST_MASK) == EC_VAL
+   and thus if:
+ (Z & CST_MASK) == Y
+   and 

[Bug analyzer/108806] -Wanalyzer-null-dereference false positives due to not handling bitmasks

2023-02-16 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108806

--- Comment #1 from CVS Commits  ---
The master branch has been updated by David Malcolm :

https://gcc.gnu.org/g:4d3b7be281e73ecdaa233598db1a8390422b7770

commit r13-6101-g4d3b7be281e73ecdaa233598db1a8390422b7770
Author: David Malcolm 
Date:   Thu Feb 16 18:12:55 2023 -0500

analyzer: respect some conditions from bit masks [PR108806]

PR analyzer/108806 reports false +ves seen from -fanalyzer on code like this
in qemu-7.2.0's hw/intc/omap_intc.c:

  [...snip...]
  struct omap_intr_handler_bank_s* bank = NULL;
  if ((offset & 0xf80) == 0x80) {
[...set "bank" to non-NULL...]
  }
  switch (offset) {
[...snip various cases that don't deref "bank"...]
case 0x80:
  return bank->inputs;
case 0x84:
  return bank->mask;
[...etc...]
   }

where the analyzer falsely complains about execution paths in which
"(offset & 0xf80) == 0x80" was false (leaving "bank" as NULL), but then
in which "switch (offset)" goes to a case for which
"(offset & 0xf80) == 0x80" is true and dereferences NULL "bank", i.e.
paths in which "(offset & 0xf80) == 0x80" is both true *and* false.

This patch adds enough logic to constraint_manager for -fanalyzer to
reject such execution paths as impossible, fixing the false +ves.

Integration testing shows this eliminates 20 probable false positives:

Comparison: 9.08% -> 9.34% GOOD: 66 BAD: 661 -> 641 (-20)

where the affected warnings/projects are:

  -Wanalyzer-null-dereference: 0.00% GOOD: 0 BAD: 279 -> 269 (-10)
qemu-7.2.0: 175 -> 165 (-10)

  -Wanalyzer-use-of-uninitialized-value: 0.00% GOOD: 0 BAD: 153 -> 143 (-10)
 coreutils-9.1:  18 ->  14 (-4)
qemu-7.2.0:  54 ->  48 (-6)

gcc/analyzer/ChangeLog:
PR analyzer/108806
* constraint-manager.cc (bounded_range::dump_to_pp): Use
bounded_range::singleton_p.
(constraint_manager::add_bounded_ranges): Handle singleton ranges
by adding an EQ_EXPR constraint.
(constraint_manager::impossible_derived_conditions_p): New.
(constraint_manager::eval_condition): Reject EQ_EXPR when it would
imply impossible derived conditions.
(selftest::test_bits): New.
(selftest::run_constraint_manager_tests): Run it.
* constraint-manager.h (bounded_range::singleton_p): New.
(constraint_manager::impossible_derived_conditions_p): New decl.
* region-model.cc (region_model::get_rvalue_1): Handle
BIT_AND_EXPR, BIT_IOR_EXPR, and BIT_XOR_EXPR.

gcc/testsuite/ChangeLog:
PR analyzer/108806
* gcc.dg/analyzer/null-deref-pr108806-qemu.c: New test.
* gcc.dg/analyzer/pr103217.c: Add -Wno-analyzer-too-complex.
* gcc.dg/analyzer/switch.c (test_bitmask_1): New.
(test_bitmask_2): New.
* gcc.dg/analyzer/uninit-pr108806-qemu.c: New test.

Signed-off-by: David Malcolm 

gcc-10-20230216 is now available

2023-02-16 Thread GCC Administrator via Gcc
Snapshot gcc-10-20230216 is now available on
  https://gcc.gnu.org/pub/gcc/snapshots/10-20230216/
and on various mirrors, see http://gcc.gnu.org/mirrors.html for details.

This snapshot has been generated from the GCC 10 git branch
with the following options: git://gcc.gnu.org/git/gcc.git branch 
releases/gcc-10 revision 4303f7d9bc5b37cabb77413c7a4f6946772b9da9

You'll find:

 gcc-10-20230216.tar.xz   Complete GCC

  SHA256=36ed578a5c80bed60695969fa98462cc016049da7a512ed1886aaf308c729261
  SHA1=6b5e65ac4f3d61d91e88485312c14d8bd12e7c09

Diffs from 10-20230209 are available in the diffs/ subdirectory.

When a particular snapshot is ready for public consumption the LATEST-10
link is updated and a message is sent to the gcc list.  Please do not use
a snapshot before it has been announced that way.


[Bug c/108796] Can't intermix C2x and GNU style attributes

2023-02-16 Thread aaron at aaronballman dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108796

--- Comment #10 from Aaron Ballman  ---
One other reason for the Clang behavior that may be worth mentioning is that
this helps users who wish to migrate away from `__attribute__` and towards
`[[]]`. Many (most?) uses of attributes end up behind a macro, so the user may
not even be aware which syntax is being used. Consider this contrived example:
```
// LibraryHeader.h
#if SOMETHING
#define FOO_ATTR __attribute__((foo))
#define BAR_ATTR __attribute__((bar))
#define BAZ_ATTR [[lib::baz]]
#elif SOMETHING_ELSE
...
#else
#define FOO_ATTR
#define BAR_ATTR
#define BAZ_ATTR
#endif

// UserCode.c
FOO_ATTR BAR_ATTR void func(void) { ... }
```
The user reading UserCode.c has no idea what attribute syntax is being used,
nor do they probably care all that much.

Under a strict parsing model, trying to add `BAZ_ATTR` to the declaration of
`func()` requires the user to be very aware of exactly what each macro expands
to, otherwise they might get the order wrong.

With a relaxed parsing model, the user doesn't have to care. Additionally, the
library header can migrate `BAR_ATTR` to `[[gnu::bar]]` syntax without also
migrating `FOO_ATTR` at the same time with less fear of breaking downstream
users due to attribute ordering, so this allows for gradual migration to a
newer syntax. (It's not "no fear" because `[[]]` has strict appertainment
rules, so it's possible for some attributes to break user code when migrating
from `__attribute__` to `[[]]` due to differences in appertainment.)

[og12] Attempt to register OpenMP pinned memory using a device instead of 'mlock' (was: [PATCH] libgomp, openmp: pinned memory)

2023-02-16 Thread Thomas Schwinge
Hi!

On 2023-02-16T16:17:32+, "Stubbs, Andrew via Gcc-patches" 
 wrote:
>> On 2022-06-09T11:38:22+0200, I wrote:
>> > [...]
>> > *register* your standard 'malloc'ed etc. memory via 'cuMemHostRegister',
>> > :
>> > "Page-locks the memory range specified [...] and maps it for the
>> > device(s) [...].  This memory range also is added to the same tracking
>> > mechanism as cuMemHostAlloc to automatically accelerate [...]"?  (No
>> > manual 'mlock'ing involved in that case, too; presumably again using this
>> > interface likely circumvents any "annoying" 'ulimit' limitations?)
>> >
>> > Such a *register* abstraction can then be implemented by all the libgomp
>> > offloading plugins: they just call the respective
>> > CUDA/HSA/etc. functions to register such (existing, 'malloc'ed, etc.)
>> > memory.
>> >
>> > ..., but maybe I'm missing some crucial "detail" here?
>>
>> Indeed this does appear to work; see attached
>> "[WIP] Attempt to register OpenMP pinned memory using a device instead of
>> 'mlock'".
>> Any comments (aside from the TODOs that I'm still working on)?

With those TODOs resolved, I've now pushed to devel/omp/gcc-12
commit a5a4800e92773da7126c00a9c79b172494d58ab5
"Attempt to register OpenMP pinned memory using a device instead of 'mlock'",
see attached.


> The mmap implementation was not optimized for a lot of small allocations, and 
> I can't see that issue changing here

That's correct, 'mmap' remains.  Under the hood, 'cuMemHostRegister' must
surely also be doing some 'mlock'-like thing, so I figured it's best to
feed page-boundary memory regions to it, which 'mmap' gets us.

> so I don't know if this can be used for mlockall replacement.
>
> I had assumed that using the Cuda allocator would fix that limitation.

From what I've read (but no first-hand experiments), there's non-trivial
overhead with 'cuMemHostRegister' (just like with 'mlock'), so routing
all small allocations individually through it probably isn't a good idea
either.  Therefore, I suppose, we'll indeed want to use some local
allocator if we wish this "optimized for a lot of small allocations".

And, getting rid of 'mlockall' is yet another topic.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
From a5a4800e92773da7126c00a9c79b172494d58ab5 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Thu, 16 Feb 2023 15:57:37 +0100
Subject: [PATCH] Attempt to register OpenMP pinned memory using a device
 instead of 'mlock'

Implemented for nvptx offloading via 'cuMemHostRegister'.  This means: (a) not
running into 'mlock' limitations, and (b) the device is aware of this and may
optimize host <-> device memory transfers.

This re-works og12 commit ab7520b3b4cd9fdabfd63652badde478955bd3b5
"libgomp: pinned memory".

	include/
	* cuda/cuda.h (cuMemHostRegister, cuMemHostUnregister): New.
	libgomp/
	* config/linux/allocator.c (linux_memspace_alloc)
	(linux_memspace_free, linux_memspace_realloc): Attempt to register
	OpenMP pinned memory using a device instead of 'mlock'.
	* libgomp-plugin.h (GOMP_OFFLOAD_register_page_locked)
	(GOMP_OFFLOAD_unregister_page_locked): New.
	* libgomp.h (gomp_register_page_locked)
	(gomp_unregister_page_locked): New.
	(struct gomp_device_descr): Add 'register_page_locked_func',
	'unregister_page_locked_func'.
	* plugin/cuda-lib.def (cuMemHostRegister_v2, cuMemHostRegister)
	(cuMemHostUnregister): New.
	* plugin/plugin-nvptx.c (GOMP_OFFLOAD_register_page_locked)
	(GOMP_OFFLOAD_unregister_page_locked): New.
	* target.c (gomp_register_page_locked)
	(gomp_unregister_page_locked): New.
	(gomp_load_plugin_for_device): Handle 'register_page_locked',
	'unregister_page_locked'.
	* testsuite/libgomp.c/alloc-pinned-1.c: Adjust.
	* testsuite/libgomp.c/alloc-pinned-2.c: Likewise.
	* testsuite/libgomp.c/alloc-pinned-3.c: Likewise.
	* testsuite/libgomp.c/alloc-pinned-4.c: Likewise.
	* testsuite/libgomp.c/alloc-pinned-5.c: Likewise.
	* testsuite/libgomp.c/alloc-pinned-6.c: Likewise.
---
 include/ChangeLog.omp|   4 +
 include/cuda/cuda.h  |   3 +
 libgomp/ChangeLog.omp|  24 
 libgomp/config/linux/allocator.c |  74 +-
 libgomp/libgomp-plugin.h |   2 +
 libgomp/libgomp.h|   4 +
 libgomp/plugin/cuda-lib.def  |   3 +
 libgomp/plugin/plugin-nvptx.c|  33 +
 libgomp/target.c | 137 +++
 libgomp/testsuite/libgomp.c/alloc-pinned-1.c |  25 
 libgomp/testsuite/libgomp.c/alloc-pinned-2.c |  25 
 libgomp/testsuite/libgomp.c/alloc-pinned-3.c |  43 +-
 

[Bug sanitizer/108824] ASAN -O2/3 missed a stack-buffer-underflow since GCC-10

2023-02-16 Thread shaohua.li at inf dot ethz.ch via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108824

Li Shaohua  changed:

   What|Removed |Added

 Resolution|--- |INVALID
 Status|UNCONFIRMED |RESOLVED

--- Comment #1 from Li Shaohua  ---
Sorry, I checked the code and confirmed that the overflow operations were
optimized out.

[Bug middle-end/107411] trivial-auto-var-init=zero invalid uninitialized variable warning

2023-02-16 Thread qinzhao at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107411

--- Comment #12 from qinzhao at gcc dot gnu.org ---
(In reply to Jakub Jelinek from comment #11)
> (In reply to qinzhao from comment #10)
> > the following patch fixed this issue:
> 
> This would leak memory.

thank you, I will fix the memory leak issue in the patch.

Re: [og12] In 'libgomp/allocator.c:omp_realloc', route 'free' through 'MEMSPACE_FREE' (was: [PATCH] libgomp, OpenMP, nvptx: Low-latency memory allocator)

2023-02-16 Thread Thomas Schwinge
Hi!

On 2023-02-14T15:11:14+, Andrew Stubbs  wrote:
> On 14/02/2023 12:54, Thomas Schwinge wrote:
>> On 2022-01-13T11:13:51+, Andrew Stubbs  wrote:
>>> Updated patch: this version fixes some missed cases of malloc in the
>>> realloc implementation.
>>
>> Right, and as it seems I've run into another issue: a stray 'free'.
>>
>>> --- a/libgomp/allocator.c
>>> +++ b/libgomp/allocator.c
>>
>> Re 'omp_realloc':
>>
>>> @@ -660,9 +709,10 @@ retry:
>>> gomp_mutex_unlock (&allocator_data->lock);
>>>   #endif
>>> if (prev_size)
>>> - new_ptr = realloc (data->ptr, new_size);
>>> + new_ptr = MEMSPACE_REALLOC (allocator_data->memspace, data->ptr,
>>> + data->size, new_size);
>>> else
>>> - new_ptr = malloc (new_size);
>>> + new_ptr = MEMSPACE_ALLOC (allocator_data->memspace, new_size);
>>> if (new_ptr == NULL)
>>>{
>>>   #ifdef HAVE_SYNC_BUILTINS
>>> @@ -690,7 +740,11 @@ retry:
>>>   && (free_allocator_data == NULL
>>>   || free_allocator_data->pool_size == ~(uintptr_t) 0))
>>>   {
>>> -  new_ptr = realloc (data->ptr, new_size);
>>> +  omp_memspace_handle_t memspace __attribute__((unused))
>>> + = (allocator_data
>>> +? allocator_data->memspace
>>> +: predefined_alloc_mapping[allocator]);
>>> +  new_ptr = MEMSPACE_REALLOC (memspace, data->ptr, data->size, 
>>> new_size);
>>> if (new_ptr == NULL)
>>>goto fail;
>>> ret = (char *) new_ptr + sizeof (struct omp_mem_header);
>>> @@ -701,7 +755,11 @@ retry:
>>>   }
>>> else
>>>   {
>>> -  new_ptr = malloc (new_size);
>>> +  omp_memspace_handle_t memspace __attribute__((unused))
>>> + = (allocator_data
>>> +? allocator_data->memspace
>>> +: predefined_alloc_mapping[allocator]);
>>> +  new_ptr = MEMSPACE_ALLOC (memspace, new_size);
>>> if (new_ptr == NULL)
>>>goto fail;
>>>   }
>>> @@ -735,32 +793,35 @@ retry:
>> |free (data->ptr);
>>> return ret;
>>
>> I run into a SIGSEGV if a non-'malloc'-based allocation is 'free'd here.
>>
>> The attached
>> "In 'libgomp/allocator.c:omp_realloc', route 'free' through 'MEMSPACE_FREE'"
>> appears to resolve my issue, but not yet regression-tested.

No issues in testing.

>> Does that
>> look correct to you?
>
> That looks correct.

Thanks.  I've pushed to devel/omp/gcc-12 branch
commit 3a2c07395b0a565955a7b86f0eba866937e15989
"In 'libgomp/allocator.c:omp_realloc', route 'free' through 'MEMSPACE_FREE'",
see attached.

> The only remaining use of "free" should be the one
> referring to the allocator object itself (i.e. the destructor).

ACK.

>> Or, instead of invoking 'MEMSPACE_FREE', should we scrap the
>> 'used_pool_size' bookkeeping here, and just invoke 'omp_free' instead?
>>
>>  --- libgomp/allocator.c
>>  +++ libgomp/allocator.c
>>  @@ -842,19 +842,7 @@ retry:
>> if (old_size - old_alignment < size)
>>   size = old_size - old_alignment;
>> memcpy (ret, ptr, size);
>>  -  if (__builtin_expect (free_allocator_data
>>  -   && free_allocator_data->pool_size < ~(uintptr_t) 0, 
>> 0))
>>  -{
>>  -#ifdef HAVE_SYNC_BUILTINS
>>  -  __atomic_add_fetch (&free_allocator_data->used_pool_size, 
>> -data->size,
>>  - MEMMODEL_RELAXED);
>>  -#else
>>  -  gomp_mutex_lock (&free_allocator_data->lock);
>>  -  free_allocator_data->used_pool_size -= data->size;
>>  -  gomp_mutex_unlock (&free_allocator_data->lock);
>>  -#endif
>>  -}
>>  -  free (data->ptr);
>>  +  ialias_call (omp_free) (ptr, free_allocator);
>> return ret;
>>
>> (I've not yet analyzed whether that's completely equivalent.)
>
> The used_pool_size code comes from upstream, so if you want to go beyond
> the mechanical substitution of "free" then you're adding a new patch
> (rather than tweaking an old one). I'll leave that for others to comment on.

And I'll leave that for another day, and/or another person.  ;-)


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
From 3a2c07395b0a565955a7b86f0eba866937e15989 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Tue, 14 Feb 2023 13:35:03 +0100
Subject: [PATCH] In 'libgomp/allocator.c:omp_realloc', route 'free' through
 'MEMSPACE_FREE'

... to not run into a SIGSEGV if a non-'malloc'-based allocation is 'free'd
here.

Fix-up for og12 commit c5d1d7651297a273321154a5fe1b01eba9dcf604
"libgomp, nvptx: low-latency memory allocator".

	libgomp/
	* allocator.c (omp_realloc): Route 'free' through 'MEMSPACE_FREE'.
---
 libgomp/ChangeLog.omp |  2 ++
 libgomp/allocator.c   | 12 +++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git 

[Bug tree-optimization/108825] [13 Regression] error during GIMPLE pass: unrolljam

2023-02-16 Thread dcb314 at hotmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108825

--- Comment #9 from David Binderman  ---
(In reply to David Binderman from comment #8)
> (In reply to Andrew Pinski from comment #7)
> > (In reply to David Binderman from comment #6)
> > > git range now seems to be g:0cbb756fe9c8e13a .. g:bd044dae51caea3c,
> > > which is 6 commits.
> > 
> > Most likely r13-3875-g9e11ceef165bc0 .
> 
> Agreed. Over to Richard for their best advice.

Bisection finished. It does appear to be this revision.

[og12] Clarify/verify OpenMP 'omp_calloc' zero-initialization for pinned memory (was: [PATCH] libgomp, openmp: pinned memory)

2023-02-16 Thread Thomas Schwinge
Hi!

On 2022-01-13T13:53:03+, Andrew Stubbs  wrote:
> Pinned memory is allocated via mmap

> --- /dev/null
> +++ b/libgomp/config/linux/allocator.c

> +static void *
> +linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int pin)
> +{
> +  if (pin)
> +return linux_memspace_alloc (memspace, size, pin);
> +[...]

This confused me for a moment, why we don't have to manually
zero-initialize here.  I've pushed to devel/omp/gcc-12 branch
commit 57b8f0600262566cd4f1ab12bf1bdafb29dbdc34
"Clarify/verify OpenMP 'omp_calloc' zero-initialization for pinned memory",
see attached.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
From 57b8f0600262566cd4f1ab12bf1bdafb29dbdc34 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Wed, 15 Feb 2023 10:23:03 +0100
Subject: [PATCH] Clarify/verify OpenMP 'omp_calloc' zero-initialization for
 pinned memory

Clarification for og12 commit ab7520b3b4cd9fdabfd63652badde478955bd3b5
"libgomp: pinned memory".  No functional change.

	libgomp/
	* config/linux/allocator.c (linux_memspace_alloc)
	(linux_memspace_calloc): Clarify zero-initialization for pinned
	memory.
	* testsuite/libgomp.c/alloc-pinned-1.c: Verify zero-initialization
	for pinned memory.
	* testsuite/libgomp.c/alloc-pinned-2.c: Likewise.
	* testsuite/libgomp.c/alloc-pinned-3.c: Likewise.
	* testsuite/libgomp.c/alloc-pinned-4.c: Likewise.
	* testsuite/libgomp.c/alloc-pinned-5.c: Likewise.
---
 libgomp/ChangeLog.omp| 10 ++
 libgomp/config/linux/allocator.c |  2 ++
 libgomp/testsuite/libgomp.c/alloc-pinned-1.c | 10 ++
 libgomp/testsuite/libgomp.c/alloc-pinned-2.c | 10 ++
 libgomp/testsuite/libgomp.c/alloc-pinned-3.c |  9 +
 libgomp/testsuite/libgomp.c/alloc-pinned-4.c |  9 +
 libgomp/testsuite/libgomp.c/alloc-pinned-5.c | 10 ++
 7 files changed, 60 insertions(+)

diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index 1c4b1833c0b..530f5c6acf6 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,5 +1,15 @@
 2023-02-16  Thomas Schwinge  
 
+	* config/linux/allocator.c (linux_memspace_alloc)
+	(linux_memspace_calloc): Clarify zero-initialization for pinned
+	memory.
+	* testsuite/libgomp.c/alloc-pinned-1.c: Verify zero-initialization
+	for pinned memory.
+	* testsuite/libgomp.c/alloc-pinned-2.c: Likewise.
+	* testsuite/libgomp.c/alloc-pinned-3.c: Likewise.
+	* testsuite/libgomp.c/alloc-pinned-4.c: Likewise.
+	* testsuite/libgomp.c/alloc-pinned-5.c: Likewise.
+
 	* config/linux/allocator.c (linux_memspace_calloc): Elide
 	(innocuous) duplicate 'if' condition.
 	* config/nvptx/allocator.c (nvptx_memspace_free): Explicitly
diff --git a/libgomp/config/linux/allocator.c b/libgomp/config/linux/allocator.c
index 8a9171c36df..f278e5cdf14 100644
--- a/libgomp/config/linux/allocator.c
+++ b/libgomp/config/linux/allocator.c
@@ -65,6 +65,7 @@ linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin)
 }
   else if (pin)
 {
+  /* 'mmap' zero-initializes, which 'linux_memspace_calloc' relies on.  */
   void *addr = mmap (NULL, size, PROT_READ | PROT_WRITE,
 			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   if (addr == MAP_FAILED)
@@ -96,6 +97,7 @@ linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int pin)
   return ret;
 }
   else if (pin)
+/* If PINned, 'linux_memspace_alloc' 'mmap's, which zero-initializes.  */
 return linux_memspace_alloc (memspace, size, pin);
   else
 return calloc (1, size);
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-1.c b/libgomp/testsuite/libgomp.c/alloc-pinned-1.c
index 79792b16d83..fb7ac8b0080 100644
--- a/libgomp/testsuite/libgomp.c/alloc-pinned-1.c
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-1.c
@@ -54,6 +54,14 @@ get_pinned_mem ()
 }
 #endif
 
+static void
+verify0 (char *p, size_t s)
+{
+  for (size_t i = 0; i < s; ++i)
+if (p[i] != 0)
+  abort ();
+}
+
 #include 
 
 int
@@ -91,5 +99,7 @@ main ()
   if (get_pinned_mem () <= amount2)
 abort ();
 
+  verify0 (p, SIZE);
+
   return 0;
 }
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-2.c b/libgomp/testsuite/libgomp.c/alloc-pinned-2.c
index 228c656b715..651b89fb42f 100644
--- a/libgomp/testsuite/libgomp.c/alloc-pinned-2.c
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-2.c
@@ -54,6 +54,14 @@ get_pinned_mem ()
 }
 #endif
 
+static void
+verify0 (char *p, size_t s)
+{
+  for (size_t i = 0; i < s; ++i)
+if (p[i] != 0)
+  abort ();
+}
+
 #include 
 
 int
@@ -97,5 +105,7 @@ main ()
   if (get_pinned_mem () <= amount2)
 abort ();
 
+  verify0 (p, SIZE);
+
   return 0;
 }
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-3.c b/libgomp/testsuite/libgomp.c/alloc-pinned-3.c
index 

[og12] Miscellaneous clean-up re OpenMP 'ompx_unified_shared_mem_space', 'ompx_host_mem_space' (was: [PATCH 3/5] openmp, nvptx: ompx_unified_shared_mem_alloc)

2023-02-16 Thread Thomas Schwinge
Hi!

On 2023-02-10T15:31:47+, Andrew Stubbs  wrote:
> On 10/02/2023 14:21, Thomas Schwinge wrote:
>> Is the correct fix the following [...]
>
> Yes, [...]

>>> --- a/libgomp/config/nvptx/allocator.c
>>> +++ b/libgomp/config/nvptx/allocator.c
>>> @@ -125,6 +125,8 @@ nvptx_memspace_alloc (omp_memspace_handle_t memspace, 
>>> size_t size)
>>> __atomic_store_n (&__nvptx_lowlat_heap_root, root.raw, 
>>> MEMMODEL_RELEASE);
>>> return result;
>>>   }
>>> +  else if (memspace == ompx_host_mem_space)
>>> +return NULL;
>>> else
>>>   return malloc (size);
>>>   }
>>> @@ -145,6 +147,8 @@ nvptx_memspace_calloc (omp_memspace_handle_t memspace, 
>>> size_t size)
>>>
>>> return result;
>>>   }
>>> +  else if (memspace == ompx_host_mem_space)
>>> +return NULL;
>>> else
>>>   return calloc (1, size);
>>>   }
>>> @@ -354,6 +358,8 @@ nvptx_memspace_realloc (omp_memspace_handle_t memspace, 
>>> void *addr,
>>>}
>>> return result;
>>>   }
>>> +  else if (memspace == ompx_host_mem_space)
>>> +return NULL;
>>> else
>>>   return realloc (addr, size);
>>>   }
>>
>> (I'd have added an explicit no-op (or, 'abort'?) to
>> 'nvptx_memspace_free', but that's maybe just me...)  ;-\
>
> Why? The host memspace is just the regular heap, which can be a thing on
> any device. It's an extension though so we can define it either way.

My point was: for nvptx libgomp, all 'ompx_host_mem_space' allocator
functions (cited above) 'return NULL', and it's a cheap check to verify
that in 'nvptx_memspace_free'.

>>> --- a/libgomp/libgomp.h
>>> +++ b/libgomp/libgomp.h
>>
>>> +extern void * gomp_usm_alloc (size_t size, int device_num);
>>> +extern void gomp_usm_free (void *device_ptr, int device_num);
>>> +extern bool gomp_is_usm_ptr (void *ptr);
>>
>> 'gomp_is_usm_ptr' isn't defined/used anywhere; I'll remove it.
>
> I think I started that and then decided against. Thanks.

These three combined, I've pushed to devel/omp/gcc-12 branch
commit 23f52e49368d7b26a1b1a72d6bb903d31666e961
"Miscellaneous clean-up re OpenMP 'ompx_unified_shared_mem_space', 
'ompx_host_mem_space'",
see attached.


>>> --- a/libgomp/target.c
>>> +++ b/libgomp/target.c
>>
>>> @@ -3740,6 +3807,9 @@ gomp_load_plugin_for_device (struct gomp_device_descr 
>>> *device,
>>> DLSYM (unload_image);
>>> DLSYM (alloc);
>>> DLSYM (free);
>>> +  DLSYM_OPT (usm_alloc, usm_alloc);
>>> +  DLSYM_OPT (usm_free, usm_free);
>>> +  DLSYM_OPT (is_usm_ptr, is_usm_ptr);
>>> DLSYM (dev2host);
>>> DLSYM (host2dev);
>>
>> As a sanity check, shouldn't we check that either none or all three of
>> those are defined, like in the 'if (cuda && cuda != 4) { [error] }' check
>> a bit further down?
>
> This is only going to happen when somebody writes a new plugin, and then
> they'll discover very quickly that there are issues. I've wasted more
> time writing this sentence than it's worth already. :)

Eh.  ;-) OK, outvoted.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
From 23f52e49368d7b26a1b1a72d6bb903d31666e961 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Tue, 14 Feb 2023 17:10:57 +0100
Subject: [PATCH] Miscellaneous clean-up re OpenMP
 'ompx_unified_shared_mem_space', 'ompx_host_mem_space'

Clean-up for og12 commit 84914e197d91a67b3d27db0e4c69a433462983a5
"openmp, nvptx: ompx_unified_shared_mem_alloc".  No functional change.

	libgomp/
	* config/linux/allocator.c (linux_memspace_calloc): Elide
	(innocuous) duplicate 'if' condition.
	* config/nvptx/allocator.c (nvptx_memspace_free): Explicitly
	handle 'memspace == ompx_host_mem_space'.
	* libgomp.h (gomp_is_usm_ptr): Remove.
---
 libgomp/ChangeLog.omp| 6 ++
 libgomp/config/linux/allocator.c | 3 +--
 libgomp/config/nvptx/allocator.c | 4 
 libgomp/libgomp.h| 1 -
 4 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index b667c72b8ca..1c4b1833c0b 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,5 +1,11 @@
 2023-02-16  Thomas Schwinge  
 
+	* config/linux/allocator.c (linux_memspace_calloc): Elide
+	(innocuous) duplicate 'if' condition.
+	* config/nvptx/allocator.c (nvptx_memspace_free): Explicitly
+	handle 'memspace == ompx_host_mem_space'.
+	* libgomp.h (gomp_is_usm_ptr): Remove.
+
 	* basic-allocator.c (BASIC_ALLOC_YIELD): instead of '#deine',
 	'#define' it.
 
diff --git a/libgomp/config/linux/allocator.c b/libgomp/config/linux/allocator.c
index 07af3a2821a..8a9171c36df 100644
--- a/libgomp/config/linux/allocator.c
+++ b/libgomp/config/linux/allocator.c
@@ -95,8 +95,7 @@ linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int pin)
   memset (ret, 0, size);
   return ret;
 }
- 

[og12] Un-break nvptx libgomp build (was: [OG12][committed] amdgcn: OpenMP low-latency allocator)

2023-02-16 Thread Thomas Schwinge
Hi!

On 2023-02-16T18:06:41+, Andrew Stubbs  wrote:
> 1. 230216-basic-allocator.patch
>
> Separate the allocator from NVPTX so the code can be shared.

Yay!

> nvptx, libgomp: Move the low-latency allocator code
>
> There shouldn't be a functionality change; this is just so AMD can share
> the code.

I've quickly observed one "functionality" change:

> --- /dev/null
> +++ b/libgomp/basic-allocator.c

> +#ifndef BASIC_ALLOC_YIELD
> +#deine BASIC_ALLOC_YIELD
> +#endif

In file included from [...]/libgomp/config/nvptx/allocator.c:49:
[...]/libgomp/config/nvptx/../../basic-allocator.c:52:2: error: invalid 
preprocessing directive #deine; did you mean #define?
   52 | #deine BASIC_ALLOC_YIELD
  |  ^
  |  define

Yes, indeed.

I've pushed to devel/omp/gcc-12 branch
commit 6cc0e7bebf1b3ad6aacf75419e7f06942409f90c
"Un-break nvptx libgomp build", see attached.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
From 6cc0e7bebf1b3ad6aacf75419e7f06942409f90c Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Thu, 16 Feb 2023 21:59:55 +0100
Subject: [PATCH] Un-break nvptx libgomp build

In file included from [...]/libgomp/config/nvptx/allocator.c:49:
[...]/libgomp/config/nvptx/../../basic-allocator.c:52:2: error: invalid preprocessing directive #deine; did you mean #define?
   52 | #deine BASIC_ALLOC_YIELD
  |  ^
  |  define

Yes, indeed.

Fix-up for og12 commit 9583738a62a33a276b2aad980a27e77097f95924
"nvptx, libgomp: Move the low-latency allocator code".

	libgomp/
	* basic-allocator.c (BASIC_ALLOC_YIELD): instead of '#deine',
	'#define' it.
---
 libgomp/ChangeLog.omp | 3 +++
 libgomp/basic-allocator.c | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index ecc14b4f537..b667c72b8ca 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,5 +1,8 @@
 2023-02-16  Thomas Schwinge  
 
+	* basic-allocator.c (BASIC_ALLOC_YIELD): instead of '#deine',
+	'#define' it.
+
 	* testsuite/libgomp.c/usm-1.c: Re-enable non-GCN offloading
 	compilation.
 	* testsuite/libgomp.c/usm-2.c: Likewise.
diff --git a/libgomp/basic-allocator.c b/libgomp/basic-allocator.c
index 94b99a89e0b..b4b9e4ba13a 100644
--- a/libgomp/basic-allocator.c
+++ b/libgomp/basic-allocator.c
@@ -49,7 +49,7 @@
 #endif
 
 #ifndef BASIC_ALLOC_YIELD
-#deine BASIC_ALLOC_YIELD
+#define BASIC_ALLOC_YIELD
 #endif
 
 #define ALIGN(VAR) (((VAR) + 7) & ~7)/* 8-byte granularity.  */
-- 
2.25.1



[Bug tree-optimization/108825] [13 Regression] error during GIMPLE pass: unrolljam

2023-02-16 Thread dcb314 at hotmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108825

David Binderman  changed:

   What|Removed |Added

 CC||rguenther at suse dot de

--- Comment #8 from David Binderman  ---
(In reply to Andrew Pinski from comment #7)
> (In reply to David Binderman from comment #6)
> > git range now seems to be g:0cbb756fe9c8e13a .. g:bd044dae51caea3c,
> > which is 6 commits.
> 
> Most likely r13-3875-g9e11ceef165bc0 .

Agreed. Over to Richard for their best advice.

[Bug middle-end/107411] trivial-auto-var-init=zero invalid uninitialized variable warning

2023-02-16 Thread jakub at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107411

Jakub Jelinek  changed:

   What|Removed |Added

 CC||jakub at gcc dot gnu.org

--- Comment #11 from Jakub Jelinek  ---
(In reply to qinzhao from comment #10)
> the following patch fixed this issue:

This would leak memory.

[og12] 'libgomp.c/usm-{1,2,3,4}.c': Re-enable non-GCN offloading compilation (was: [OG12 commit] amdgcn, libgomp: USM allocation update)

2023-02-16 Thread Thomas Schwinge
Hi!

On 2022-10-24T17:26:44+0100, Andrew Stubbs  wrote:
> I've committed this patch to the devel/omp/gcc-12 branch.

> --- a/libgomp/testsuite/libgomp.c/usm-1.c
> +++ b/libgomp/testsuite/libgomp.c/usm-1.c

> --- a/libgomp/testsuite/libgomp.c/usm-2.c
> +++ b/libgomp/testsuite/libgomp.c/usm-2.c

> --- a/libgomp/testsuite/libgomp.c/usm-3.c
> +++ b/libgomp/testsuite/libgomp.c/usm-3.c

> --- a/libgomp/testsuite/libgomp.c/usm-4.c
> +++ b/libgomp/testsuite/libgomp.c/usm-4.c

> @@ -1,5 +1,6 @@
>  /* { dg-do run } */
>  /* { dg-require-effective-target omp_usm } */
> +/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target 
> offload_target_amdgcn } } */

I've pushed to devel/omp/gcc-12 branch
commit b4d4603df3fed290ccf721899be6bc69f037fe2b
"'libgomp.c/usm-{1,2,3,4}.c': Re-enable non-GCN offloading compilation",
see attached.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From b4d4603df3fed290ccf721899be6bc69f037fe2b Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Tue, 14 Feb 2023 18:57:04 +0100
Subject: [PATCH] 'libgomp.c/usm-{1,2,3,4}.c': Re-enable non-GCN offloading
 compilation

Change '-foffload=amdgcn-amdhsa=[...]' to
'-foffload-options=amdgcn-amdhsa=[...]', so that non-GCN offloading compilation
doesn't get disabled.

Fix-up for og12 commit 6ec2c29dbbc19e7d2a8f991a5848e10c65c7c74c
"amdgcn, libgomp: USM allocation update".

	libgomp/
	* testsuite/libgomp.c/usm-1.c: Re-enable non-GCN offloading
	compilation.
	* testsuite/libgomp.c/usm-2.c: Likewise.
	* testsuite/libgomp.c/usm-3.c: Likewise.
	* testsuite/libgomp.c/usm-4.c: Likewise.
---
 libgomp/ChangeLog.omp   | 8 
 libgomp/testsuite/libgomp.c/usm-1.c | 2 +-
 libgomp/testsuite/libgomp.c/usm-2.c | 2 +-
 libgomp/testsuite/libgomp.c/usm-3.c | 2 +-
 libgomp/testsuite/libgomp.c/usm-4.c | 2 +-
 5 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index 2a20516cd09..ecc14b4f537 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,3 +1,11 @@
+2023-02-16  Thomas Schwinge  
+
+	* testsuite/libgomp.c/usm-1.c: Re-enable non-GCN offloading
+	compilation.
+	* testsuite/libgomp.c/usm-2.c: Likewise.
+	* testsuite/libgomp.c/usm-3.c: Likewise.
+	* testsuite/libgomp.c/usm-4.c: Likewise.
+
 2023-02-16  Tobias Burnus  
 
 	Backported from master:
diff --git a/libgomp/testsuite/libgomp.c/usm-1.c b/libgomp/testsuite/libgomp.c/usm-1.c
index f7bf897b839..35f37de7542 100644
--- a/libgomp/testsuite/libgomp.c/usm-1.c
+++ b/libgomp/testsuite/libgomp.c/usm-1.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target omp_usm } */
-/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn } } */
 
 #include 
 #include 
diff --git a/libgomp/testsuite/libgomp.c/usm-2.c b/libgomp/testsuite/libgomp.c/usm-2.c
index 3f52adbd7e1..783075edb54 100644
--- a/libgomp/testsuite/libgomp.c/usm-2.c
+++ b/libgomp/testsuite/libgomp.c/usm-2.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target omp_usm } */
-/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn } } */
 
 #include 
 #include 
diff --git a/libgomp/testsuite/libgomp.c/usm-3.c b/libgomp/testsuite/libgomp.c/usm-3.c
index 225cba5fe58..733f0f34090 100644
--- a/libgomp/testsuite/libgomp.c/usm-3.c
+++ b/libgomp/testsuite/libgomp.c/usm-3.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target omp_usm } */
-/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn } } */
 
 #include 
 #include 
diff --git a/libgomp/testsuite/libgomp.c/usm-4.c b/libgomp/testsuite/libgomp.c/usm-4.c
index d4addfc587a..5bf99df3b24 100644
--- a/libgomp/testsuite/libgomp.c/usm-4.c
+++ b/libgomp/testsuite/libgomp.c/usm-4.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target omp_usm } */
-/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn } } */
 
 #include 
 #include 
-- 
2.25.1



[Bug tree-optimization/108825] [13 Regression] error during GIMPLE pass: unrolljam

2023-02-16 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108825

--- Comment #7 from Andrew Pinski  ---
(In reply to David Binderman from comment #6)
> git range now seems to be g:0cbb756fe9c8e13a .. g:bd044dae51caea3c,
> which is 6 commits.

Most likely r13-3875-g9e11ceef165bc0 .

[Bug tree-optimization/108825] [13 Regression] error during GIMPLE pass: unrolljam

2023-02-16 Thread dcb314 at hotmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108825

--- Comment #6 from David Binderman  ---
git range now seems to be g:0cbb756fe9c8e13a .. g:bd044dae51caea3c,
which is 6 commits.

[Bug middle-end/107411] trivial-auto-var-init=zero invalid uninitialized variable warning

2023-02-16 Thread qinzhao at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107411

--- Comment #10 from qinzhao at gcc dot gnu.org ---
the following patch fixed this issue:
diff --git a/gcc/tree-ssa-uninit.cc b/gcc/tree-ssa-uninit.cc
index c555cf5cd50..eca727b010a 100644
--- a/gcc/tree-ssa-uninit.cc
+++ b/gcc/tree-ssa-uninit.cc
@@ -113,6 +113,18 @@ uninit_undefined_value_p (tree t)
   return !get_no_uninit_warning (SSA_NAME_VAR (t));
 }

+
+/* Get the name string for the VAR that defined with a call to .DEFERRED_INIT.
+ * Refer to routine gimple_add_init_for_auto_var.  */
+static const char *
+get_var_name (tree var)
+{
+  const char *var_name_str
+= DECL_NAME (var) ? IDENTIFIER_POINTER (DECL_NAME (var))
+  : xasprintf ("D.%u", DECL_UID (var));
+  return var_name_str;
+}
+
 /* Emit warnings for uninitialized variables.  This is done in two passes.

The first pass notices real uses of SSA names with undefined values.
@@ -224,8 +236,6 @@ warn_uninit (opt_code opt, tree t, tree var, gimple
*context,
 at alt_reloc = temp.
  */
  tree lhs_var = NULL_TREE;
- tree lhs_var_name = NULL_TREE;
- const char *lhs_var_name_str = NULL;

  /* Get the variable name from the 3rd argument of call.  */
  tree var_name = gimple_call_arg (var_def_stmt, 2);
@@ -239,11 +249,12 @@ warn_uninit (opt_code opt, tree t, tree var, gimple
*context,
  else if (TREE_CODE (gimple_assign_lhs (context)) == SSA_NAME)
lhs_var = SSA_NAME_VAR (gimple_assign_lhs (context));
}
- if (lhs_var
- && (lhs_var_name = DECL_NAME (lhs_var))
- && (lhs_var_name_str = IDENTIFIER_POINTER (lhs_var_name))
- && (strcmp (lhs_var_name_str, var_name_str) == 0))
-   return;
+ if (lhs_var)
+   {
+ const char *lhs_var_name_str = get_var_name (lhs_var);
+ if (strcmp (lhs_var_name_str, var_name_str) == 0)
+   return;
+   }
  gcc_assert (var_name_str && var_def_stmt);
}
 }

[Bug middle-end/107411] trivial-auto-var-init=zero invalid uninitialized variable warning

2023-02-16 Thread qinzhao at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107411

--- Comment #9 from qinzhao at gcc dot gnu.org ---
it's a bug in tree-ssa-uninit.cc actually.

when doing the following:

  /* Ignore the call to .DEFERRED_INIT that define the original
 var itself as the following case:
temp = .DEFERRED_INIT (4, 2, "alt_reloc");
alt_reloc = temp;
 In order to avoid generating warning for the fake usage
 at alt_reloc = temp.
  */

we need to compare the var name inside the .DEFERRED_INIT call (the 3rd
argument) and the name for the left side variable. If they are the same, we
will NOT report the warning.

there is one issue when we get the name for the left side variable. when the
variable doesn't have a DECL_NAME (it's not a user declared variable, which is
the case for this bug):

>   _1 = .DEFERRED_INIT (4, 2, &"D.2389"[0]);
>   D.2389 = _1;

(in the above example, D.2389 is a variable that doesn't have a DECL_NAME.)

The current check just ignores this case and still reports the warning. This
is incorrect.

The fix is very simple, when get the var name for the left side variable, we
should consider this case and come up with the name the same way as we
construct the 3rd argument for the call to .DEFERRED_INIT (please refer to the
routine "gimple_add_init_for_auto_var")

[Bug tree-optimization/108825] [13 Regression] error during GIMPLE pass: unrolljam

2023-02-16 Thread dcb314 at hotmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108825

--- Comment #5 from David Binderman  ---
(In reply to David Binderman from comment #4)
> git range now seems to be g:59ad8b684dd67e17 .. g:3b54cc9d04c2efb2,
> which is 103 commits.

git range now seems to be g:0cbb756fe9c8e13a .. g:3b54cc9d04c2efb2,
which is 26 commits.

[Bug c/108796] Can't intermix C2x and GNU style attributes

2023-02-16 Thread aaron at aaronballman dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108796

--- Comment #9 from Aaron Ballman  ---
> GNU attributes are declaration specifiers *in the previous examples given 
> here*, not necessarily in all other cases.

Thanks for clarifying!

> (There is then logic in GCC to handle __attribute__ that, according to the 
> syntax, should appertain to a particular entity, so that it's instead 
> applied to some other related entity; for example, moving an attribute 
> from a declaration to its type.  This is deliberately *not* done for [[]] 
> attribute syntax; those attributes are expected to be written in a correct 
> location for the entity they appertain to.)

This touches on why I came to the decision I did in Clang. What `__attribute__`
will apply to is sometimes inscrutable and users are (perhaps) used to it
sliding around to whatever works. As you point out, `[[]]` doesn't have the
same behavior; it has strict appertainment. Because `__attribute__` doesn't
have strict appertainment, it did not seem like an issue for it to continue to
shift around to whatever makes sense. Thus `[[]]` will apply to what the
standard says it applies to, and `__attribute__` applies to whatever it should
apply to based on the attribute names in the specifier, but users don't have to
know whether they need to write `[[]] __attribute__(())` vs `__attribute__(())
[[]]`. (Clang also supports `__declspec`, so there are more combinations to
worry about sometimes.)

It really boils down to whether `__attribute__` is fundamentally a different
"thing" than `[[]]` and I couldn't convince myself they were different. The
result is, when the grammar allows consecutive attribute syntaxes, we parse all
allowed syntaxes in a loop so users can write them in an arbitrary order.

[PATCH] testsuite: Tweak gcc.dg/attr-aligned.c for CRIS

2023-02-16 Thread Hans-Peter Nilsson via Gcc-patches
Asking for the lines outside the "#if __CRIS__" part.
Ok to commit?

-- >8 --
tm.texi says for BIGGEST_ALIGNMENT (from which
__BIGGEST_ALIGNMENT__ is derived): "Biggest alignment that
any data type can require on this machine, in bits."

That is, using that value might be too strict for alignment
of *functions* and CRIS requires at least 16-bit alignment
for functions.  But, one purpose of the test is to test that
alignment can be set to a large but valid value, so pick
512, which has some use as a historically required alignment
for certain I/O descriptors.

* gcc.dg/attr-aligned.c: Adjust comment for ALIGN_MAX_STATIC.
(ALIGN_MAX_STATIC): Set to 512 for CRIS.
---
 gcc/testsuite/gcc.dg/attr-aligned.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/attr-aligned.c 
b/gcc/testsuite/gcc.dg/attr-aligned.c
index 887bdd0f3799..4f0c885dc812 100644
--- a/gcc/testsuite/gcc.dg/attr-aligned.c
+++ b/gcc/testsuite/gcc.dg/attr-aligned.c
@@ -18,6 +18,10 @@
 # else
 #   define ALIGN_MAX_STATIC  ALIGN_MAX_HARD
 # endif
+#elif __CRIS__
+/* __BIGGEST_ALIGNMENT__ doesn't cover functions (16 bits for CRIS). */
+#  define ALIGN_MAX_STATIC  512
+#  define ALIGN_TOO_BIG_OFILE   (ALIGN_MAX_HARD << 1)
 #elif pdp11
 #  define ALIGN_MAX_STATIC  2
 /* Work around a pdp11 ICE (see PR target/87821).  */
@@ -29,7 +33,9 @@
 /* Is this processor- or operating-system specific?  */
 #  define ALIGN_MAX_STATIC  ALIGN_MAX_HARD
 #else
-   /* Guaranteed to be accepted regardless of the target.  */
+   /* Guaranteed to be accepted regardless of the target for objects.
+  This might not be true for alignment of functions though, so
+  may need to be set to a target-specific value above.  */
 #  define ALIGN_MAX_STATIC  __BIGGEST_ALIGNMENT__
/* Guaranteed to be rejected regardless of the target.  */
 #  define ALIGN_TOO_BIG_OFILE   (ALIGN_MAX_HARD << 1)
-- 
2.30.2



[Bug target/108803] [10/11/12/13 Regression] wrong code for 128bit rotate on aarch64-unknown-linux-gnu with -Og

2023-02-16 Thread jakub at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108803

--- Comment #6 from Jakub Jelinek  ---
Created attachment 54476
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=54476&action=edit
gcc13-pr108803.patch

Actually, the above patch isn't correct because for op1 equal to 0 we really
need the reverse_unsigned_shift to give 0 aka (outof_input >> 1) >> 63 rather
than outof_input >> 0 aka outof_input.
Anyway, with this patch we get the same number of instructions as before on the
#c2 functions; the AND instruction unfortunately isn't optimized away because
it has 2 uses rather than just one, but on the other hand 63 - count needs 2
instructions while ~count needs only one.

Re: Stepping down as gcov maintainer and callgraph reviewer

2023-02-16 Thread Jan Hubicka via Gcc
Martin,
> Hello GCC community.
> 
> After spending last decade (including my diploma thesis even more)
> of my life working on GCC, I decided to leave the project and try
> a different job. I would like to thank all the community members I had
> change to interact with, I learned so much and enjoyed the journey!
> I'll be leaving somewhen at the beginning of May.
> 
> That said, I'm stepping down from my 2 positions as I won't have a time
> for proper patch review and bugs in the area I'm responsible for.

I am sad to hear this news and will definitely miss you as a colleague
and co-maintainer.  Thank you for all the work on GCC!

Honza
> 
> I wish the project all the best!
> 
> Cheers,
> Martin

> From bb3aee20cdeeb6399ca77ac05cd8093d66256df3 Mon Sep 17 00:00:00 2001
> From: Martin Liska 
> Date: Thu, 16 Feb 2023 16:50:38 +0100
> Subject: [PATCH] MAINTAINERS: stepping down from my positions
> 
> ChangeLog:
> 
>   * MAINTAINERS: I'm stepping down from my positions.
> ---
>  MAINTAINERS | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 18edc86df67..a61d3ae06df 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -230,7 +230,6 @@ docstring relicensing Gerald Pfeifer  
> 
>  docstring relicensingJoseph Myers
> 
>  predict.def  Jan Hubicka 
>  gcov Jan Hubicka 
> -gcov Martin Liska
>  gcov Nathan Sidwell  
>  option handling  Joseph Myers
> 
>  middle-end   Jeff Law
> @@ -268,7 +267,6 @@ check in changes outside of the parts of the compiler 
> they maintain.
>   Reviewers
>  
>  arc port Claudiu Zissulescu  
> -callgraphMartin Liska
>  callgraphMartin Jambor   
>  C front end  Marek Polacek   
>  CTF, BTF David Faust 
> @@ -519,6 +517,7 @@ Kriang Lerdsuwanakij  
> 
>  Renlin Li
>  Xinliang David Li
>  Chen Liqin   
> +Martin Liska 
>  Jiangning Liu
>  Sa Liu   
>  Ralph Loader 
> -- 
> 2.39.1
> 



[Bug tree-optimization/108825] [13 Regression] error during GIMPLE pass: unrolljam

2023-02-16 Thread dcb314 at hotmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108825

--- Comment #4 from David Binderman  ---
git range now seems to be g:59ad8b684dd67e17 .. g:3b54cc9d04c2efb2,
which is 103 commits.

[Bug tree-optimization/108819] [12/13 Regression] ICE on valid code at -O1 with "-fno-tree-ccp -fno-tree-forwprop" on x86_64-linux-gnu: tree check: expected ssa_name, have integer_cst in number_of_ite

2023-02-16 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108819

Andrew Pinski  changed:

   What|Removed |Added

   Target Milestone|13.0|12.3
 Status|UNCONFIRMED |NEW
Summary|[13 Regression] ICE on  |[12/13 Regression] ICE on
   |valid code at -O1 with  |valid code at -O1 with
   |"-fno-tree-ccp  |"-fno-tree-ccp
   |-fno-tree-forwprop" on  |-fno-tree-forwprop" on
   |x86_64-linux-gnu: tree  |x86_64-linux-gnu: tree
   |check: expected ssa_name,   |check: expected ssa_name,
   |have integer_cst in |have integer_cst in
   |number_of_iterations_cltz,  |number_of_iterations_cltz,
   |at  |at
   |tree-ssa-loop-niter.cc:2394 |tree-ssa-loop-niter.cc:2394
   Last reconfirmed||2023-02-16
  Known to work||11.1.0, 11.3.0
 Ever confirmed|0   |1

--- Comment #1 from Andrew Pinski  ---
  _7 = 1 & 1;

That I think is wrong but I think the problem is before ivcanon and it was
latent in GCC 12 even.


reassoc1 produces:
   [local count: 114863530]:
  _20 = a.0_1 == 0;
  _21 = a.0_1 > 0;
  _7 = 1 & 1;
  if (_7 != 0)
goto ; [89.30%]
  else
goto ; [10.70%]

From:
   [local count: 114863530]:
  _20 = a.0_1 == 0;
  _21 = a.0_1 > 0;
  _22 = _20 & _21;
  if (_22 != 0)
goto ; [89.30%]
  else
goto ; [10.70%]

All it has:
Optimizing range tests a.0_1 -[, 0] and +[, 0] and +[0, 0]
 into 0

GCC 11 looks ok though:
From:

  a.0_1 = a;
  if (a.0_1 <= 0)
goto ; [20.45%]
  else
goto ; [79.55%]

   [local count: 114863530]:
  _20 = a.0_1 == 0;
  _21 = a.0_1 > 0;
  _22 = _20 & _21;
  if (_22 != 0)
goto ; [89.30%]
  else
goto ; [10.70%]
to:

  a.0_1 = a;
  _20 = a.0_1 == 0;
  _16 = 0;
  _21 = a.0_1 > 0;
  _7 = 1 & _16;
  if (_7 != 0)
goto ; [89.30%]
  else
goto ; [10.70%]

So I am going to declare this as a latent bug (which the verifiers don't catch
either ...).

[Bug c/108796] Can't intermix C2x and GNU style attributes

2023-02-16 Thread joseph at codesourcery dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108796

--- Comment #8 from joseph at codesourcery dot com  ---
On Thu, 16 Feb 2023, aaron at aaronballman dot com via Gcc-bugs wrote:

> > The logic is that GNU attributes are declaration specifiers (and can mix 
> > anywhere with other declaration specifiers), but standard attributes 
> > aren't declaration specifiers; rather, they come in specified positions 
> > relative to declaration specifiers (the semantics before and after the 
> > declaration specifiers are different), and in the middle isn't such a 
> > position.
> 
> How does that square with:
> ```
> struct __attribute__((packed)) S { ... };
> void func(int *ip) __attribute__((nonnull(1)));
> ```
> where the GNU attribute is not written where a declaration specifier is
> allowed?

GNU attributes are declaration specifiers *in the previous examples given 
here*, not necessarily in all other cases.  The position in relation to 
other declaration specifiers does not matter in those examples.  Whereas a 
standard attribute at the start of declaration specifiers appertains to 
the entity declared, while a standard attribute at the end of declaration 
specifiers appertains to the type in those declaration specifiers.  That 
is

[[noreturn]] void f();

declares a non-returning function f, but

void [[noreturn]] f();

applies the attribute (invalidly) to the type void, not to the function f.  
While __attribute__((noreturn)) means exactly the same thing in both 
locations - it appertains to the function (and you could also have it in 
the middle of other declaration specifiers, with the same meaning).  So 
the two kinds of attributes are not interchangeable, and the semantics for 
arbitrary mixtures would not be clear.

It might work to have arbitrary mixtures in the struct context.  But in 
the

void func(int *ip) __attribute__((nonnull(1)));

context you again have attributes appertaining to different things: a GNU 
attribute in that position is in a particular position *in a declaration* 
(after any asm ("identifier"), before an initializer), and appertains to 
the entity declared, whereas a standard attribute in such a position is 
part of the declarator (immediately following a function-declarator or 
array-declarator) and appertains to the function type - although they look 
superficially like the same case in simple examples such as this one, they 
aren't at all.  And so again it would be unclear what attributes in 
arbitrary mixtures should appertain to.

(There is then logic in GCC to handle __attribute__ that, according to the 
syntax, should appertain to a particular entity, so that it's instead 
applied to some other related entity; for example, moving an attribute 
from a declaration to its type.  This is deliberately *not* done for [[]] 
attribute syntax; those attributes are expected to be written in a correct 
location for the entity they appertain to.)

[Bug c++/108829] [12/13 Regression] internal compiler error: in is_capture_proxy, at cp/lambda.cc:272

2023-02-16 Thread mpolacek at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108829

Marek Polacek  changed:

   What|Removed |Added

   Keywords|needs-bisection |
   Priority|P3  |P2
 Status|NEW |ASSIGNED
   Assignee|unassigned at gcc dot gnu.org  |mpolacek at gcc dot 
gnu.org

--- Comment #5 from Marek Polacek  ---
Oop, I overlooked that.

Started with r12-6065.  So I guess we just need to add the missing
STRIP_ANY_LOCATION_WRAPPER somewhere.

[Bug tree-optimization/108825] [13 Regression] error during GIMPLE pass: unrolljam

2023-02-16 Thread dcb314 at hotmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108825

--- Comment #3 from David Binderman  ---
(In reply to David Binderman from comment #2)
> Trying revision 1191a412bb17a734.

Seems bad. Trying 59ad8b684dd67e17.

[Bug c++/108829] [12/13 Regression] internal compiler error: in is_capture_proxy, at cp/lambda.cc:272

2023-02-16 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108829

Andrew Pinski  changed:

   What|Removed |Added

   Target Milestone|--- |12.3
Summary|[13 Regression] internal|[12/13 Regression] internal
   |compiler error: in  |compiler error: in
   |is_capture_proxy, at|is_capture_proxy, at
   |cp/lambda.cc:272|cp/lambda.cc:272

[Bug c++/108829] [13 Regression] internal compiler error: in is_capture_proxy, at cp/lambda.cc:272

2023-02-16 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108829

--- Comment #4 from Andrew Pinski  ---
(In reply to Marek Polacek from comment #2)
> I can see the ICE but the reduced test seems overreduced and invalid; do you
> have the original .ii file?

It was attached in comment #1 too.

[Bug c++/108829] [13 Regression] internal compiler error: in is_capture_proxy, at cp/lambda.cc:272

2023-02-16 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108829

Andrew Pinski  changed:

   What|Removed |Added

  Known to fail||13.0
   Keywords||ice-on-valid-code,
   ||needs-bisection
   Last reconfirmed|2023-02-16 00:00:00 |
Summary|internal compiler error: in |[13 Regression] internal
   |is_capture_proxy, at|compiler error: in
   |cp/lambda.cc:272|is_capture_proxy, at
   ||cp/lambda.cc:272
  Known to work||12.2.0

--- Comment #3 from Andrew Pinski  ---
Reduced better (to a valid testcase):
```
template 
void f(void) {
  constexpr int IDX_PAGE_SIZE = 4096;
  int abyPage = [=, abyPage] { return IDX_PAGE_SIZE; }();
}
void h() {
  f<1>();
}
```

[Bug c++/108829] internal compiler error: in is_capture_proxy, at cp/lambda.cc:272

2023-02-16 Thread mpolacek at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108829

Marek Polacek  changed:

   What|Removed |Added

 Ever confirmed|0   |1
   Last reconfirmed||2023-02-16
 CC||mpolacek at gcc dot gnu.org
 Status|UNCONFIRMED |NEW

--- Comment #2 from Marek Polacek  ---
I can see the ICE but the reduced test seems overreduced and invalid; do you
have the original .ii file?

[Bug target/108803] [10/11/12/13 Regression] wrong code for 128bit rotate on aarch64-unknown-linux-gnu with -Og

2023-02-16 Thread jakub at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108803

--- Comment #5 from Jakub Jelinek  ---
The change then would be
--- gcc/optabs.cc.jj2023-01-02 09:32:53.309838465 +0100
+++ gcc/optabs.cc   2023-02-16 19:33:14.583883584 +0100
@@ -507,7 +507,7 @@ expand_subword_shift (scalar_int_mode op
  rtx outof_input, rtx into_input, rtx op1,
  rtx outof_target, rtx into_target,
  int unsignedp, enum optab_methods methods,
- unsigned HOST_WIDE_INT shift_mask)
+ unsigned HOST_WIDE_INT shift_mask, bool mask_count)
 {
   optab reverse_unsigned_shift, unsigned_shift;
   rtx tmp, carries;
@@ -526,6 +526,23 @@ expand_subword_shift (scalar_int_mode op
   tmp = simplify_expand_binop (op1_mode, sub_optab, tmp, op1,
   0, true, methods);
 }
+  else if (mask_count)
+{
+  /* When called from expand_doubleword_shift_condmove with shift_mask 0,
+we need to mask the shift count (and on some targets have that later
+be combined with shifts into a single instruction).  In that case
+we can avoid the separate shift by 1 and another by
+(BITS_PER_WORD - 1) - op1 and can just do one shift by
+-op1 & (BITS_PER_WORD - 1).  */
+  carries = outof_input;
+  tmp = expand_unop (op1_mode, neg_optab, op1, 0, false);
+  rtx tmp2 = immed_wide_int_const (wi::shwi (BITS_PER_WORD - 1,
+  op1_mode), op1_mode);
+  tmp = simplify_expand_binop (op1_mode, and_optab, tmp, tmp2, 0, true,
+  methods);
+  op1 = simplify_expand_binop (op1_mode, and_optab, op1, tmp2, 0, true,
+  methods);
+}
   else
 {
   /* We must avoid shifting by BITS_PER_WORD bits since that is either
@@ -596,6 +613,15 @@ expand_doubleword_shift_condmove (scalar
 {
   rtx outof_superword, into_superword;

+  if (shift_mask < BITS_PER_WORD - 1)
+{
+  rtx tmp = immed_wide_int_const (wi::shwi (BITS_PER_WORD - 1, op1_mode),
+ op1_mode);
+  superword_op1
+   = simplify_expand_binop (op1_mode, and_optab, superword_op1, tmp,
+0, true, methods);
+}
+
   /* Put the superword version of the output into OUTOF_SUPERWORD and
  INTO_SUPERWORD.  */
   outof_superword = outof_target != 0 ? gen_reg_rtx (word_mode) : 0;
@@ -621,7 +647,8 @@ expand_doubleword_shift_condmove (scalar
   if (!expand_subword_shift (op1_mode, binoptab,
 outof_input, into_input, subword_op1,
 outof_target, into_target,
-unsignedp, methods, shift_mask))
+unsignedp, methods, shift_mask,
+shift_mask < BITS_PER_WORD - 1))
 return false;

   /* Select between them.  Do the INTO half first because INTO_SUPERWORD
@@ -742,7 +769,7 @@ expand_doubleword_shift (scalar_int_mode
return expand_subword_shift (op1_mode, binoptab,
 outof_input, into_input, op1,
 outof_target, into_target,
-unsignedp, methods, shift_mask);
+unsignedp, methods, shift_mask, false);
 }

   /* Try using conditional moves to generate straight-line code.  */
@@ -781,7 +808,7 @@ expand_doubleword_shift (scalar_int_mode
   if (!expand_subword_shift (op1_mode, binoptab,
 outof_input, into_input, op1,
 outof_target, into_target,
-unsignedp, methods, shift_mask))
+unsignedp, methods, shift_mask, false))
 return false;

   emit_label (done_label);

or so and emits one fewer instruction for foo and bar as before.  But somehow
the #c0 testcase with it aborts again, so something is not right...

[Bug c++/108829] internal compiler error: in is_capture_proxy, at cp/lambda.cc:272

2023-02-16 Thread v.barinov at samsung dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108829

--- Comment #1 from Slava Barinov  ---
Created attachment 54475
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=54475&action=edit
Result of -freport-bug

Added full output of -freport-bug call

[Bug c++/108829] New: internal compiler error: in is_capture_proxy, at cp/lambda.cc:272

2023-02-16 Thread v.barinov at samsung dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108829

Bug ID: 108829
   Summary: internal compiler error: in is_capture_proxy, at
cp/lambda.cc:272
   Product: gcc
   Version: 12.2.1
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: c++
  Assignee: unassigned at gcc dot gnu.org
  Reporter: v.barinov at samsung dot com
  Target Milestone: ---

Created attachment 54474
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=54474&action=edit
Reduced test case

Found an ICE during build of gdal-3.5.3 library

 gdal-3.6.2/ogr/ogrsf_frmts/openfilegdb/filegdbindex_write.cpp: In
instantiation of ‘bool OpenFileGDB::WriteIndex(VSILFILE*,
std::vector&, void (*)(std::vector&, const
typename ValueOIDPair::first_type&, int), int&, int) [with ValueOIDPair =
std::pair; VSILFILE = FILE; typename ValueOIDPair::first_type =
long int]’:
 gdal-3.6.2/ogr/ogrsf_frmts/openfilegdb/filegdbindex_write.cpp:1300:27:  
required from here
 gdal-3.6.2/ogr/ogrsf_frmts/openfilegdb/filegdbindex_write.cpp:500:9: internal
compiler error: in is_capture_proxy, at cp/lambda.cc:272

[Bug tree-optimization/108825] [13 Regression] error during GIMPLE pass: unrolljam

2023-02-16 Thread dcb314 at hotmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108825

--- Comment #2 from David Binderman  ---
Trying revision 1191a412bb17a734.

[Bug tree-optimization/108828] New: ivopts silencing gcc.dg/Wuse-after-free-2.c:115

2023-02-16 Thread hp at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108828

Bug ID: 108828
   Summary: ivopts silencing gcc.dg/Wuse-after-free-2.c:115
   Product: gcc
   Version: 13.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: tree-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: hp at gcc dot gnu.org
  Target Milestone: ---
  Host: x86_64-pc-linux-gnu
Target: cris-elf

Source from r13-5978-g4f5a1198065d.
Running the test-suite for a cross to cris-elf on x86_64-pc-linux-gnu shows
among other differences for cris-elf:

Running /x/gcc/gcc/testsuite/gcc.dg/dg.exp ...
FAIL: gcc.dg/Wuse-after-free-2.c  (test for warnings, line 115)
FAIL: gcc.dg/Wuse-after-free-2.c  (test for warnings, line 116)

Diffing tree dumps from -fdump-tree-all-all shows a suspicious difference in
the "180t.ivopts" dump compared to that of the native run; for cris-elf, IIUC
some temporary object is introduced that causes dissociation with the
pointer...or something.  Anyway, "-fno-ivopts" makes the warning appear for
cris-elf.

I don't see this test-suite-failure for recent reports to gcc-testresults@ for
other targets (pru-unknown-elf, arm-unknown-linux-gnueabi,
powerpc64le-unknown-linux-gnu, aarch64-suse-linux-gnu, s390x-ibm-linux-gnu,
powerpc-ibm-aix7.2.5.0).
The test has failed since its introduction; it's not a regression.

[Bug target/106282] [10/11/12/13 Regression] m68k: Problem with thread-local storage and -mcpu=5206 since r9-2326-gede9446c26a929

2023-02-16 Thread jsm28 at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106282

Joseph S. Myers  changed:

   What|Removed |Added

   Target Milestone|--- |10.5
Summary|m68k: Problem with  |[10/11/12/13 Regression]
   |thread-local storage and|m68k: Problem with
   |-mcpu=5206 since|thread-local storage and
   |r9-2326-gede9446c26a929 |-mcpu=5206 since
   ||r9-2326-gede9446c26a929
 CC||jsm28 at gcc dot gnu.org

--- Comment #2 from Joseph S. Myers  ---
My glibc bot has been failing for ColdFire since some time between commits
79d38dd46e6b07e5a90ab25df1438eb0918eb475 and
f56d48b2471c388401174029324e1f4c4b84fcdb, with an assembler error building
libgomp (affinity-fmt.c) that looks just like the one in this bug.

affinity-fmt.s: Assembler messages:
affinity-fmt.s:4062: Error: syntax error -- statement `lea
(gomp_tls_data@TLSLE+8,%a0),%a0' ignored

https://sourceware.org/pipermail/libc-testresults/2022q4/010394.html

Although that's from October 2022, I expect it's the same underlying issue, and
just was latent building libgomp until some other change exposed it there.

Note: soft-float ColdFire now fails earlier in my bot, failing to build glibc
with bug 103370 (also an assembler error, also probably latent for some time
before being exposed by an unrelated change).

[Bug rtl-optimization/108826] Inefficient address generation on POWER and RISC-V

2023-02-16 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108826

--- Comment #6 from Andrew Pinski  ---
(In reply to palmer from comment #5)
> We've run into a handful of things that look like this before, I'm not sure
> if it's a backend issue or something more general.  There's two patterns
> here that are frequently bad on RISC-V: "unsigned int" array indices and
> unsigned int shifting.  I think they might both boil down to some problems
> we have tracking the high parts of registers around ABI boundaries.

That seems unrelated to the issue here. In this case the shift is in DI
(ptrmode) mode already so the shift is fine. See comment # 4 for the RTL (this
was the RTL even for RV64).

[Bug rtl-optimization/108826] Inefficient address generation on POWER and RISC-V

2023-02-16 Thread palmer at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108826

--- Comment #5 from palmer at gcc dot gnu.org ---
We've run into a handful of things that look like this before, I'm not sure if
it's a backend issue or something more general.  There's two patterns here that
are frequently bad on RISC-V: "unsigned int" array indices and unsigned int
shifting.  I think they might both boil down to some problems we have tracking
the high parts of registers around ABI boundaries.

FWIW, the smallest bad code I can get is

unsigned int func(unsigned int ui) {
return (ui >> 6 & 5) << 2;
}

func:
srliw   a0,a0,6
slliw   a0,a0,2
andi    a0,a0,20
ret

which is particularly awkward as enough is going right to try and move that
andi, but we still end up with the double shifts.

[Bug target/108803] [10/11/12/13 Regression] wrong code for 128bit rotate on aarch64-unknown-linux-gnu with -Og

2023-02-16 Thread jakub at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108803

--- Comment #4 from Jakub Jelinek  ---
On the other side, if we knew that the backend would use something like the
shifts with masking, we could then avoid the extra reverse unsigned shift by 1
+ reverse unsigned shift by (63 - op1) & 63 plus two shifts by op1 & 63 and
could do instead a single shift by -op1 & 63 (plus as before two shifts by op1
& 63).
So replace the current problematic code for foo in #c2 with:
subs w5, w2, #64
lsl x6, x0, x5
-   lsr x3, x0, 1
-   mov w4, 63
-   sub w4, w4, w2
-   lsr x3, x3, x4
+   neg w4, w2
+   lsr x3, x0, x4
lsl x1, x1, x2
orr x1, x3, x1
lsl x0, x0, x2
csel x0, xzr, x0, pl
csel x1, x6, x1, pl
ret

[Bug target/103370] [12/13 Regression] Assembler error building glibc for ColdFire soft-float

2023-02-16 Thread jsm28 at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103370

--- Comment #9 from Joseph S. Myers  ---
The glibc build failure has gone away and come back at least once since my
previous comment. It came back (I think the most recent time) with

commit 4fa25a7eb322f0a003c1eb15680c71ece345e01e
Author: Martin Liska 
AuthorDate: Mon Jan 24 15:45:38 2022 +0100
Commit: Martin Liska 
CommitDate: Wed Nov 30 14:03:43 2022 +0100

Improve profile handling in switch lowering.

PR tree-optimization/101301
PR tree-optimization/103680

though that's almost surely just exposing a back-end bug. However, the reduced
test here gave a different assembler error

/tmp/cc40cNSh.s: Assembler messages:
/tmp/cc40cNSh.s:320: Error: syntax error -- statement `lea
(.LC0@GOT+3,%a5),%a0' ignored
/tmp/cc40cNSh.s:328: Error: syntax error -- statement `lea
(.LC1@GOT+3,%a5),%a2' ignored

both before and after that commit (different register numbers and .s line
numbers before versus after). Despite the different error messages from the
reduced test and building glibc, they are still likely to be the same bug in
the m68k back end.

[Bug rtl-optimization/108826] Inefficient address generation on POWER and RISC-V

2023-02-16 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108826

Andrew Pinski  changed:

   What|Removed |Added

   Keywords||missed-optimization
   Last reconfirmed||2023-02-16
 Status|UNCONFIRMED |NEW
 Ever confirmed|0   |1

--- Comment #4 from Andrew Pinski  ---
Trying 13, 14, 15 -> 16:
   13: r84:DI=r83:DI+0xc8
  REG_DEAD r83:DI
   14: r85:DI=r84:DI<<0x2
  REG_DEAD r84:DI
   15: r86:DI=r72:DI+r85:DI
  REG_DEAD r85:DI
   16: r76:DI=sign_extend([r86:DI])
  REG_DEAD r86:DI
Failed to match this instruction:
(set (reg:DI 76 [ _5 ])
(sign_extend:DI (mem:SI (plus:DI (plus:DI (mult:DI (reg:DI 83)
(const_int 4 [0x4]))
(reg/f:DI 72 [ _nettle_aes_decrypt_T.0_1 ]))
(const_int 800 [0x320])) [2
_nettle_aes_decrypt_T.0_1->table[2][_4]+0 S4 A32])))
Failed to match this instruction:
(set (reg/f:DI 86)
(plus:DI (ashift:DI (reg:DI 83)
(const_int 2 [0x2]))
(reg/f:DI 72 [ _nettle_aes_decrypt_T.0_1 ])))


So combine does know how to combine all 4 instructions and produce the plus 800
there. But then it goes and splits it up and fails. I can't remember if there
is 4->3 splitting or just 4->2 .

[OG12][committed] amdgcn: OpenMP low-latency allocator

2023-02-16 Thread Andrew Stubbs

These patches implement an LDS memory allocator for OpenMP on AMD.

1. 230216-basic-allocator.patch

Separate the allocator from NVPTX so the code can be shared.

2. 230216-amd-low-lat.patch

Allocate the memory, adjust the default address space, and hook up the 
allocator.


They will need to be integrated with the rest of the memory management 
patch-stack when I repost that for mainline.


Andrew

nvptx, libgomp: Move the low-latency allocator code

There shouldn't be a functionality change; this is just so AMD can share
the code.

The new basic-allocator.c is designed to be included so it can be used as a
template multiple times and inlined.

libgomp/ChangeLog:

* config/nvptx/allocator.c (BASIC_ALLOC_PREFIX): New define, and
include basic-allocator.c.
(__nvptx_lowlat_heap_root): Remove.
(heapdesc): Remove.
(nvptx_memspace_alloc): Move implementation to basic-allocator.c.
(nvptx_memspace_calloc): Likewise.
(nvptx_memspace_free): Likewise.
(nvptx_memspace_realloc): Likewise.
* config/nvptx/team.c (__nvptx_lowlat_heap_root): Remove.
(gomp_nvptx_main): Call __nvptx_lowlat_init.
* basic-allocator.c: New file.

diff --git a/libgomp/basic-allocator.c b/libgomp/basic-allocator.c
new file mode 100644
index 00000000000..94b99a89e0b
--- /dev/null
+++ b/libgomp/basic-allocator.c
@@ -0,0 +1,380 @@
+/* Copyright (C) 2023 Free Software Foundation, Inc.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This is a basic "malloc" implementation intended for use with small,
+   low-latency memories.
+
+   To use this template, define BASIC_ALLOC_PREFIX, and then #include the
+   source file.  The other configuration macros are optional.
+
+   The root heap descriptor is stored in the first bytes of the heap, and each
+   free chunk contains a similar descriptor for the next free chunk in the
+   chain.
+
+   The descriptor is two values: offset and size, which describe the
+   location of a chunk of memory available for allocation. The offset is
+   relative to the base of the heap.  The special offset value 0xffffffff
+   indicates that the heap (free chain) is locked.  The offset and size are
+   32-bit values so the base alignment can be 8-bytes.
+
+   Memory is allocated to the first free chunk that fits.  The free chain
+   is always stored in order of the offset to assist coalescing adjacent
+   chunks.  */
+
+#include "libgomp.h"
+
+#ifndef BASIC_ALLOC_PREFIX
+#error "BASIC_ALLOC_PREFIX not defined."
+#endif
+
+#ifndef BASIC_ALLOC_YIELD
+#define BASIC_ALLOC_YIELD
+#endif
+
+#define ALIGN(VAR) (((VAR) + 7) & ~7)/* 8-byte granularity.  */
+
+#define fn1(prefix, name) prefix ## _ ## name
+#define fn(prefix, name) fn1 (prefix, name)
+#define basic_alloc_init fn(BASIC_ALLOC_PREFIX,init)
+#define basic_alloc_alloc fn(BASIC_ALLOC_PREFIX,alloc)
+#define basic_alloc_calloc fn(BASIC_ALLOC_PREFIX,calloc)
+#define basic_alloc_free fn(BASIC_ALLOC_PREFIX,free)
+#define basic_alloc_realloc fn(BASIC_ALLOC_PREFIX,realloc)
+
+typedef struct {
+  uint32_t offset;
+  uint32_t size;
+} heapdesc;
+
+void
+basic_alloc_init (char *heap, size_t limit)
+{
+  if (heap == NULL)
+return;
+
+  /* Initialize the head of the free chain.  */
+  heapdesc *root = (heapdesc*)heap;
+  root->offset = ALIGN(1);
+  root->size = limit - root->offset;
+
+  /* And terminate the chain.  */
+  heapdesc *next = (heapdesc*)(heap + root->offset);
+  next->offset = 0;
+  next->size = 0;
+}
+
+static void *
+basic_alloc_alloc (char *heap, size_t size)
+{
+  if (heap == NULL)
+return NULL;
+
+  /* Memory is allocated in N-byte granularity.  */
+  size = ALIGN (size);
+
+  /* Acquire a lock on the low-latency heap.  */
+  heapdesc root, *root_ptr = (heapdesc*)heap;
+  do
+{
+  root.offset = __atomic_exchange_n (&root_ptr->offset, 0xffffffff, 
+MEMMODEL_ACQUIRE);
+  if (root.offset != 0xffffffff)
+   {
+ root.size = root_ptr->size;
+ 

  1   2   3   >