[PATCH] longlong.h: Do not use asm input cast for clang

2022-11-30 Thread Adhemerval Zanella via Gcc-patches
clang by default rejects the input casts with:

  error: invalid use of a cast in a inline asm context requiring an
  lvalue: remove the cast or build with -fheinous-gnu-extensions

And even with -fheinous-gnu-extensions clang still throws a warning
and also states that this option might be removed in the future.
For gcc the casts are still somewhat useful [1], so just remove them
when clang is used.

[1] https://gcc.gnu.org/pipermail/gcc-patches/2021-October/581722.html
---
 include/ChangeLog  |  60 ++
 include/longlong.h | 524 +++--
 2 files changed, 325 insertions(+), 259 deletions(-)

diff --git a/include/ChangeLog b/include/ChangeLog
index dda005335c0..747fc923ef5 100644
--- a/include/ChangeLog
+++ b/include/ChangeLog
@@ -1,3 +1,63 @@
+2022-11-30  Adhemerval Zanella  
+
+   * include/longlong.h: Modified.
+   [(__GNUC__) && ! NO_ASM][( (__i386__) ||  (__i486__)) && W_TYPE_SIZE == 
32](add_ss): Modified.
+   [(__GNUC__) && ! NO_ASM][( (__i386__) ||  (__i486__)) && W_TYPE_SIZE == 
32](sub_ddmmss): Modified.
+   [(__GNUC__) && ! NO_ASM][( (__i386__) ||  (__i486__)) && W_TYPE_SIZE == 
32](umul_ppmm): Modified.
+   [(__GNUC__) && ! NO_ASM][( (__i386__) ||  (__i486__)) && W_TYPE_SIZE == 
32](udiv_qrnnd): Modified.
+   [(__GNUC__) && ! NO_ASM][(( (__sparc__) &&  (__arch64__)) ||  
(__sparcv9))  && W_TYPE_SIZE == 64](add_ss): Modified.
+   [(__GNUC__) && ! NO_ASM][(( (__sparc__) &&  (__arch64__)) ||  
(__sparcv9))  && W_TYPE_SIZE == 64](sub_ddmmss): Modified.
+   [(__GNUC__) && ! NO_ASM][(( (__sparc__) &&  (__arch64__)) ||  
(__sparcv9))  && W_TYPE_SIZE == 64](umul_ppmm): Modified.
+   [(__GNUC__) && ! NO_ASM][(__M32R__) && W_TYPE_SIZE == 32](add_ss): 
Modified.
+   [(__GNUC__) && ! NO_ASM][(__M32R__) && W_TYPE_SIZE == 32](sub_ddmmss): 
Modified.
+   [(__GNUC__) && ! NO_ASM][(__arc__) && W_TYPE_SIZE == 32](add_ss): 
Modified.
+   [(__GNUC__) && ! NO_ASM][(__arc__) && W_TYPE_SIZE == 32](sub_ddmmss): 
Modified.
+   [(__GNUC__) && ! NO_ASM][(__arm__) && ( (__thumb2__) || ! __thumb__)  
&& W_TYPE_SIZE == 32][(__ARM_ARCH_2__) || (__ARM_ARCH_2A__)  || 
(__ARM_ARCH_3__)](umul_ppmm): Modified.
+   [(__GNUC__) && ! NO_ASM][(__arm__) && ( (__thumb2__) || ! __thumb__)  
&& W_TYPE_SIZE == 32](add_ss): Modified.
+   [(__GNUC__) && ! NO_ASM][(__arm__) && ( (__thumb2__) || ! __thumb__)  
&& W_TYPE_SIZE == 32](sub_ddmmss): Modified.
+   [(__GNUC__) && ! NO_ASM][(__hppa) && W_TYPE_SIZE == 32](add_ss): 
Modified.
+   [(__GNUC__) && ! NO_ASM][(__hppa) && W_TYPE_SIZE == 32](sub_ddmmss): 
Modified.
+   [(__GNUC__) && ! NO_ASM][(__i960__) && W_TYPE_SIZE == 32](umul_ppmm): 
Modified.
+   [(__GNUC__) && ! NO_ASM][(__i960__) && W_TYPE_SIZE == 32](__umulsidi3): 
Modified.
+   [(__GNUC__) && ! NO_ASM][(__ibm032__)  && W_TYPE_SIZE == 
32](add_ss): Modified.
+   [(__GNUC__) && ! NO_ASM][(__ibm032__)  && W_TYPE_SIZE == 
32](sub_ddmmss): Modified.
+   [(__GNUC__) && ! NO_ASM][(__ibm032__)  && W_TYPE_SIZE == 
32](umul_ppmm): Modified.
+   [(__GNUC__) && ! NO_ASM][(__ibm032__)  && W_TYPE_SIZE == 
32](count_leading_zeros): Modified.
+   [(__GNUC__) && ! NO_ASM][(__m88000__) && W_TYPE_SIZE == 
32][(__mc88110__)](umul_ppmm): Modified.
+   [(__GNUC__) && ! NO_ASM][(__m88000__) && W_TYPE_SIZE == 
32][(__mc88110__)](udiv_qrnnd): Modified.
+   [(__GNUC__) && ! NO_ASM][(__m88000__) && W_TYPE_SIZE == 
32](add_ss): Modified.
+   [(__GNUC__) && ! NO_ASM][(__m88000__) && W_TYPE_SIZE == 
32](sub_ddmmss): Modified.
+   [(__GNUC__) && ! NO_ASM][(__m88000__) && W_TYPE_SIZE == 
32](count_leading_zeros): Modified.
+   [(__GNUC__) && ! NO_ASM][(__mc68000__) && W_TYPE_SIZE == 
32][!((__mcoldfire__))](umul_ppmm): Modified.
+   [(__GNUC__) && ! NO_ASM][(__mc68000__) && W_TYPE_SIZE == 32][( 
(__mc68020__) && ! __mc68060__)](umul_ppmm): Modified.
+   [(__GNUC__) && ! NO_ASM][(__mc68000__) && W_TYPE_SIZE == 32][( 
(__mc68020__) && ! __mc68060__)](udiv_qrnnd): Modified.
+   [(__GNUC__) && ! NO_ASM][(__mc68000__) && W_TYPE_SIZE == 32][( 
(__mc68020__) && ! __mc68060__)](sdiv_qrnnd): Modified.
+   [(__GNUC__) && ! NO_ASM][(__mc68000__) && W_TYPE_SIZE == 
32][(__mcoldfire__)](umul_ppmm): Modified.
+   [(__GNUC__) && ! NO_ASM][(__mc68000__) && W_TYPE_SIZE == 
32](add_ss): Modified.
+   [(__GNUC__) && ! NO_ASM][(__mc68000__) && W_TYPE_SIZE == 
32](sub_ddmmss): Modified.
+   [(__GNUC__) && ! NO_ASM][(__sh__) && W_TYPE_SIZE == 32][! 
__sh1__](umul_ppmm): Modified.
+   [(__GNUC__) && ! NO_ASM][(__sparc__) && ! __arch64__ && ! __sparcv9  && 
W_TYPE_SIZE == 
32][!((__sparc_v9__))][!((__sparc_v8__))][!((__sparclite__))](umul_ppmm): 
Modified.
+   [(__GNUC__) && ! NO_ASM][(__sparc__) && ! __arch64__ && ! __sparcv9  && 
W_TYPE_SIZE == 
32][!((__sparc_v9__))][!((__sparc_v8__))][!((__sparclite__))](udiv_qrnnd): 
Modified.
+   [(__GNUC__) && ! 

Re: [PATCH 3/3] elf: Add _dl_find_eh_frame function

2021-11-23 Thread Adhemerval Zanella via Gcc-patches



On 17/11/2021 10:40, Florian Weimer wrote:
> * Adhemerval Zanella via Libc-alpha:
> 
>> However the code is somewhat complex and I would like to have some feedback
>> on whether gcc would be willing to accept this change (I assume it would
>> require this code to be merged into glibc beforehand).
> 
> There's a long review queue on the GCC side due to the stage1 close.
> It may still be considered for GCC 12.  Jakub has also requested that
> we hold off committing the glibc side until the GCC side is reviewed.
> 
> I'll flesh out the commit message and NEWS entry once we have agreed
> upon the interface.
> 
>>> new file mode 100644
>>> index 00..c7313c122d
>>> --- /dev/null
>>> +++ b/elf/dl-find_eh_frame.c
> 
>>> +/* Data for the main executable.  There is usually a large gap between
>>> +   the main executable and initially loaded shared objects.  Record
>>> +   the main executable separately, to increase the chance that the
>>> +   range for the non-closeable mappings below covers only the shared
>>> +   objects (and not also the gap between main executable and shared
>>> +   objects).  */
>>> +static uintptr_t _dl_eh_main_map_start attribute_relro;
>>> +static struct dl_eh_frame_info _dl_eh_main_info attribute_relro;
>>> +
>>> +/* Data for initally loaded shared objects that cannot be unlaoded.
>>
>> s/initally/initially and s/unlaoded/unloaded.
> 
> Fixed.
> 
>>
>>> +   The mapping base addresses are stored in address order in the
>>> +   _dl_eh_nodelete_mappings_bases array (containing
>>> +   _dl_eh_nodelete_mappings_size elements).  The EH data for a base
>>> +   address is stored in the parallel _dl_eh_nodelete_mappings_infos.
>>> +   These arrays are not modified after initialization.  */
>>> +static uintptr_t _dl_eh_nodelete_mappings_end attribute_relro;
>>> +static size_t _dl_eh_nodelete_mappings_size attribute_relro;
>>> +static uintptr_t *_dl_eh_nodelete_mappings_bases attribute_relro;
>>> +static struct dl_eh_frame_info *_dl_eh_nodelete_mappings_infos
>>> +  attribute_relro;
>>> +
>>> +/* Mappings created by dlopen can go away with dlclose, so a data
>>> +   dynamic data structure with some synchronization is needed.
>>
>> This sounds strange ("a data dynamic data").
> 
> I dropped the first data.
> 
>>
>>> +   Individual segments are similar to the _dl_eh_nodelete_mappings
>>
>> Maybe use _dl_eh_nodelete_mappings_*, because '_dl_eh_nodelete_mappings'
>> itself is not defined anywhere.
> 
> Right.
> 
>>> +   Adding new elements to this data structure is another source of
>>> +   quadratic behavior for dlopen.  If the other causes of quadratic
>>> +   behavior are eliminated, a more complicated data structure will be
>>> +   needed.  */
>>
>> This worries me, especially since we have reports that python and other
>> dynamic environments use a lot of plugins and generate a lot of dlopen()
>> calls.  What kind of performance implications do you foresee here?
> 
> The additional overhead is not disproportionate to the other sources of
> quadratic behavior.  With 1,000 dlopen'ed objects, overall run-time
> seems to be comparable to the strcmp time required soname matching, for
> example, and is quite difficult to measure.  So we could fix the
> performance regression if we used a hash table for that …
> 
> It's just an undesirable complexity class.  The implementation is not
> actually slow because it's a mostly-linear copy (although a backwards
> one).  Other parts of dlopen involve pointer chasing and are much
> slower.

Right, I agree this probably won't incur performance issues; I was just
curious whether you have any numbers on it.

> 
>>> +/* Allocate an empty segment that is at least SIZE large.  PREVIOUS */
>>
>> What does this PREVIOUS refer to?
> 
> Oops, it's now:
> 
> /* Allocate an empty segment that is at least SIZE large.  PREVIOUS
>points to the chain of previously allocated segments and can be
>NULL.  */
> 
>>> +/* Update the version to reflect that an update is happening.  This
>>> +   does not change the bit that controls the active segment chain.
>>> +   Returns the index of the currently active segment chain.  */
>>> +static inline unsigned int
>>> +_dl_eh_mappings_begin_update (void)
>>> +{
>>> +  unsigned int v
>>> += __atomic_wide_counter_fetch_add_relaxed 
>>> (&_dl_eh_loaded_mappings_version,
>>> +   2);
>>
>> Why use an 'unsigned int' for the wide counter here?
> 
> Because …
> 
>>> +  /* Subsequent stores to the TM data must not be reordered before the
>>> + store above with the version update.  */
>>> +  atomic_thread_fence_release ();
>>> +  return v & 1;
>>> +}
> 
> … we only need the lower bit.

Ack, I guess it won't matter to the compiler.

> 
>>> +  /* Other initially loaded objects.  */
>>> +  if (pc >= *_dl_eh_nodelete_mappings_bases
>>> +  && pc < _dl_eh_nodelete_mappings_end)
>>> +{
>>> +  size_t idx = _dl_eh_find_lower_bound (pc,
>>> +

Re: [PATCH 3/3] elf: Add _dl_find_eh_frame function

2021-11-16 Thread Adhemerval Zanella via Gcc-patches



On 03/11/2021 13:28, Florian Weimer via Gcc-patches wrote:
> This function is similar to __gnu_Unwind_Find_exidx as used on arm.
> It can be used to speed up the libgcc unwinder.

Besides the terse patch description, the design seems ok to accomplish the
lock-free read and update.  There are some questions and remarks below,
and I still need to review the tests.

However the code is somewhat complex and I would like to have some feedback
on whether gcc would be willing to accept this change (I assume it would
require this code to be merged into glibc beforehand).

> ---
>  NEWS  |   4 +
>  bits/dlfcn_eh_frame.h |  33 +
>  dlfcn/Makefile|   2 +-
>  dlfcn/dlfcn.h |   2 +
>  elf/Makefile  |  31 +-
>  elf/Versions  |   3 +
>  elf/dl-close.c|   4 +
>  elf/dl-find_eh_frame.c| 864 ++
>  elf/dl-find_eh_frame.h|  90 ++
>  elf/dl-find_eh_frame_slow.h   |  55 ++
>  elf/dl-libc_freeres.c |   2 +
>  elf/dl-open.c |   5 +
>  elf/rtld.c|   7 +
>  elf/tst-dl_find_eh_frame-mod1.c   |  10 +
>  elf/tst-dl_find_eh_frame-mod2.c   |  10 +
>  elf/tst-dl_find_eh_frame-mod3.c   |  10 +
>  elf/tst-dl_find_eh_frame-mod4.c   |  10 +
>  elf/tst-dl_find_eh_frame-mod5.c   |  11 +
>  elf/tst-dl_find_eh_frame-mod6.c   |  11 +
>  elf/tst-dl_find_eh_frame-mod7.c   |  10 +
>  elf/tst-dl_find_eh_frame-mod8.c   |  10 +
>  elf/tst-dl_find_eh_frame-mod9.c   |  10 +
>  elf/tst-dl_find_eh_frame-threads.c| 237 +
>  elf/tst-dl_find_eh_frame.c| 179 
>  include/atomic_wide_counter.h |  14 +
>  include/bits/dlfcn_eh_frame.h |   1 +
>  include/link.h|   3 +
>  manual/Makefile   |   2 +-
>  manual/dynlink.texi   |  69 ++
>  manual/libdl.texi |  10 -
>  manual/probes.texi|   2 +-
>  manual/threads.texi   |   2 +-
>  sysdeps/i386/bits/dlfcn_eh_frame.h|  34 +
>  sysdeps/mach/hurd/i386/ld.abilist |   1 +
>  sysdeps/nios2/bits/dlfcn_eh_frame.h   |  34 +
>  sysdeps/unix/sysv/linux/aarch64/ld.abilist|   1 +
>  sysdeps/unix/sysv/linux/alpha/ld.abilist  |   1 +
>  sysdeps/unix/sysv/linux/arc/ld.abilist|   1 +
>  sysdeps/unix/sysv/linux/arm/be/ld.abilist |   1 +
>  sysdeps/unix/sysv/linux/arm/le/ld.abilist |   1 +
>  sysdeps/unix/sysv/linux/csky/ld.abilist   |   1 +
>  sysdeps/unix/sysv/linux/hppa/ld.abilist   |   1 +
>  sysdeps/unix/sysv/linux/i386/ld.abilist   |   1 +
>  sysdeps/unix/sysv/linux/ia64/ld.abilist   |   1 +
>  .../unix/sysv/linux/m68k/coldfire/ld.abilist  |   1 +
>  .../unix/sysv/linux/m68k/m680x0/ld.abilist|   1 +
>  sysdeps/unix/sysv/linux/microblaze/ld.abilist |   1 +
>  .../unix/sysv/linux/mips/mips32/ld.abilist|   1 +
>  .../sysv/linux/mips/mips64/n32/ld.abilist |   1 +
>  .../sysv/linux/mips/mips64/n64/ld.abilist |   1 +
>  sysdeps/unix/sysv/linux/nios2/ld.abilist  |   1 +
>  .../sysv/linux/powerpc/powerpc32/ld.abilist   |   1 +
>  .../linux/powerpc/powerpc64/be/ld.abilist |   1 +
>  .../linux/powerpc/powerpc64/le/ld.abilist |   1 +
>  sysdeps/unix/sysv/linux/riscv/rv32/ld.abilist |   1 +
>  sysdeps/unix/sysv/linux/riscv/rv64/ld.abilist |   1 +
>  .../unix/sysv/linux/s390/s390-32/ld.abilist   |   1 +
>  .../unix/sysv/linux/s390/s390-64/ld.abilist   |   1 +
>  sysdeps/unix/sysv/linux/sh/be/ld.abilist  |   1 +
>  sysdeps/unix/sysv/linux/sh/le/ld.abilist  |   1 +
>  .../unix/sysv/linux/sparc/sparc32/ld.abilist  |   1 +
>  .../unix/sysv/linux/sparc/sparc64/ld.abilist  |   1 +
>  sysdeps/unix/sysv/linux/x86_64/64/ld.abilist  |   1 +
>  sysdeps/unix/sysv/linux/x86_64/x32/ld.abilist |   1 +
>  64 files changed, 1795 insertions(+), 16 deletions(-)
>  create mode 100644 bits/dlfcn_eh_frame.h
>  create mode 100644 elf/dl-find_eh_frame.c
>  create mode 100644 elf/dl-find_eh_frame.h
>  create mode 100644 elf/dl-find_eh_frame_slow.h
>  create mode 100644 elf/tst-dl_find_eh_frame-mod1.c
>  create mode 100644 elf/tst-dl_find_eh_frame-mod2.c
>  create mode 100644 elf/tst-dl_find_eh_frame-mod3.c
>  create mode 100644 elf/tst-dl_find_eh_frame-mod4.c
>  create mode 100644 elf/tst-dl_find_eh_frame-mod5.c
>  create mode 100644 elf/tst-dl_find_eh_frame-mod6.c
>  create mode 100644 elf/tst-dl_find_eh_frame-mod7.c
>  create mode 100644 elf/tst-dl_find_eh_frame-mod8.c
>  create mode 100644 elf/tst-dl_find_eh_frame-mod9.c
>  create mode 100644 

Re: [PATCH 2/3] elf: Introduce GLRO (dl_libc_freeres), called from __libc_freeres

2021-11-15 Thread Adhemerval Zanella via Gcc-patches
Maybe add a comment explaining why this will be used.

Reviewed-by: Adhemerval Zanella  

On 03/11/2021 13:27, Florian Weimer via Gcc-patches wrote:
> ---
>  elf/Makefile   |  2 +-
>  elf/dl-libc_freeres.c  | 24 
>  elf/rtld.c |  1 +
>  malloc/set-freeres.c   |  5 +
>  sysdeps/generic/ldsodefs.h |  7 +++
>  5 files changed, 38 insertions(+), 1 deletion(-)
>  create mode 100644 elf/dl-libc_freeres.c
> 
> diff --git a/elf/Makefile b/elf/Makefile
> index cb9bcfb799..1c768bdf47 100644
> --- a/elf/Makefile
> +++ b/elf/Makefile
> @@ -68,7 +68,7 @@ elide-routines.os = $(all-dl-routines) dl-support 
> enbl-secure dl-origin \
>  rtld-routines= rtld $(all-dl-routines) dl-sysdep dl-environ 
> dl-minimal \
>dl-error-minimal dl-conflict dl-hwcaps dl-hwcaps_split dl-hwcaps-subdirs \
>dl-usage dl-diagnostics dl-diagnostics-kernel dl-diagnostics-cpu \
> -  dl-mutex
> +  dl-mutex dl-libc_freeres
>  all-rtld-routines = $(rtld-routines) $(sysdep-rtld-routines)
>  
>  CFLAGS-dl-runtime.c += -fexceptions -fasynchronous-unwind-tables

Ok.

> diff --git a/elf/dl-libc_freeres.c b/elf/dl-libc_freeres.c
> new file mode 100644
> index 00..68f305a6f9
> --- /dev/null
> +++ b/elf/dl-libc_freeres.c
> @@ -0,0 +1,24 @@
> +/* Deallocating malloc'ed memory from the dynamic loader.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   .  */
> +
> +#include 
> +
> +void
> +__rtld_libc_freeres (void)
> +{
> +}

Ok.

> diff --git a/elf/rtld.c b/elf/rtld.c
> index be2d5d8e74..847141e21d 100644
> --- a/elf/rtld.c
> +++ b/elf/rtld.c
> @@ -378,6 +378,7 @@ struct rtld_global_ro _rtld_global_ro attribute_relro =
>  ._dl_catch_error = _rtld_catch_error,
>  ._dl_error_free = _dl_error_free,
>  ._dl_tls_get_addr_soft = _dl_tls_get_addr_soft,
> +._dl_libc_freeres = __rtld_libc_freeres,
>  #ifdef HAVE_DL_DISCOVER_OSVERSION
>  ._dl_discover_osversion = _dl_discover_osversion
>  #endif

Ok.

> diff --git a/malloc/set-freeres.c b/malloc/set-freeres.c
> index 5c19a2725c..856ff7831f 100644
> --- a/malloc/set-freeres.c
> +++ b/malloc/set-freeres.c
> @@ -21,6 +21,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "../nss/nsswitch.h"
>  #include "../libio/libioP.h"
> @@ -67,6 +68,10 @@ __libc_freeres (void)
>  
>call_function_static_weak (__libc_dlerror_result_free);
>  
> +#ifdef SHARED
> +  GLRO (dl_libc_freeres) ();
> +#endif
> +
>for (p = symbol_set_first_element (__libc_freeres_ptrs);
> !symbol_set_end_p (__libc_freeres_ptrs, p); ++p)
>  free (*p);

OK.

> diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
> index 1318c36dce..c26860430c 100644
> --- a/sysdeps/generic/ldsodefs.h
> +++ b/sysdeps/generic/ldsodefs.h
> @@ -712,6 +712,10 @@ struct rtld_global_ro
>   namespace.  */
>void (*_dl_error_free) (void *);
>void *(*_dl_tls_get_addr_soft) (struct link_map *);
> +
> +  /* Called from __libc_shared to deallocate malloc'ed memory.  */
> +  void (*_dl_libc_freeres) (void);
> +
>  #ifdef HAVE_DL_DISCOVER_OSVERSION
>int (*_dl_discover_osversion) (void);
>  #endif
> @@ -1416,6 +1420,9 @@ __rtld_mutex_init (void)
>  }
>  #endif /* !PTHREAD_IN_LIBC */
>  
> +/* Implementation of GL (dl_libc_freeres).  */
> +void __rtld_libc_freeres (void) attribute_hidden;
> +
>  void __thread_gscope_wait (void) attribute_hidden;
>  # define THREAD_GSCOPE_WAIT() __thread_gscope_wait ()
>  
> 

OK.


Re: [PATCH 1/3] nptl: Extract from pthread_cond_common.c

2021-11-15 Thread Adhemerval Zanella via Gcc-patches



On 03/11/2021 13:27, Florian Weimer via Libc-alpha wrote:
> And make it an installed header.  This addresses a few aliasing
> violations (which do not seem to result in miscompilation due to
> the use of atomics), and also enables use of wide counters in other
> parts of the library.
> 
> The debug output in nptl/tst-cond22 has been adjusted to print
> the 32-bit values instead because it avoids a big-endian/little-endian
> difference.

LGTM, thanks.

Reviewed-by: Adhemerval Zanella  

> ---
>  bits/atomic_wide_counter.h  |  35 
>  include/atomic_wide_counter.h   |  89 +++
>  include/bits/atomic_wide_counter.h  |   1 +
>  misc/Makefile   |   3 +-
>  misc/atomic_wide_counter.c  | 127 +++
>  nptl/Makefile   |  13 +-
>  nptl/pthread_cond_common.c  | 204 
>  nptl/tst-cond22.c   |  14 +-
>  sysdeps/nptl/bits/thread-shared-types.h |  22 +--
>  9 files changed, 310 insertions(+), 198 deletions(-)
>  create mode 100644 bits/atomic_wide_counter.h
>  create mode 100644 include/atomic_wide_counter.h
>  create mode 100644 include/bits/atomic_wide_counter.h
>  create mode 100644 misc/atomic_wide_counter.c
> 
> diff --git a/bits/atomic_wide_counter.h b/bits/atomic_wide_counter.h
> new file mode 100644
> index 00..0687eb554e
> --- /dev/null
> +++ b/bits/atomic_wide_counter.h
> @@ -0,0 +1,35 @@
> +/* Monotonically increasing wide counters (at least 62 bits).
> +   Copyright (C) 2016-2021 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   .  */
> +
> +#ifndef _BITS_ATOMIC_WIDE_COUNTER_H
> +#define _BITS_ATOMIC_WIDE_COUNTER_H
> +
> +/* Counter that is monotonically increasing (by less than 2**31 per
> +   increment), with a single writer, and an arbitrary number of
> +   readers.  */
> +typedef union
> +{
> +  __extension__ unsigned long long int __value64;
> +  struct
> +  {
> +unsigned int __low;
> +unsigned int __high;
> +  } __value32;
> +} __atomic_wide_counter;
> +
> +#endif /* _BITS_ATOMIC_WIDE_COUNTER_H */

Ok, it would be included in multiple places, so we can't tie it to a
specific header.

> diff --git a/include/atomic_wide_counter.h b/include/atomic_wide_counter.h
> new file mode 100644
> index 00..31f009d5e6
> --- /dev/null
> +++ b/include/atomic_wide_counter.h
> @@ -0,0 +1,89 @@
> +/* Monotonically increasing wide counters (at least 62 bits).
> +   Copyright (C) 2016-2021 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   .  */
> +
> +#ifndef _ATOMIC_WIDE_COUNTER_H
> +#define _ATOMIC_WIDE_COUNTER_H
> +
> +#include 
> +#include 
> +
> +#if __HAVE_64B_ATOMICS
> +
> +static inline uint64_t
> +__atomic_wide_counter_load_relaxed (__atomic_wide_counter *c)
> +{
> +  return atomic_load_relaxed (>__value64);
> +}
> +
> +static inline uint64_t
> +__atomic_wide_counter_fetch_add_relaxed (__atomic_wide_counter *c,
> + unsigned int val)
> +{
> +  return atomic_fetch_add_relaxed (>__value64, val);
> +}
> +
> +static inline uint64_t
> +__atomic_wide_counter_fetch_add_acquire (__atomic_wide_counter *c,
> + unsigned int val)
> +{
> +  return atomic_fetch_add_acquire (>__value64, val);
> +}
> +
> +static inline void
> +__atomic_wide_counter_add_relaxed (__atomic_wide_counter *c,
> +   unsigned int val)
> +{
> +  atomic_store_relaxed (>__value64,
> +