date:20210817

[PATCH 2/3] driver: for_each_path: Pass to callback whether dir is machine-disambiguated

2021-08-17 Thread John Ericson

We will use this in the subsequent diff to control what basenames we
search for. In machine-specific subdirectories, we should just look for
the original basename, but in machine-agnostic subdirectories, we might
additionally look for prefixed disambiguated names, as an alternate
method of keeping targets apart.
---
 gcc/gcc.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/gcc/gcc.c b/gcc/gcc.c
index 710cbfe9a66..f32c7a8de46 100644
--- a/gcc/gcc.c
+++ b/gcc/gcc.c
@@ -2766,7 +2766,7 @@ static void *
 for_each_path (const struct path_prefix *paths,
   bool do_multi,
   size_t extra_space,
-  void *(*callback) (char *, void *),
+  void *(*callback) (char *, bool, void *),
   void *callback_info)
 {
   struct prefix_list *pl;
@@ -2827,7 +2827,7 @@ for_each_path (const struct path_prefix *paths,
  if (!skip_multi_dir)
{
  memcpy (path + len, multi_suffix, suffix_len + 1);
- ret = callback (path, callback_info);
+ ret = callback (path, true, callback_info);
  if (ret)
break;
}
@@ -2838,7 +2838,7 @@ for_each_path (const struct path_prefix *paths,
  && pl->require_machine_suffix == 2)
{
  memcpy (path + len, just_multi_suffix, just_suffix_len + 1);
- ret = callback (path, callback_info);
+ ret = callback (path, true, callback_info);
  if (ret)
break;
}
@@ -2848,7 +2848,7 @@ for_each_path (const struct path_prefix *paths,
  && !pl->require_machine_suffix && multiarch_dir)
{
  memcpy (path + len, multiarch_suffix, multiarch_len + 1);
- ret = callback (path, callback_info);
+ ret = callback (path, true, callback_info);
  if (ret)
break;
}
@@ -2876,7 +2876,7 @@ for_each_path (const struct path_prefix *paths,
  else
path[len] = '\0';
 
- ret = callback (path, callback_info);
+ ret = callback (path, false, callback_info);
  if (ret)
break;
}
@@ -2931,7 +2931,7 @@ struct add_to_obstack_info {
 };
 
 static void *
-add_to_obstack (char *path, void *data)
+add_to_obstack (char *path, bool, void *data)
 {
   struct add_to_obstack_info *info = (struct add_to_obstack_info *) data;
 
@@ -3023,7 +3023,7 @@ struct file_at_path_info {
 };
 
 static void *
-file_at_path (char *path, void *data)
+file_at_path (char *path, bool, void *data)
 {
   struct file_at_path_info *info = (struct file_at_path_info *) data;
   size_t len = strlen (path);
@@ -3074,7 +3074,7 @@ find_a_file (const struct path_prefix *pprefix, const 
char *name, int mode,
path. Like file_at_path but tries machine prefix and exe suffix too. */
 
 static void *
-program_at_path (char *path, void *data)
+program_at_path (char *path, bool machine_specific, void *data)
 {
   /* try first with machine-prefixed name */
   struct file_at_path_info *info = (struct file_at_path_info *) data;
@@ -5945,7 +5945,7 @@ struct spec_path_info {
 };
 
 static void *
-spec_path (char *path, void *data)
+spec_path (char *path, bool, void *data)
 {
   struct spec_path_info *info = (struct spec_path_info *) data;
   size_t len = 0;
-- 
2.31.1

[PATCH 1/3] find_a_program: First search with machine prefix

2021-08-17 Thread John Ericson

This matches the behavior of Clang, and makes it easier to work with
cross compilers without needing to hard-code paths at build time.
---
 gcc/gcc.c | 78 ---
 1 file changed, 68 insertions(+), 10 deletions(-)

diff --git a/gcc/gcc.c b/gcc/gcc.c
index 1a74bf92f7a..710cbfe9a66 100644
--- a/gcc/gcc.c
+++ b/gcc/gcc.c
@@ -1582,6 +1582,11 @@ static const char *machine_suffix = 0;
 
 static const char *just_machine_suffix = 0;
 
+/* Prefix to attach to *basename* of commands being searched.
+   This is just `MACHINE-'.  */
+
+static const char *just_machine_prefix = 0;
+
 /* Adjusted value of GCC_EXEC_PREFIX envvar.  */
 
 static const char *gcc_exec_prefix;
@@ -3026,15 +3031,6 @@ file_at_path (char *path, void *data)
   memcpy (path + len, info->name, info->name_len);
   len += info->name_len;
 
-  /* Some systems have a suffix for executable files.
- So try appending that first.  */
-  if (info->suffix_len)
-{
-  memcpy (path + len, info->suffix, info->suffix_len + 1);
-  if (access_check (path, info->mode) == 0)
-   return path;
-}
-
   path[len] = '\0';
   if (access_check (path, info->mode) == 0)
 return path;
@@ -3074,12 +3070,52 @@ find_a_file (const struct path_prefix *pprefix, const 
char *name, int mode,
file_at_path, &info);
 }
 
+/* Callback for find_a_program.  Appends the file name to the directory
+   path. Like file_at_path but tries machine prefix and exe suffix too. */
+
+static void *
+program_at_path (char *path, void *data)
+{
+  /* try first with machine-prefixed name */
+  struct file_at_path_info *info = (struct file_at_path_info *) data;
+  size_t path_len = strlen (path);
+
+  for (auto prefix : { just_machine_prefix, "" })
+{
+  auto len = path_len;
+
+  auto prefix_len = strlen(prefix);
+  memcpy (path + len, prefix, prefix_len);
+  len += prefix_len;
+
+  memcpy (path + len, info->name, info->name_len);
+  len += info->name_len;
+
+  /* Some systems have a suffix for executable files.
+So try appending that first.  */
+  if (info->suffix_len)
+   {
+ memcpy (path + len, info->suffix, info->suffix_len + 1);
+ if (access_check (path, info->mode) == 0)
+   return path;
+   }
+
+  path[len] = '\0';
+  if (access_check (path, info->mode) == 0)
+   return path;
+}
+
+  return NULL;
+}
+
 /* Specialization of find_a_file for programs that also takes into account
configure-specified default programs. */
 
 static char*
 find_a_program (const char *name)
 {
+  const int mode = X_OK;
+
   /* Do not search if default matches query. */
 
 #ifdef DEFAULT_ASSEMBLER
@@ -3097,7 +3133,28 @@ find_a_program (const char *name)
 return xstrdup (DEFAULT_DSYMUTIL);
 #endif
 
-  return find_a_file (&exec_prefixes, name, X_OK, false);
+  /* Find the filename in question (special case for absolute paths).  */
+
+  if (IS_ABSOLUTE_PATH (name))
+{
+  if (access (name, mode) == 0)
+   return xstrdup (name);
+
+  return NULL;
+}
+
+  struct file_at_path_info info;
+
+  info.name = name;
+  info.suffix = HOST_EXECUTABLE_SUFFIX;
+  info.name_len = strlen (info.name);
+  info.suffix_len = strlen (info.suffix);
+  info.mode = mode;
+
+  return (char*) for_each_path (
+&exec_prefixes, false,
+info.name_len + info.suffix_len + strlen(just_machine_prefix),
+program_at_path, &info);
 }
 
 /* Ranking of prefixes in the sort list. -B prefixes are put before
@@ -8328,6 +8385,7 @@ driver::set_up_specs () const
   machine_suffix = concat (spec_host_machine, dir_separator_str, spec_version,
   accel_dir_suffix, dir_separator_str, NULL);
   just_machine_suffix = concat (spec_machine, dir_separator_str, NULL);
+  just_machine_prefix = concat (spec_machine, "-", NULL);
 
   specs_file = find_a_file (&startfile_prefixes, "specs", R_OK, true);
   /* Read the specs file unless it is a default one.  */
-- 
2.31.1

[PATCH 3/3] find_a_program: Only search for prefixed paths in undisambiguated dirs

2021-08-17 Thread John Ericson

This means, we might search for:

- path/$machine/$version/prog
- path/$machine/prog
- path/$machine-prog

But not

- path/$machine/$version/$machine-prog

because disambiguating $machine twice is unnecessary.

This does mean we are less liberal in what we accept than LLVM, but that's
OK. The downside of always following Postel's law is that everyone converges
on accepting all sorts of garbage, which makes debugging end-to-end hard
when mistakes are not caught early.
---
 gcc/gcc.c | 25 -
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/gcc/gcc.c b/gcc/gcc.c
index f32c7a8de46..7b6b89ac6e9 100644
--- a/gcc/gcc.c
+++ b/gcc/gcc.c
@@ -3080,15 +3080,9 @@ program_at_path (char *path, bool machine_specific, void 
*data)
   struct file_at_path_info *info = (struct file_at_path_info *) data;
   size_t path_len = strlen (path);
 
-  for (auto prefix : { just_machine_prefix, "" })
+  auto search = [=](size_t len) -> void *
 {
-  auto len = path_len;
-
-  auto prefix_len = strlen(prefix);
-  memcpy (path + len, prefix, prefix_len);
-  len += prefix_len;
-
-  memcpy (path + len, info->name, info->name_len);
+  memcpy (path + len, info->name, info->name_len + 1);
   len += info->name_len;
 
   /* Some systems have a suffix for executable files.
@@ -3103,9 +3097,22 @@ program_at_path (char *path, bool machine_specific, void 
*data)
   path[len] = '\0';
   if (access_check (path, info->mode) == 0)
return path;
+
+  return NULL;
+};
+
+  /* Additionally search for $target-prog in machine-agnostic dirs, as an
+ additional way to disambiguate targets. Do not do this in machine-specific
+ dirs because no further disambiguation is needed. */
+  if (!machine_specific)
+{
+  auto prefix_len = strlen(just_machine_prefix);
+  memcpy (path + path_len, just_machine_prefix, prefix_len);
+  auto res = search(path_len + prefix_len);
+  if (res) return res;
 }
 
-  return NULL;
+  return search(path_len);
 }
 
 /* Specialization of find_a_file for programs that also takes into account
-- 
2.31.1

Re: Optional machine prefix for programs in for -B dirs

2021-08-17 Thread John Ericson

OK I have polished off my code in light of previous discussion and will
submit it in follow-up emails.

As mentioned before, this patch series is on top of the
non-behavior-changing cleanup I previously submitted in
https://gcc.gnu.org/pipermail/gcc-patches/2021-August/576725.html

The first patch implements my original approach of always searching for
`$machine-prog`. The next two patches refine that approach by only
searching for `$machine-prog` in directories that are not already
machine-disambiguated, as discussed. I wanted to include this fuller
history to allow both approaches to be compared, but if desired I am
happy to submit a v2 patch set with a more condensed history for
whichever option is chosen.

Thanks,

John

Re: [PATCH] [i386] Add x86 tune to enable v2df vector reduction by paddpd.

2021-08-17 Thread Hongtao Liu via Gcc-patches

On Tue, Aug 17, 2021 at 5:06 PM liuhongt  wrote:
>
> Hi:
>   This patch adds a new x86 tune named X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD
> to enable haddpd for v2df vector reduction; the tune is disabled by default.
>
>   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}
>   Ok for trunk?
>
Pushed to trunk.
> gcc/ChangeLog:
>
> PR target/97147
> * config/i386/i386.h (TARGET_V2DF_REDUCTION_PREFER_HADDPD):
> New macro.
> * config/i386/sse.md (*sse3_haddv2df3_low): Add
> TARGET_V2DF_REDUCTION_PREFER_HADDPD.
> (*sse3_hsubv2df3_low): Ditto.
> * config/i386/x86-tune.def
> (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD): New tune.
>
> gcc/testsuite/ChangeLog:
>
> PR target/97147
> * gcc.target/i386/pr54400.c: Adjust testcase.
> * gcc.target/i386/pr94147.c: New test.
> ---
>  gcc/config/i386/i386.h  |  2 ++
>  gcc/config/i386/sse.md  |  4 ++--
>  gcc/config/i386/x86-tune.def|  5 +
>  gcc/testsuite/gcc.target/i386/pr54400.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pr94147.c | 22 ++
>  5 files changed, 32 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr94147.c
>
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 21fe51bba40..b3e57a83846 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -418,6 +418,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
> ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER]
>  #define TARGET_EXPAND_ABS \
> ix86_tune_features[X86_TUNE_EXPAND_ABS]
> +#define TARGET_V2DF_REDUCTION_PREFER_HADDPD \
> +   ix86_tune_features[X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD]
>
>  /* Feature tests against the various architecture variations.  */
>  enum ix86_arch_indices {
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 27e25cc7952..13889687793 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -2771,7 +2771,7 @@ (define_insn "*sse3_haddv2df3_low"
>   (vec_select:DF
> (match_dup 1)
> (parallel [(match_operand:SI 3 "const_0_to_1_operand")]]
> -  "TARGET_SSE3
> +  "TARGET_SSE3 && TARGET_V2DF_REDUCTION_PREFER_HADDPD
> && INTVAL (operands[2]) != INTVAL (operands[3])"
>"@
> haddpd\t{%0, %0|%0, %0}
> @@ -2790,7 +2790,7 @@ (define_insn "*sse3_hsubv2df3_low"
>   (vec_select:DF
> (match_dup 1)
> (parallel [(const_int 1)]]
> -  "TARGET_SSE3"
> +  "TARGET_SSE3 && TARGET_V2DF_REDUCTION_PREFER_HADDPD"
>"@
> hsubpd\t{%0, %0|%0, %0}
> vhsubpd\t{%1, %1, %0|%0, %1, %1}"
> diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
> index eb057a67750..8f55da89c92 100644
> --- a/gcc/config/i386/x86-tune.def
> +++ b/gcc/config/i386/x86-tune.def
> @@ -452,6 +452,11 @@ DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, 
> "avoid_fma_chains", m_ZNVER)
> smaller FMA chain.  */
>  DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | 
> m_ZNVER3)
>
> +/* X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD: Prefer haddpd
> +   for v2df vector reduction.  */
> +DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD,
> + "v2df_reduction_prefer_haddpd", m_NONE)
> +
>  
> /*/
>  /* AVX instruction selection tuning (some of SSE flags affects AVX, too) 
> */
>  
> /*/
> diff --git a/gcc/testsuite/gcc.target/i386/pr54400.c 
> b/gcc/testsuite/gcc.target/i386/pr54400.c
> index 5ed5ba06644..3a450376b9e 100644
> --- a/gcc/testsuite/gcc.target/i386/pr54400.c
> +++ b/gcc/testsuite/gcc.target/i386/pr54400.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -msse3 -mfpmath=sse" } */
> +/* { dg-options "-O2 -msse3 -mfpmath=sse 
> -mtune-ctrl=v2df_reduction_prefer_haddpd" } */
>
>  #include 
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr94147.c 
> b/gcc/testsuite/gcc.target/i386/pr94147.c
> new file mode 100644
> index 000..8ff5c34834f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr94147.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse3 -mfpmath=sse" } */
> +
> +#include 
> +
> +double f (__m128d p)
> +{
> +  return p[0] - p[1];
> +}
> +
> +double g1 (__m128d p)
> +{
> +  return p[0] + p[1];
> +}
> +
> +double g2 (__m128d p)
> +{
> +  return p[1] + p[0];
> +}
> +
> +/* { dg-final { scan-assembler-not "hsubpd" } } */
> +/* { dg-final { scan-assembler-not "haddpd" } } */
> --
> 2.18.1
>


-- 
BR,
Hongtao

Re: [PATCH] move x86 to use gather/scatter internal functions

2021-08-17 Thread Hongtao Liu via Gcc-patches

On Wed, Aug 18, 2021 at 11:24 AM Hongtao Liu  wrote:
>
> On Tue, Aug 17, 2021 at 10:43 PM Richard Biener via Gcc-patches
>  wrote:
> >
> > On Tue, Aug 17, 2021 at 3:29 PM Richard Biener via Gcc-patches
> >  wrote:
> > >
> > > This is an attempt to start moving the x86 backend to use
> > > standard pattern names for [mask_]gather_load and [mask_]scatter_store
> > > rather than using the builtin_{gather,scatter} target hooks.
> > >
> > > I've started with AVX2 gathers and given x86 only supports masked
> > > gather I only implemented mask_gather_load.  Note while for
> > > the builtin_gather case the vectorizer will provide an all-true
> > > mask operand for non-masked gathers this capability does not
> > > exist for the IFN path yet, so only testcases with actual masked
> > > gathers will work.
> > >
> > > If this looks reasonable on the backend side I'll see to first
> > > complete the vectorizer part, ripping out the target hook and
> > > arranging for the missing pieces.  Another one is the support
> > > for SImode indices with DFmode data which requires unpacking
> > > the index vector and actually recognizing the IFN.
> > >
> > > 2021-08-17  Richard Biener  
> > >
> > > * tree-vect-data-refs.c (vect_check_gather_scatter):
> > > Always use internal functions.
> > > * config/i386/sse.md
> > > (mask_gather_load): New expander.
> > > (mask_gather_load): Likewise.
> > > ---
> > >  gcc/config/i386/sse.md| 56 +++
> > >  gcc/tree-vect-data-refs.c |  4 +--
> > >  2 files changed, 57 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > > index 3957c86c3df..40bec98d9f7 100644
> > > --- a/gcc/config/i386/sse.md
> > > +++ b/gcc/config/i386/sse.md
> > > @@ -23232,12 +23232,22 @@
> > >(V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
> > >(V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
> > >(V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
> > > +(define_mode_attr vec_gather_idxsi
> > > + [(V2DI "v4si") (V4DI "v4si") (V8DI "v8si")
> > > +  (V2DF "v4si") (V4DF "v4si") (V8DF "v8si")
> > > +  (V4SI "v4si") (V8SI "v8si") (V16SI "v16si")
> > > +  (V4SF "v4si") (V8SF "v8si") (V16SF "v16si")])
> > >
> > >  (define_mode_attr VEC_GATHER_IDXDI
> > >   [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
> > >(V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
> > >(V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
> > >(V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
> > > +(define_mode_attr vec_gather_idxdi
> > > + [(V2DI "v2di") (V4DI "v4di") (V8DI "v8di")
> > > +  (V2DF "v2di") (V4DF "v4di") (V8DF "v8di")
> > > +  (V4SI "v2di") (V8SI "v4di") (V16SI "v8di")
> > > +  (V4SF "v2di") (V8SF "v4di") (V16SF "v8di")])
> > >
> > >  (define_mode_attr VEC_GATHER_SRCDI
> > >   [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
> > > @@ -23245,6 +23255,29 @@
> > >(V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
> > >(V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
> > >
> > > +(define_expand "mask_gather_load"
> > > +  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
> > > +  (unspec:VEC_GATHER_MODE
> > > +[(pc)
> > > + (mem:
> > > +   (match_par_dup 6
> > > + [(match_operand 1 "vsib_address_operand")
> > > +  (match_operand:
> > > + 2 "register_operand")
> > > +  (match_operand:SI 4 "const1248_operand")
> > > +  (match_operand:SI 3 "const0_operand")]))
> > > + (mem:BLK (scratch))
> > > + (match_operand: 5 
> > > "register_operand")]
> >
> > One problem of these is that when AVX512[VL] is enabled we get a AVX512 mask
> > mode here and while the internal function expansion check succeeds (it
> > never checks
> > the mask operand!), RTL expansion fails unexpectedly because of this 
> > mismatch.
> >
> > I suppose a more complicated define_mode_attr might do the trick or do I
> I don't think define_mode_attr supports conditional selection based on
> target feature.
> > need to add && !TARGET_AVX512F to these expanders?
> There'll be a duplicated definition of  mask_loadv8sfv8si for avx512
> and no-avx512 versions.
Note gather_loadmn can be shared by avx512 and non-avx512 version, we
can handle mask mode in the preparation statements based on target
feature.
> >
> The best way is like maskloadmn to accept different mask modes in its
> name, but that may create too much complexity in the middle-end to
> choose between avx2 mask_gather_

Re: [PATCH] move x86 to use gather/scatter internal functions

2021-08-17 Thread Hongtao Liu via Gcc-patches

On Tue, Aug 17, 2021 at 10:43 PM Richard Biener via Gcc-patches
 wrote:
>
> On Tue, Aug 17, 2021 at 3:29 PM Richard Biener via Gcc-patches
>  wrote:
> >
> > This is an attempt to start moving the x86 backend to use
> > standard pattern names for [mask_]gather_load and [mask_]scatter_store
> > rather than using the builtin_{gather,scatter} target hooks.
> >
> > I've started with AVX2 gathers and given x86 only supports masked
> > gather I only implemented mask_gather_load.  Note while for
> > the builtin_gather case the vectorizer will provide an all-true
> > mask operand for non-masked gathers this capability does not
> > exist for the IFN path yet, so only testcases with actual masked
> > gathers will work.
> >
> > If this looks reasonable on the backend side I'll see to first
> > complete the vectorizer part, ripping out the target hook and
> > arranging for the missing pieces.  Another one is the support
> > for SImode indices with DFmode data which requires unpacking
> > the index vector and actually recognizing the IFN.
> >
> > 2021-08-17  Richard Biener  
> >
> > * tree-vect-data-refs.c (vect_check_gather_scatter):
> > Always use internal functions.
> > * config/i386/sse.md
> > (mask_gather_load): New expander.
> > (mask_gather_load): Likewise.
> > ---
> >  gcc/config/i386/sse.md| 56 +++
> >  gcc/tree-vect-data-refs.c |  4 +--
> >  2 files changed, 57 insertions(+), 3 deletions(-)
> >
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index 3957c86c3df..40bec98d9f7 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -23232,12 +23232,22 @@
> >(V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
> >(V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
> >(V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
> > +(define_mode_attr vec_gather_idxsi
> > + [(V2DI "v4si") (V4DI "v4si") (V8DI "v8si")
> > +  (V2DF "v4si") (V4DF "v4si") (V8DF "v8si")
> > +  (V4SI "v4si") (V8SI "v8si") (V16SI "v16si")
> > +  (V4SF "v4si") (V8SF "v8si") (V16SF "v16si")])
> >
> >  (define_mode_attr VEC_GATHER_IDXDI
> >   [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
> >(V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
> >(V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
> >(V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
> > +(define_mode_attr vec_gather_idxdi
> > + [(V2DI "v2di") (V4DI "v4di") (V8DI "v8di")
> > +  (V2DF "v2di") (V4DF "v4di") (V8DF "v8di")
> > +  (V4SI "v2di") (V8SI "v4di") (V16SI "v8di")
> > +  (V4SF "v2di") (V8SF "v4di") (V16SF "v8di")])
> >
> >  (define_mode_attr VEC_GATHER_SRCDI
> >   [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
> > @@ -23245,6 +23255,29 @@
> >(V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
> >(V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
> >
> > +(define_expand "mask_gather_load"
> > +  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
> > +  (unspec:VEC_GATHER_MODE
> > +[(pc)
> > + (mem:
> > +   (match_par_dup 6
> > + [(match_operand 1 "vsib_address_operand")
> > +  (match_operand:
> > + 2 "register_operand")
> > +  (match_operand:SI 4 "const1248_operand")
> > +  (match_operand:SI 3 "const0_operand")]))
> > + (mem:BLK (scratch))
> > + (match_operand: 5 "register_operand")]
>
> One problem of these is that when AVX512[VL] is enabled we get a AVX512 mask
> mode here and while the internal function expansion check succeeds (it
> never checks
> the mask operand!), RTL expansion fails unexpectedly because of this mismatch.
>
> I suppose a more complicated define_mode_attr might do the trick or do I
I don't think define_mode_attr supports conditional selection based on
target feature.
> need to add && !TARGET_AVX512F to these expanders?
There'll be a duplicated definition of  mask_loadv8sfv8si for avx512
and no-avx512 versions.
>
The best way is like maskloadmn to accept different mask modes in its
name, but that may create too much complexity in the middle-end to
choose between avx2 mask_gather_load and avx512 mask_gather_load.

> I've meanwhile posted a patch to make the vectorizer fall back to
> masked_ when non-masked_ variants are not available and that seems to work
> fine at least.
>
> Richard.
>
> > +UNSPEC_GATHER))
> > + (clobber (match_scratch:VEC_GATHER_MODE 7))])]
> > +  "TARGET_AVX2 && TARGET_USE_GATHER"
> > +{
> > +  operands[5] = gen_lowpart_SUB

Re: [PATCH] move x86 to use gather/scatter internal functions

2021-08-17 Thread Hongtao Liu via Gcc-patches

On Tue, Aug 17, 2021 at 10:43 PM Richard Biener via Gcc-patches
 wrote:
>
> On Tue, Aug 17, 2021 at 3:29 PM Richard Biener via Gcc-patches
>  wrote:
> >
> > This is an attempt to start moving the x86 backend to use
> > standard pattern names for [mask_]gather_load and [mask_]scatter_store
> > rather than using the builtin_{gather,scatter} target hooks.
> >
> > I've started with AVX2 gathers and given x86 only supports masked
> > gather I only implemented mask_gather_load.  Note while for
> > the builtin_gather case the vectorizer will provide an all-true
> > mask operand for non-masked gathers this capability does not
> > exist for the IFN path yet, so only testcases with actual masked
> > gathers will work.
> >
> > If this looks reasonable on the backend side I'll see to first
> > complete the vectorizer part, ripping out the target hook and
> > arranging for the missing pieces.  Another one is the support
> > for SImode indices with DFmode data which requires unpacking
> > the index vector and actually recognizing the IFN.
> >
> > 2021-08-17  Richard Biener  
> >
> > * tree-vect-data-refs.c (vect_check_gather_scatter):
> > Always use internal functions.
> > * config/i386/sse.md
> > (mask_gather_load): New expander.
> > (mask_gather_load): Likewise.
> > ---
> >  gcc/config/i386/sse.md| 56 +++
> >  gcc/tree-vect-data-refs.c |  4 +--
> >  2 files changed, 57 insertions(+), 3 deletions(-)
> >
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index 3957c86c3df..40bec98d9f7 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -23232,12 +23232,22 @@
> >(V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
> >(V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
> >(V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
> > +(define_mode_attr vec_gather_idxsi
> > + [(V2DI "v4si") (V4DI "v4si") (V8DI "v8si")
> > +  (V2DF "v4si") (V4DF "v4si") (V8DF "v8si")
> > +  (V4SI "v4si") (V8SI "v8si") (V16SI "v16si")
> > +  (V4SF "v4si") (V8SF "v8si") (V16SF "v16si")])
> >
> >  (define_mode_attr VEC_GATHER_IDXDI
> >   [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
> >(V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
> >(V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
> >(V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
> > +(define_mode_attr vec_gather_idxdi
> > + [(V2DI "v2di") (V4DI "v4di") (V8DI "v8di")
> > +  (V2DF "v2di") (V4DF "v4di") (V8DF "v8di")
> > +  (V4SI "v2di") (V8SI "v4di") (V16SI "v8di")
> > +  (V4SF "v2di") (V8SF "v4di") (V16SF "v8di")])
> >
> >  (define_mode_attr VEC_GATHER_SRCDI
> >   [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
> > @@ -23245,6 +23255,29 @@
> >(V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
> >(V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
> >
> > +(define_expand "mask_gather_load"
> > +  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
> > +  (unspec:VEC_GATHER_MODE
> > +[(pc)
> > + (mem:
> > +   (match_par_dup 6
> > + [(match_operand 1 "vsib_address_operand")
> > +  (match_operand:
> > + 2 "register_operand")
> > +  (match_operand:SI 4 "const1248_operand")
> > +  (match_operand:SI 3 "const0_operand")]))
> > + (mem:BLK (scratch))
> > + (match_operand: 5 "register_operand")]
>
> One problem of these is that when AVX512[VL] is enabled we get a AVX512 mask
> mode here and while the internal function expansion check succeeds (it
> never checks
> the mask operand!), RTL expansion fails unexpectedly because of this mismatch.
>
‘mask_gather_loadmn’
Like ‘gather_loadmn’, but takes an extra mask operand as operand 5. Bit i of
the mask is set if element i of the result should be loaded from memory and
clear if element i of the result should be set to zero.

According to the document, the mask here needs to be an avx512 mask,
would it be ok to use a vector mask?

> I suppose a more complicated define_mode_attr might do the trick or do I
> need to add && !TARGET_AVX512F to these expanders?
>
> I've meanwhile posted a patch to make the vectorizer fall back to
> masked_ when non-masked_ variants are not available and that seems to work
> fine at least

‘gather_loadmn’
Load several separate memory locations into a vector of mode m. Operand 1
is a scalar base address and operand 2 is a vector of mode n containing offsets
from that base. Operand 0 is a destination vector with the same number of
elements

Re: [PATCH] Adding target hook allows to reject initialization of register

2021-08-17 Thread Jojo R via Gcc-patches



— Jojo
在 2021年8月16日 +0800 PM3:15，Richard Biener ，写道：
> On Fri, Aug 13, 2021 at 3:59 AM Jojo R  wrote:
> >
> >
> > — Jojo
> > 在 2021年8月11日 +0800 PM6:44，Richard Biener ，写道：
> >
> > On Wed, Aug 11, 2021 at 11:28 AM Richard Sandiford
> >  wrote:
> >
> >
> > Richard Biener  writes:
> >
> > On Tue, Aug 10, 2021 at 10:33 AM Jojo R via Gcc-patches
> >  wrote:
> >
> >
> > Some target like RISC-V allow to group vector register as a whole,
> > and only operate part of it in fact, but the 'init-regs' pass will add 
> > initialization
> > for uninitialized registers. Add this hook to reject this action for 
> > reducing instruction.
> >
> >
> > Are these groups "visible"? That is, are the pseudos multi-reg
> > pseudos? I wonder
> > if there's a more generic way to tame down initregs w/o introducing a new 
> > target
> > hook.
> >
> > Btw, initregs is a red herring - it ideally should go away. See PR61810.
> >
> > So instead of adding to it can you see whether disabling the pass for RISC-V
> > works w/o fallout (and add a comment to the PR)? Maybe some more RTL
> > literate (in particular DF literate) can look at the remaining issue.
> > Richard, did you
> > ever have a look into the "issue" that initregs covers up (whatever
> > that exactly is)?
> >
> >
> > No, sorry. I don't really understand what it would be from the comment
> > in the code:
> >
> > [...] papers over some problems on the arm and other
> > processors where certain isa constraints cannot be handled by gcc.
> > These are of the form where two operands to an insn my not be the
> > same. The ra will only make them the same if they do not
> > interfere, and this can only happen if one is not initialized.
> >
> > That would definitely be an RA bug if true, since the constraints need
> > to be applied independently of dataflow information. But the comment
> > and code predate LRA and maybe no-one fancied poking around in reload
> > (hard to believe).
> >
> > I'd be very surprised if LRA gets this wrong.
> >
> >
> > OK, we're wondering since quite some time - how about changing the
> > gate of initregs to optimize > 0 && !targetm.lra_p ()? We'll hopefully
> > figure out the "real" issue the pass is papering over. At the same time
> > we're leaving old reload (and likely unmaintained) targets unaffected.
> >
> > Richard,
> >
> > So this patch is not necessary ?
> >
> > I need to disable this pass in my situation only ?
> > I am afraid some side effect in my projects without this init-regs pass … 
> > ...
>
> Can you try disabling the pass on RISC-V?
Okay, I will do the test on GCC version 10.2, is it ok ?
It will take a few days :)

Or which version do you suggest to do this ?
> Richard.
>
> > Richard.
> >
> > Thanks,
> > Richard

Re: [PATCH] Revert "Add the member integer_to_sse to processor_cost as a cost simulation for movd/pinsrd. It will be used to calculate the cost of vec_construct."

2021-08-17 Thread Hongtao Liu via Gcc-patches

On Tue, Aug 17, 2021 at 8:56 PM H.J. Lu via Gcc-patches
 wrote:
>
> On Tue, Aug 17, 2021 at 5:43 AM liuhongt via Gcc-patches
>  wrote:
> >
> > This reverts commit 872da9a6f664a06d73c987aa0cb2e5b830158a10.
> >
> > PR target/101936
> > PR target/101929
> >
> >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}
> >   Pushed to master.
> >
>
> I proposed a different approach earlier.   Will it make a difference?
No, with -mtune=icelake-server, here is dump

0x3d73360 _192 1 times scalar_store costs 12 in body
0x3d73360 _198 1 times scalar_store costs 12 in body
0x3d73360 _195 1 times scalar_store costs 12 in body
0x3d73360 _201 1 times scalar_store costs 12 in body
0x3d73360 t0_184 + t2_188 1 times scalar_stmt costs 4 in body
0x3d73360 t1_186 + t3_190 1 times scalar_stmt costs 4 in body
0x3d73360 t0_184 - t2_188 1 times scalar_stmt costs 4 in body
0x3d73360 t1_186 - t3_190 1 times scalar_stmt costs 4 in body
0x3d73360  1 times vec_construct costs 16 in prologue --> 4 * 4 = 16
0x3d73360  1 times vec_construct costs 16 in prologue --> 4 * 4 = 16
0x3d73360 t0_184 + t2_188 1 times vector_stmt costs 4 in body
0x3d73360 t1_186 - t3_190 1 times vector_stmt costs 4 in body
0x3d73360  1 times vec_perm costs 4 in body
0x3d73360 _192 1 times vector_store costs 16 in body
test.c:37:9: note: Cost model analysis for part in loop 0:
  Vector cost: 60
  Scalar cost: 64

Even adding 1 more to the cost (making it 5), the vector cost becomes 68,
which will prevent vectorization, and I remember your proposal has a
vec_construct cost of 8.
>
> --
> H.J.



-- 
BR,
Hongtao

[COMMITTED 3/3] Add GORI tracing facilities.

2021-08-17 Thread Andrew MacLeod via Gcc-patches


And this final patch provides tracing in the GORI component.

This is what I used to find the ABS problem with 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101938


The code sequence looked like:

     :
    a1_8 = -arg1_7(D);
    _1 = ABS_EXPR ;
    a2_10 = -_1;
    if (a1_8 > a2_10)
  goto ; [INV]

and the threader was threading a condition later on because the outgoing
range for arg2_9 was being compared to 0x8000000000000000 and folded to [0,0]
as it didn't think that could happen.


The GORI trace looked like:

237 GORI  outgoing_edge for arg2_9(D) on edge 2->3
238 GORI    compute op 2 (a2_10) at if (a1_8 > a2_10)
    GORI  LHS = _Bool [1, 1], a1_8 = int64 VARYING
    GORI  Computes a2_10 = int64 [-INF, 9223372036854775806] 
intersect Known range : int64 VARYING
    GORI    TRUE : (238)  produces  (a2_10) int64 [-INF, 
9223372036854775806]

239 GORI    compute op 1 (_1) at a2_10 = -_1;
    GORI  LHS =int64 [-INF, 9223372036854775806]
    GORI  Computes _1 = long long int [-INF, 
-INF][-9223372036854775806, +INF] intersect Known range : long long int 
VARYING
    GORI    TRUE : (239) produces  (_1) long long int [-INF, 
-INF][-9223372036854775806, +INF]

240 GORI    compute op 1 (arg2_9(D)) at _1 = ABS_EXPR ;
    GORI  LHS =long long int [-INF, -INF][-9223372036854775806, 
+INF]
    GORI  Computes arg2_9(D) = int64 [-9223372036854775807, 
+INF] intersect Known range : int64 VARYING
    GORI    TRUE : (240) produces  (arg2_9(D)) int64 
[-9223372036854775807, +INF]
    GORI  TRUE : (237) outgoing_edge (arg2_9(D)) int64 
[-9223372036854775807, +INF]


Which shows the range can never be -9223372036854775808 (that's
0x8000000000000000, or MIN_INT).


Note the result of request 239 shows that _1 on this edge is calculated
as [-INF, -INF][-9223372036854775806, +INF], and when solving the ABS_EXPR:

   [-INF, -INF][-9223372036854775806, +INF] = ABS_EXPR <arg2_9(D)>

Range-ops was solving that as [-9223372036854775807, +INF] (AKA
[0x8000000000000001, 0x7FFFFFFFFFFFFFFF]), losing the [-INF, -INF]
possibility, which pointed to the bug in op1_range for ABS_EXPR.


I'm sure there will be more tweaking to this, but it's a start.

Anyway, Bootstrapped on x86_64-pc-linux-gnu  with no regressions. Pushed.

Andrew

>From 4759e1e0453bef163d8dbeebbb96dc40b049c117 Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Thu, 12 Aug 2021 12:29:48 -0400
Subject: [PATCH 3/3] Add GORI tracing faciltiies.

Debugging range-ops and gori unwinding needed some help.

	* gimple-range-gori.cc (gori_compute::gori_compute): Enable tracing.
	(gori_compute::compute_operand_range): Add tracing.
	(gori_compute::logical_combine): Ditto.
	(gori_compute::compute_logical_operands): Ditto.
	(gori_compute::compute_operand1_range): Ditto.
	(gori_compute::compute_operand2_range): Ditto.
	(gori_compute::outgoing_edge_range_p): Ditto.
	* gimple-range-gori.h (class gori_compute): Add range_tracer.
---
 gcc/gimple-range-gori.cc | 172 +--
 gcc/gimple-range-gori.h  |   1 +
 2 files changed, 149 insertions(+), 24 deletions(-)

diff --git a/gcc/gimple-range-gori.cc b/gcc/gimple-range-gori.cc
index c124b3c1ce4..f78829595dc 100644
--- a/gcc/gimple-range-gori.cc
+++ b/gcc/gimple-range-gori.cc
@@ -634,11 +634,13 @@ debug (gori_map &g)
 
 // Construct a gori_compute object.
 
-gori_compute::gori_compute ()
+gori_compute::gori_compute () : tracer ("GORI ")
 {
   // Create a boolean_type true and false range.
   m_bool_zero = int_range<2> (boolean_false_node, boolean_false_node);
   m_bool_one = int_range<2> (boolean_true_node, boolean_true_node);
+  if (dump_file && (param_evrp_mode & EVRP_MODE_GORI))
+tracer.enable_trace ();
 }
 
 // Given the switch S, return an evaluation in R for NAME when the lhs
@@ -712,29 +714,43 @@ gori_compute::compute_operand_range (irange &r, gimple *stmt,
   if (!op1_in_chain && !op2_in_chain)
 return false;
 
+  bool res;
   // Process logicals as they have special handling.
   if (is_gimple_logical_p (stmt))
 {
+  unsigned idx;
+  if ((idx = tracer.header ("compute_operand ")))
+	{
+	  print_generic_expr (dump_file, name, TDF_SLIM);
+	  fprintf (dump_file, " with LHS = ");
+	  lhs.dump (dump_file);
+	  fprintf (dump_file, " at stmt ");
+	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+	}
+
   int_range_max op1_trange, op1_frange;
   int_range_max op2_trange, op2_frange;
   compute_logical_operands (op1_trange, op1_frange, stmt, lhs,
 name, src, op1, op1_in_chain);
   compute_logical_operands (op2_trange, op2_frange, stmt, lhs,
 name, src, op2, op2_in_chain);
-  return logical_combine (r, gimple_expr_code (stmt), lhs,
-			  op1_trange, op1_frange, op2_trange, op2_frange);
+  res = logical_combine (r, gimple_expr_code (stmt), lhs,
+			 op1_trange, op1_frange, op2_trange, op2_frange);
+  if (idx)
+	tracer.trailer (idx, "compute_operand", res, name, r);
 }
-
   // Follow the appropriate operands now.

[COMMITTED 2/3] Change evrp-mode options.

2021-08-17 Thread Andrew MacLeod via Gcc-patches


This patch alters the options for --param=evrp-mode=.

It removes the option of tracing when in hybrid mode, and adds some 
extra discrimination.


legacy/ranger/legacy-first/ranger-first are unchanged.  The default is
still 'ranger'.


The modifications are:

trace    : enable range tracing in ranger (this is the original trace)
gori : enable gori tracing (enabled in the final patch)
cache    : separate out cache debugging.. in theory this is what 'debug' 
was before

tracegori: trace and gori enabled
debug    : just trace EVERYTHING! :-)

Bootstrapped on x86_64-pc-linux-gnu  with no regressions. Pushed.

Andrew

>From 0bb74a28e1318cbac9c895f1079b384a42513a9c Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Thu, 12 Aug 2021 14:02:20 -0400
Subject: [PATCH 2/3] Change evrp-mode options.

Remove tracing in hybrid mode. Add trace/gori/cache tracing options.
tracing options are now  'trace', 'gori', 'cache', or all combined in 'debug'

	* flag-types.h (enum evrp_mode): Adjust evrp-mode values.
	* gimple-range-cache.cc (DEBUG_RANGE_CACHE): Relocate from.
	* gimple-range-trace.h (DEBUG_RANGE_CACHE): Here.
	* params.opt (--param=evrp-mode): Adjust options.
---
 gcc/flag-types.h  | 11 ++-
 gcc/gimple-range-cache.cc |  3 +++
 gcc/gimple-range-trace.h  |  3 ---
 gcc/params.opt| 11 +++
 4 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/gcc/flag-types.h b/gcc/flag-types.h
index e43d1de490d..4fb1cb4743d 100644
--- a/gcc/flag-types.h
+++ b/gcc/flag-types.h
@@ -444,14 +444,15 @@ enum parloops_schedule_type
 /* EVRP mode.  */
 enum evrp_mode
 {
-  EVRP_MODE_EVRP_FIRST = 0,
+  EVRP_MODE_RVRP_ONLY = 0,
   EVRP_MODE_EVRP_ONLY = 1,
-  EVRP_MODE_RVRP_ONLY = 2,
+  EVRP_MODE_EVRP_FIRST = 2,
   EVRP_MODE_RVRP_FIRST = 3,
   EVRP_MODE_TRACE = 4,
-  EVRP_MODE_DEBUG = 8 | EVRP_MODE_TRACE,
-  EVRP_MODE_RVRP_TRACE = EVRP_MODE_RVRP_ONLY | EVRP_MODE_TRACE,
-  EVRP_MODE_RVRP_DEBUG = EVRP_MODE_RVRP_ONLY | EVRP_MODE_DEBUG
+  EVRP_MODE_CACHE = (8 | EVRP_MODE_TRACE),
+  EVRP_MODE_GORI = 16,
+  EVRP_MODE_TRACE_GORI = (EVRP_MODE_TRACE | EVRP_MODE_GORI),
+  EVRP_MODE_DEBUG = (EVRP_MODE_GORI | EVRP_MODE_CACHE)
 };
 
 /* Modes of OpenACC 'kernels' constructs handling.  */
diff --git a/gcc/gimple-range-cache.cc b/gcc/gimple-range-cache.cc
index 91541f12c3c..4138d0556c6 100644
--- a/gcc/gimple-range-cache.cc
+++ b/gcc/gimple-range-cache.cc
@@ -30,6 +30,9 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-range.h"
 #include "tree-cfg.h"
 
+#define DEBUG_RANGE_CACHE (dump_file && (param_evrp_mode & EVRP_MODE_CACHE) \
+	 == EVRP_MODE_CACHE)
+
 // During contructor, allocate the vector of ssa_names.
 
 non_null_ref::non_null_ref ()
diff --git a/gcc/gimple-range-trace.h b/gcc/gimple-range-trace.h
index 6f89fcccf4f..d2d1a8b270c 100644
--- a/gcc/gimple-range-trace.h
+++ b/gcc/gimple-range-trace.h
@@ -58,7 +58,4 @@ range_tracer::header (const char *str)
 return do_header (str);
   return 0;
 }
-
-#define DEBUG_RANGE_CACHE (dump_file && (param_evrp_mode & EVRP_MODE_DEBUG))
-
 #endif // GCC_GIMPLE_RANGE_TRACE_H
diff --git a/gcc/params.opt b/gcc/params.opt
index 92b003e38cb..f9264887b40 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -132,7 +132,7 @@ Maximum number of basic blocks before EVRP uses a sparse cache.
 
 -param=evrp-mode=
 Common Joined Var(param_evrp_mode) Enum(evrp_mode) Init(EVRP_MODE_RVRP_ONLY) Param Optimization
---param=evrp-mode=[legacy|ranger|legacy-first|ranger-first|ranger-trace|ranger-debug|trace|debug] Specifies the mode Early VRP should operate in.
+--param=evrp-mode=[legacy|ranger|legacy-first|ranger-first|trace|gori|cache|tracegori|debug] Specifies the mode Early VRP should operate in.
 
 Enum
 Name(evrp_mode) Type(enum evrp_mode) UnknownError(unknown evrp mode %qs)
@@ -150,13 +150,16 @@ EnumValue
 Enum(evrp_mode) String(ranger-first) Value(EVRP_MODE_RVRP_FIRST)
 
 EnumValue
-Enum(evrp_mode) String(ranger-trace) Value(EVRP_MODE_RVRP_TRACE)
+Enum(evrp_mode) String(trace) Value(EVRP_MODE_TRACE)
 
 EnumValue
-Enum(evrp_mode) String(ranger-debug) Value(EVRP_MODE_RVRP_DEBUG)
+Enum(evrp_mode) String(cache) Value(EVRP_MODE_CACHE)
 
 EnumValue
-Enum(evrp_mode) String(trace) Value(EVRP_MODE_TRACE)
+Enum(evrp_mode) String(gori) Value(EVRP_MODE_GORI)
+
+EnumValue
+Enum(evrp_mode) String(tracegori) Value(EVRP_MODE_TRACE_GORI)
 
 EnumValue
 Enum(evrp_mode) String(debug) Value(EVRP_MODE_DEBUG)
-- 
2.17.2

[COMMITTED 1/3] Abstract range tracing routines into a class.

2021-08-17 Thread Andrew MacLeod via Gcc-patches

I originally implemented range tracing as a derived class so I wouldn't 
mess-up the basic range routines in ranger.  Having tracing enabled this 
way had its advantages, but also had some disadvantages, such as 
requiring a different class to be instantiated when we want to turn on 
tracing.


Regardless, there is an ongoing need to be able to debug range-ops and 
GORI. It seems that the tracing mechanism can be utilized there as well, 
so this patch abstracts the tracing routines into a class and 
re-implements range tracing in ranger using it.


If you have never looked at a ranger trace, it looks something like this 
(an early part of a run where the backedge hasn't been resolved fully yet) :


42 range_of_stmt (j_32) at stmt j_32 = j_14 + 1;
43   range_of_expr(j_14) at stmt j_32 = j_14 + 1;
44 range_on_entry (j_14) to BB 8
45   range_of_stmt (j_14) at stmt j_14 = PHI <0(19), 
j_32(8)>

 TRUE : (45)  cached (j_14) int VARYING
   TRUE : (44) range_on_entry (j_14) int [-INF, 31]
 TRUE : (43) range_of_expr (j_14) int [-INF, 31]
 Registering value_relation (j_32 > j_14) (bb8) at j_32 = j_14 + 1;
   TRUE : (42) range_of_stmt (j_32) int [-2147483647, 32]

It follows all the various range query calls and shows ranges as they 
are requested/calculated.


Request 42 is asking for the range of j_32 on its defining statement.
That is followed by request 43 which asks for the range of j_14 on that 
statement, and the series of requests that go off and find that value.
Eventually we see the TRUE returned for request 43 and the range of j_14 
was determined to be int [-INF, 31].
When that is applied to request 42, we see true returned and [-INF, 31] 
+ 1 is calculated as int [-2147483647, 32]


This allows us to trace the range calculations based on each request, 
and see where something has gone wrong.


Furthermore, the trace index is now static, so the index is unique 
across the compilation unit, and a 'breakpoint' routine has been added 
to the range_tracer class which allows one to easily set a breakpoint on 
a specific index.  So within gdb,

   b range_tracer::breakpoint if index == 43
Will cause the debugger to stop when we are beginning to process request 
43... making it much easier to look around when something is wrong.


Bootstrapped on x86_64-pc-linux-gnu  with no regressions. Pushed.

Andrew





>From e68c8280fa2e1b7071378cfdd876155c73ec944f Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Fri, 30 Jul 2021 15:15:29 -0400
Subject: [PATCH 1/3] Abstract tracing routines into a class.

Generalize range tracing into a class and integrate it with gimple_ranger.
Remove the old derived trace_ranger class.

	* Makefile.in (OBJS): Add gimple-range-trace.o.
	* gimple-range-cache.h (enable_new_values): Remove unused prototype.
	* gimple-range-fold.cc: Adjust headers.
	* gimple-range-trace.cc: New.
	* gimple-range-trace.h: New.
	* gimple-range.cc (gimple_ranger::gimple_ranger): Enable tracer.
	(gimple_ranger::range_of_expr): Add tracing.
	(gimple_ranger::range_on_entry): Ditto.
	(gimple_ranger::range_on_exit): Ditto.
	(gimple_ranger::range_on_edge): Ditto.
	(gimple_ranger::fold_range_internal): Ditto.
	(gimple_ranger::dump_bb): Do not calculate edge range twice.
	(trace_ranger::*): Remove.
	(enable_ranger): Never create a trace_ranger.
	(debug_seed_ranger): Move to gimple-range-trace.cc.
	(dump_ranger): Ditto.
	(debug_ranger): Ditto.
	* gimple-range.h: Include gimple-range-trace.h.
	(range_on_entry, range_on_exit): No longer virtual.
	(class trace_ranger): Remove.
	(DEBUG_RANGE_CACHE): Move to gimple-range-trace.h.
---
 gcc/Makefile.in   |   1 +
 gcc/gimple-range-cache.h  |   1 -
 gcc/gimple-range-fold.cc  |   4 +-
 gcc/gimple-range-trace.cc | 206 
 gcc/gimple-range-trace.h  |  64 +++
 gcc/gimple-range.cc   | 393 ++
 gcc/gimple-range.h|  34 +---
 7 files changed, 377 insertions(+), 326 deletions(-)
 create mode 100644 gcc/gimple-range-trace.cc
 create mode 100644 gcc/gimple-range-trace.h

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 6653e9e2142..9714fcaac37 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1406,6 +1406,7 @@ OBJS = \
 	gimple-range-edge.o \
 	gimple-range-fold.o \
 	gimple-range-gori.o \
+	gimple-range-trace.o \
 	gimple-ssa-backprop.o \
 	gimple-ssa-evrp.o \
 	gimple-ssa-evrp-analyze.o \
diff --git a/gcc/gimple-range-cache.h b/gcc/gimple-range-cache.h
index 1e77c9bf3a9..3b55673fd29 100644
--- a/gcc/gimple-range-cache.h
+++ b/gcc/gimple-range-cache.h
@@ -103,7 +103,6 @@ public:
   bool get_non_stale_global_range (irange &r, tree name);
   void set_global_range (tree name, const irange &r);
 
-  bool enable_new_values (bool state);
   non_null_ref m_non_null;
   gori_compute m_gori;
 
diff --git a/gcc/gimple-range-fold.cc b/gcc/gimple-range-fold.cc
index

Re: [PATCH] more warning code refactoring

2021-08-17 Thread Martin Sebor via Gcc-patches


On 8/17/21 2:51 AM, Richard Biener wrote:

On Tue, Aug 17, 2021 at 3:52 AM Martin Sebor via Gcc-patches
 wrote:


The attached patch continues with the move of warning code from
builtins.c and calls.c into a more suitable home.  As before, it
is mostly free of functional changes.  The one exception is that
as a pleasant side-effect, moving the attribute access checking
from initialize_argument_information() in calls.c to the new
warning pass also happens to fix PR 101854.  This is thanks to
the latter iterating over function arguments explicitly provided
in the program and not having to worry about skipping over
the additional pointer argument synthesized for calls to functions
that return a large struct by value that the former function sneaks
into the argument list.

Tested on x86_64-linux.


OK.


Jit testing exposed a bug due to an uninitialized variable (oddly
tests for no other front end did so).  I fixed it and (after
retesting the result) committed r12-2976.

Thanks
Martin

Re: [PATCH] Fix incorrect computation in fill_always_executed_in_1

2021-08-17 Thread Segher Boessenkool

Hi!

As an aside...

On Mon, Aug 16, 2021 at 03:46:12AM -0500, Xiong Hu Luo wrote:
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-19.c

> --- a/gcc/tree-ssa-loop-im.c
> +++ b/gcc/tree-ssa-loop-im.c

You can make a saner order for your diffs by putting the testsuite
changes after the rest.  A very minimal example would use

[diff]
orderfile = .gitorder

with that file containing something like

gcc/*.*

and nothing else even.  Yeah this is minimal ;-)

Segher

Re: [PATCH] configure, jit: Allow for 'make check-gcc-jit'.

2021-08-17 Thread David Malcolm via Gcc-patches

On Tue, 2021-08-17 at 19:59 +0100, Iain Sandoe wrote:
> Hi,
> 
> For those of us who habitually build Ada, it’s convenient to 
> have a way of running individual test suites without invoking
> the acats tests…
> 
> being able to do “make check-gcc-jit” from the top level is very
> useful when debugging jit testsuite issues.
> 
> one can do "cd gcc ; make check-jit "- but this doesn’t seem 100%
> identical since the invocations from the top level set the host
> exports first.
> 
> … the patch itself is trivial / obvious - I am just curious as to
> whether there was a reason for omitting it so far?

Probably just a mistake on my part; Makefile glue is not my strongest
skill.

> 
> If not, 
> 
> OK for master?

Sounds OK to me - but then again, Makefile glue is not my strongest
skill, so not sure if I'm qualified to approve this.

> 
> thanks
> Iain
> 
> 
> 
> 
> This is a convenience feature that allows the user to
> do "make check-gcc-jit" at the top level of the build
> to check that facility in isolation from others.
> 
> Signed-off-by: Iain Sandoe 
> 
> ChangeLog:
> 
> * Makefile.def: Add a jit check target for the jit
> language.
> * Makefile.in: Regenerate.
> ---
>  Makefile.def | 1 +
>  Makefile.in  | 8 
>  2 files changed, 9 insertions(+)
> 
> diff --git a/Makefile.def b/Makefile.def
> index fbfdb6fee08..7cbeca5b181 100644
> --- a/Makefile.def
> +++ b/Makefile.def
> @@ -654,6 +654,7 @@ languages = { language=go;  gcc-check-
> target=check-go;
> lib-check-target=check-gotools; };
>  languages = { language=d;  gcc-check-target=check-d;
> lib-check-target=check-target-
> libphobos; };
> +languages = { language=jit;gcc-check-target=check-jit; };
>  
>  // Toplevel bootstrap
>  bootstrap_stage = { id=1 ; };
>

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-17 Thread Qing Zhao via Gcc-patches



> On Aug 17, 2021, at 10:04 AM, Qing Zhao via Gcc-patches 
>  wrote:
> 
> 
> 
>> On Aug 16, 2021, at 11:48 AM, Qing Zhao via Gcc-patches 
>>  wrote:
>> 
 From the above IR file after “FRE”, we can see that the major issue with 
 this IR is:
 
 The address taken auto variable “alt_reloc” has been completely replaced 
 by the temporary variable “_1” in all
 the uses of the original “alt_reloc”. 
>>> 
>>> Well, this can happen with regular code as well, there's no need for
>>> .DEFERRED_INIT.  This is the usual problem with reporting uninitialized
>>> uses late.
>>> 
>>> IMHO this shouldn't be a blocker.  The goal of zero "regressions" wrt
>>> -Wuninitialized isn't really achievable.
>> 
>> Okay. Sounds reasonable to me too.
>> 
>>> 
 The major problem with such IR is,  during uninitialized analysis phase, 
 the original use of “alt_reloc” disappeared completely.
 So, the warning cannot be reported.
 
 
 My questions:
 
 1. Is it possible to get the original “alt_reloc” through the temporary 
 variable “_1” with some available information recorded in the IR?
 2. If not, then we have to record the relationship between “alt_reloc” and 
 “_1” when the original “alt_reloc” is replaced by “_1” and get such 
 relationship during
  Uninitialized analysis phase.  Is this doable?
>>> 
>>> Well, you could add a fake argument to .DEFERRED_INIT for the purpose of
>>> diagnostics.  The difficulty is to avoid tracking it as actual use so
>>> you could for example pass a string with the declarations name though
>>> this wouldn't give the association with the actual decl.
>> Good suggestion, I can try this a little bit. 
> 
> I tried this yesterday, added the 4th argument to .DEFERRED_INIT as:
> 
>1st argument: SIZE of the DECL;
>2nd argument: INIT_TYPE;
>3rd argument: IS_VLA, 0 NO, 1 YES;
> +   4th argument: The NAME for the DECL;
> 
> -   as LHS = DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA)
> +   as LHS = DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA, NAME)
> 
> +  tree name_node
> += build_string_literal (IDENTIFIER_LENGTH (DECL_NAME (decl)),
> +   IDENTIFIER_POINTER (DECL_NAME (decl)));
> 
>   tree call = build_call_expr_internal_loc (UNKNOWN_LOCATION, 
> IFN_DEFERRED_INIT,
> -   TREE_TYPE (decl), 3,
> +   TREE_TYPE (decl), 4,
>decl_size, init_type_node,
> -   is_vla_node);
> +   is_vla_node, name_node);
> 
> 
> And got the following IR in .uninit1 dump:
> 
> 
> ….
> 
>  _1 = .DEFERRED_INIT (4, 2, 0, &"alt_reloc"[0]);
>  if (_1 != 0)
> ….
> 
> 
> My questions:
> 
> 1. Is “build_string_literal” the correct utility routine to use for this new 
> argument? 
> 2. Will Such string literal nodes have potential other impact?

I tried to get the 4th argument from the call to .DEFERRED_INIT during 
uninitialized variable analysis in tree-ssa-uninit.c:

@@ -197,18 +197,25 @@ warn_uninit (enum opt_code wc, tree t, tree expr, tree 
var,
  the COMPLEX_EXPRs real part in that case.  See PR71581.  */
   if (expr == NULL_TREE
   && var == NULL_TREE
-  && SSA_NAME_VAR (t) == NULL_TREE
-  && is_gimple_assign (SSA_NAME_DEF_STMT (t))
-  && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (t)) == COMPLEX_EXPR)
-{
-  tree v = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (t));
-  if (TREE_CODE (v) == SSA_NAME
- && has_undefined_value_p (v)
- && zerop (gimple_assign_rhs2 (SSA_NAME_DEF_STMT (t
+  && SSA_NAME_VAR (t) == NULL_TREE)
+{
+  if (is_gimple_assign (SSA_NAME_DEF_STMT (t))
+ && (gimple_assign_rhs_code (SSA_NAME_DEF_STMT (t)) == COMPLEX_EXPR))
{
- expr = SSA_NAME_VAR (v);
- var = expr;
+ tree v = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (t));
+ if (TREE_CODE (v) == SSA_NAME
+ && has_undefined_value_p (v)
+ && zerop (gimple_assign_rhs2 (SSA_NAME_DEF_STMT (t
+   {
+ expr = SSA_NAME_VAR (v);
+ var = expr;
+   }
}
+  else if (gimple_call_internal_p (SSA_NAME_DEF_STMT (t), 
IFN_DEFERRED_INIT))
+  {
+   expr = gimple_call_arg (SSA_NAME_DEF_STMT (t), 3);
+   var = expr;
+  }
 }

However, this 4th argument is not a regular variable, it’s just an ADDR_EXPR 
that includes the constant string for the name of 
the deleted variable. 
If we’d like to report the warning based on this ADDR_EXPR, completely new code
to report the warnings, separate from the current code that is based on
“Variables”, would need to be added; this might make the code very ugly.

My questions:

1. Is there a better way to do this?
2. As you mentioned before, it’s very unrealistic to meet the goal of “zero 
regression” for -Wuninitialized, can we leave this part of work in

[PATCH v2] libstdc++: improve documentation for bits/stl_function.h [PR51539]

2021-08-17 Thread Krzysztof Żelechowski

PR  libstdc++/PR51539

ChangeLog
* libstdc++-v3/include/bits/stl_function.h: Improve documentation.

diff --git a/libstdc++-v3/include/bits/stl_function.h b/libstdc++-v3/include/
bits/stl_function.h
index 073018d522d..a0b84f93d18 100644
--- a/libstdc++-v3/include/bits/stl_function.h
+++ b/libstdc++-v3/include/bits/stl_function.h
@@ -112,7 +112,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 };
 
   /**
-   *  This is one of the @link functors functor base classes@endlink.
+   *  a base class for @link functors functors@endlink taking 2 parameters
*/
   template
 struct binary_function
@@ -162,60 +162,66 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct negate;
 #endif
 
-  /// One of the @link arithmetic_functors math functors@endlink.
+  /// An @link arithmetic_functors arithmetic functor@endlink representing 
addition
   template
 struct plus : public binary_function<_Tp, _Tp, _Tp>
 {
+/// Returns the sum (`operator+`) of two parameters.
   _GLIBCXX14_CONSTEXPR
   _Tp
   operator()(const _Tp& __x, const _Tp& __y) const
   { return __x + __y; }
 };
 
-  /// One of the @link arithmetic_functors math functors@endlink.
+  /// An @link arithmetic_functors arithmetic functor@endlink representing 
subtraction
   template
 struct minus : public binary_function<_Tp, _Tp, _Tp>
 {
+/// Returns the difference (`operator-`) between parameter 2 and 
parameter 1.
   _GLIBCXX14_CONSTEXPR
   _Tp
   operator()(const _Tp& __x, const _Tp& __y) const
   { return __x - __y; }
 };
 
-  /// One of the @link arithmetic_functors math functors@endlink.
+  /// An @link arithmetic_functors arithmetic functor@endlink representing 
multiplication
   template
 struct multiplies : public binary_function<_Tp, _Tp, _Tp>
 {
+/// Returns the product (`operator*`) of two parameters.
   _GLIBCXX14_CONSTEXPR
   _Tp
   operator()(const _Tp& __x, const _Tp& __y) const
   { return __x * __y; }
 };
 
-  /// One of the @link arithmetic_functors math functors@endlink.
+  /// An @link arithmetic_functors arithmetic functor@endlink representing 
division
   template
 struct divides : public binary_function<_Tp, _Tp, _Tp>
 {
+/// Returns the quotient (`operator/`) of filling parameter 2 with 
parameter 1.
   _GLIBCXX14_CONSTEXPR
   _Tp
   operator()(const _Tp& __x, const _Tp& __y) const
   { return __x / __y; }
 };
 
-  /// One of the @link arithmetic_functors math functors@endlink.
+  /// An @link arithmetic_functors arithmetic functor@endlink representing 
remainder
   template
 struct modulus : public binary_function<_Tp, _Tp, _Tp>
 {
+/// Returns the remainder (`operator%`) after filling parameter 2 
with parameter 1.
   _GLIBCXX14_CONSTEXPR
   _Tp
   operator()(const _Tp& __x, const _Tp& __y) const
   { return __x % __y; }
 };
 
-  /// One of the @link arithmetic_functors math functors@endlink.
+  /// An @link arithmetic_functors arithmetic functor@endlink representing 
reflexion
   template
 struct negate : public unary_function<_Tp, _Tp>
 {
+/// Returns the opposite value (`operator-`) to the parameter.
   _GLIBCXX14_CONSTEXPR
   _Tp
   operator()(const _Tp& __x) const
@@ -225,10 +231,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #if __cplusplus > 201103L
 
 #define __cpp_lib_transparent_operators 201510
-
+  /// An @link arithmetic_functors arithmetic functor@endlink representing 
generic addition
+  /// @since C++11
   template<>
 struct plus
 {
+/// Returns the sum (`operator+`) of two parameters.
   template 
_GLIBCXX14_CONSTEXPR
auto
@@ -240,10 +248,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   typedef __is_transparent is_transparent;
 };
 
-  /// One of the @link arithmetic_functors math functors@endlink.
+  /// An @link arithmetic_functors arithmetic functor@endlink representing 
generic subtraction
+  /// @since C++11
   template<>
 struct minus
 {
+   /// Returns the difference (`operator-`) between parameter 2 and 
parameter 1.
   template 
_GLIBCXX14_CONSTEXPR
auto
@@ -255,10 +265,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   typedef __is_transparent is_transparent;
 };
 
-  /// One of the @link arithmetic_functors math functors@endlink.
+  /// An @link arithmetic_functors arithmetic functor@endlink representing 
generic multiplication
+  /// @since C++11
   template<>
 struct multiplies
 {
+/// Returns the product (`operator*`) of two parameters.
   template 
_GLIBCXX14_CONSTEXPR
auto
@@ -270,10 +282,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   typedef __is_transparent is_transparent;
 };
 
-  /// One of the @link arithmetic_functors math functors@endlink.
+  /// An @link arithmetic_functors arithmetic functor@endlink representing 
generic division
+  /// @since C++11
   template<>
 struct divides
 {
+

[committed] wwwdocs: Add missing punctuation to <li> elements

2021-08-17 Thread Jonathan Wakely via Gcc-patches

Pushed to wwwdocs as obvious.


commit 44d97225cc39f2cfbc3109c6a6473bde3886357a
Author: Jonathan Wakely 
Date:   Tue Aug 17 21:30:28 2021 +0100

Add missing punctuation to <li> elements

diff --git a/htdocs/bugs/index.html b/htdocs/bugs/index.html
index 0a1b582a..99a1ddb1 100644
--- a/htdocs/bugs/index.html
+++ b/htdocs/bugs/index.html
@@ -102,11 +102,11 @@ three of which can be obtained from the output of 
gcc -v:
   An error that occurs only some of the times a certain file is
   compiled, such that retrying a sufficient number of times results in
   a successful compilation; this is a symptom of a hardware problem,
-  not of a compiler bug (sorry)
+  not of a compiler bug (sorry).
 
   Assembly files (*.s) produced by the compiler, or any
   binary files, such as object files, executables, core files, or
-  precompiled header files
+  precompiled header files.
 
   Duplicate bug reports, or reports of bugs already fixed in the
   development tree, especially those that have already been reported
@@ -114,14 +114,14 @@ three of which can be obtained from the output of 
gcc -v:
 
   Bugs in the assembler, the linker or the C library.  These are
   separate projects, with separate mailing lists and different bug
-  reporting procedures
+  reporting procedures.
 
   Bugs in releases or snapshots of GCC not issued by the GNU
-  Project.  Report them to whoever provided you with the release
+  Project.  Report them to whoever provided you with the release.
 
   Questions about the correctness or the expected behavior of
   certain constructs that are not GCC extensions.  Ask them in forums
-  dedicated to the discussion of the programming language
+  dedicated to the discussion of the programming language.
 
 
 Where to post it

[PATCH] libstdc++: improve documentation for bits/stl_function.h [PR51539]

2021-08-17 Thread Krzysztof Żelechowski

PR  libstdc++/PR51539

ChangeLog
* libstdc++-v3/include/bits/stl_function.h: Improve documentation.

diff --git a/libstdc++-v3/include/bits/stl_function.h b/libstdc++-v3/include/
bits/stl_function.h
index 073018d522d..a0b84f93d18 100644
--- a/libstdc++-v3/include/bits/stl_function.h
+++ b/libstdc++-v3/include/bits/stl_function.h
@@ -112,7 +112,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 };
 
   /**
-   *  This is one of the @link functors functor base classes@endlink.
+   *  a base class for @link functors functors@endlink taking 2 parameters
*/
   template
 struct binary_function
@@ -162,60 +162,66 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct negate;
 #endif
 
-  /// One of the @link arithmetic_functors math functors@endlink.
+  /// An @link arithmetic_functors arithmetic functor@endlink representing 
addition
   template
 struct plus : public binary_function<_Tp, _Tp, _Tp>
 {
+/// Returns the sum (@c operator+) of two parameters.
   _GLIBCXX14_CONSTEXPR
   _Tp
   operator()(const _Tp& __x, const _Tp& __y) const
   { return __x + __y; }
 };
 
-  /// One of the @link arithmetic_functors math functors@endlink.
+  /// An @link arithmetic_functors arithmetic functor@endlink representing 
subtraction
   template
 struct minus : public binary_function<_Tp, _Tp, _Tp>
 {
+/// Returns the difference (@c operator-) between parameter 2 and 
parameter 1.
   _GLIBCXX14_CONSTEXPR
   _Tp
   operator()(const _Tp& __x, const _Tp& __y) const
   { return __x - __y; }
 };
 
-  /// One of the @link arithmetic_functors math functors@endlink.
+  /// An @link arithmetic_functors arithmetic functor@endlink representing 
multiplication
   template
 struct multiplies : public binary_function<_Tp, _Tp, _Tp>
 {
+/// Returns the product (@c operator*) of two parameters.
   _GLIBCXX14_CONSTEXPR
   _Tp
   operator()(const _Tp& __x, const _Tp& __y) const
   { return __x * __y; }
 };
 
-  /// One of the @link arithmetic_functors math functors@endlink.
+  /// An @link arithmetic_functors arithmetic functor@endlink representing 
division
   template
 struct divides : public binary_function<_Tp, _Tp, _Tp>
 {
+/// Returns the quotient (@c operator/) of filling parameter 2 with 
parameter 1.
   _GLIBCXX14_CONSTEXPR
   _Tp
   operator()(const _Tp& __x, const _Tp& __y) const
   { return __x / __y; }
 };
 
-  /// One of the @link arithmetic_functors math functors@endlink.
+  /// An @link arithmetic_functors arithmetic functor@endlink representing 
remainder
   template
 struct modulus : public binary_function<_Tp, _Tp, _Tp>
 {
+/// Returns the remainder (@c operator%) after filling parameter 2 
with parameter 1.
   _GLIBCXX14_CONSTEXPR
   _Tp
   operator()(const _Tp& __x, const _Tp& __y) const
   { return __x % __y; }
 };
 
-  /// One of the @link arithmetic_functors math functors@endlink.
+  /// An @link arithmetic_functors arithmetic functor@endlink representing 
reflexion
   template
 struct negate : public unary_function<_Tp, _Tp>
 {
+/// Returns the opposite value (@c operator-) to the parameter.
   _GLIBCXX14_CONSTEXPR
   _Tp
   operator()(const _Tp& __x) const
@@ -225,10 +231,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #if __cplusplus > 201103L
 
 #define __cpp_lib_transparent_operators 201510
-
+  /// An @link arithmetic_functors arithmetic functor@endlink representing 
generic addition
+  /// @since C++11
   template<>
 struct plus
 {
+/// Returns the sum (@c operator+) of two parameters.
   template 
_GLIBCXX14_CONSTEXPR
auto
@@ -240,10 +248,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   typedef __is_transparent is_transparent;
 };
 
-  /// One of the @link arithmetic_functors math functors@endlink.
+  /// An @link arithmetic_functors arithmetic functor@endlink representing 
generic subtraction
+  /// @since C++11
   template<>
 struct minus
 {
+   /// Returns the difference (@c operator-) between parameter 2 and 
parameter 1.
   template 
_GLIBCXX14_CONSTEXPR
auto
@@ -255,10 +265,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   typedef __is_transparent is_transparent;
 };
 
-  /// One of the @link arithmetic_functors math functors@endlink.
+  /// An @link arithmetic_functors arithmetic functor@endlink representing 
generic multiplication
+  /// @since C++11
   template<>
 struct multiplies
 {
+/// Returns the product (@c operator*) of two parameters.
   template 
_GLIBCXX14_CONSTEXPR
auto
@@ -270,10 +282,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   typedef __is_transparent is_transparent;
 };
 
-  /// One of the @link arithmetic_functors math functors@endlink.
+  /// An @link arithmetic_functors arithmetic functor@endlink representing 
generic division
+  /// @since C++11
   template<>
 struct divides

Re: Better memory statistics, take 2

2021-08-17 Thread Thomas Schwinge

Hi!

On 2021-08-17T09:27:46-0400, David Malcolm via Gcc-patches 
 wrote:
> On Tue, 2021-08-17 at 11:17 +0200, Thomas Schwinge wrote:
>> "Turn
>> global 'ggc_force_collect' variable into 'force_collect' parameter to
>> 'ggc_collect'"

> Looks good to me, but bool params can be unclear - maybe introduce an
> enum to make the meaning more explicit to the reader of the code?

I actually had contemplated that, but then went for the simpler 'bool'
variant...  ;-) But yes, it's a good suggestion, thanks.  OK to push the
attached "Turn 'bool force_collect' parameter to 'ggc_collect' into an
'enum ggc_collect mode'"?


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From 73e4b9869e2cc515ee3393bffa220e775bbbcd45 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Tue, 17 Aug 2021 21:15:46 +0200
Subject: [PATCH] Turn 'bool force_collect' parameter to 'ggc_collect' into an
 'enum ggc_collect mode'

... to make the meaning more explicit to the reader of the code.

Follow-up to recent commit 0edf2e81bb02cba43b649b3f6e7258b68a779ac0
"Turn global 'ggc_force_collect' variable into 'force_collect' parameter to
'ggc_collect'".

	gcc/
	* ggc.h (enum ggc_collect): New.
	(ggc_collect): Use it.
	* ggc-page.c: Adjust.
	* ggc-common.c: Likewise.
	* ggc-tests.c: Likewise.
	* read-rtl-function.c: Likewise.
	* selftest-run-tests.c: Likewise.
	* doc/gty.texi (Invoking the garbage collector): Likewise.

Suggested-by: David Malcolm 
---
 gcc/doc/gty.texi |  6 +++---
 gcc/ggc-common.c |  2 +-
 gcc/ggc-page.c   |  5 +++--
 gcc/ggc-tests.c  | 18 +-
 gcc/ggc.h| 10 ++
 gcc/read-rtl-function.c  |  2 +-
 gcc/selftest-run-tests.c |  2 +-
 7 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/gcc/doc/gty.texi b/gcc/doc/gty.texi
index b667d1d19ba..2ad7793191b 100644
--- a/gcc/doc/gty.texi
+++ b/gcc/doc/gty.texi
@@ -655,9 +655,9 @@ with many other garbage collectors, it is not implicitly invoked by
 allocation routines when a lot of memory has been consumed. So the
 only way to have GGC reclaim storage is to call the @code{ggc_collect}
 function explicitly.
-When the @var{force_collect} parameter is set or otherwise an internal
-heuristic decides whether to actually collect, this call is
-potentially an expensive operation, as it may
+With @var{mode} @code{GGC_COLLECT_FORCE} or otherwise (default
+@code{GGC_COLLECT_HEURISTIC}) when the internal heuristic decides to
+collect, this call is potentially an expensive operation, as it may
 have to scan the entire heap.  Beware that local variables (on the GCC
 call stack) are not followed by such an invocation (as many other
 garbage collectors do): you should reference all your data from static
diff --git a/gcc/ggc-common.c b/gcc/ggc-common.c
index f38e4d5020d..32ba5be42b2 100644
--- a/gcc/ggc-common.c
+++ b/gcc/ggc-common.c
@@ -962,7 +962,7 @@ dump_ggc_loc_statistics ()
   if (! GATHER_STATISTICS)
 return;
 
-  ggc_collect (true);
+  ggc_collect (GGC_COLLECT_FORCE);
 
   ggc_mem_desc.dump (GGC_ORIGIN);
 }
diff --git a/gcc/ggc-page.c b/gcc/ggc-page.c
index a6fbecaa1d8..1c49643e7e7 100644
--- a/gcc/ggc-page.c
+++ b/gcc/ggc-page.c
@@ -2184,7 +2184,7 @@ validate_free_objects (void)
 /* Top level mark-and-sweep routine.  */
 
 void
-ggc_collect (bool force_collect)
+ggc_collect (enum ggc_collect mode)
 {
   /* Avoid frequent unnecessary work by skipping collection if the
  total allocations haven't expanded much since the last
@@ -2196,7 +2196,8 @@ ggc_collect (bool force_collect)
   memory_block_pool::trim ();
 
   float min_expand = allocated_last_gc * param_ggc_min_expand / 100;
-  if (G.allocated < allocated_last_gc + min_expand && !force_collect)
+  if (mode == GGC_COLLECT_HEURISTIC
+  && G.allocated < allocated_last_gc + min_expand)
 return;
 
   timevar_push (TV_GC);
diff --git a/gcc/ggc-tests.c b/gcc/ggc-tests.c
index 2891c20ceac..e83f7019863 100644
--- a/gcc/ggc-tests.c
+++ b/gcc/ggc-tests.c
@@ -47,7 +47,7 @@ test_basic_struct ()
   root_test_struct = ggc_cleared_alloc  ();
   root_test_struct->other = ggc_cleared_alloc  ();
 
-  ggc_collect (true);
+  ggc_collect (GGC_COLLECT_FORCE);
 
   ASSERT_TRUE (ggc_marked_p (root_test_struct));
   ASSERT_TRUE (ggc_marked_p (root_test_struct->other));
@@ -77,7 +77,7 @@ test_length ()
   for (int i = 0; i < count; i++)
 root_test_of_length->elem[i] = ggc_cleared_alloc  ();
 
-  ggc_collect (true);
+  ggc_collect (GGC_COLLECT_FORCE);
 
   ASSERT_TRUE (ggc_marked_p (root_test_of_length));
   for (int i = 0; i < count; i++)
@@ -151,7 +151,7 @@ test_union ()
   test_struct *referenced_by_other = ggc_cleared_alloc  ();
   other->m_ptr = referenced_by_other;
 
-  ggc_collect (true);
+  ggc_collect (GGC_COLLECT_FORCE);
 
   ASSERT_TRU

Re: [PATCH] Avoid illegal argument to verbose in dg-test callback

2021-08-17 Thread Jonathan Wakely via Gcc-patches

On Tue, 17 Aug 2021 at 20:15, Thomas Schwinge  wrote:
>
> Hi!
>
> On 2020-04-16T15:21:44+0200, Matthias Kretz  wrote:
> > If extra_tool_flags starts with a dash, an error like 'ERROR: verbose:
> > illegal argument: -march=native -O2 -std=c++17' is printed. This is
> > easily fixed by inserting a double dash before the variable.
>
> > --- a/libstdc++-v3/testsuite/lib/libstdc++.exp
> > +++ b/libstdc++-v3/testsuite/lib/libstdc++.exp
> > @@ -408,7 +408,7 @@ proc libstdc++-dg-test { prog do_what extra_tool_flags 
> > } {
> >  set options ""
> >  if { $extra_tool_flags != "" } {
> >   verbose -log "extra_tool_flags are:"
> > - verbose -log $extra_tool_flags
> > + verbose -log -- $extra_tool_flags
>
> I'm confirming the original problem, but on one system, it's not
> resolved by this change, because instead we get:
>
> extra_tool_flags are:
> ERROR: tcl error sourcing 
> [...]/libstdc++-v3/testsuite/libstdc++-dg/conformance.exp.
> ERROR: usage: send [args] string
> while executing
> "send_log "$message\n""
> (procedure "verbose" line 48)
> invoked from within
> "verbose -log -- $extra_tool_flags"
> (procedure "libstdc++-dg-test" line 45)
> invoked from within
> "${tool}-dg-test $prog [lindex ${dg-do-what} 0] "$tool_flags 
> ${dg-extra-tool-flags}""
> (procedure "saved-dg-test" line 115)
> invoked from within
> [...]
>
> That's Ubuntu's dejagnu 1.5-3ubuntu1 being so old that it doesn't include
> DejaGnu commit 57c22601afe43d2c2b8819df4f2ecacb034516fd "Protect from leading
> dash in message".  (I suppose that's what'd make this work, but have not
> verified.)
>
> Thus, as obvious, pushed "libstdc++: Avoid illegal argument to verbose in
> dg-test callback, continued" to master branch in commit
> 60b94d8bd2280837e980a53cf81bdf902d726e61, and cherry-picked into
> releases/gcc-11 branch in commit
> 112bbc8d1d81c1b6375ea3cfb083cdeb0a06ea3a, and into releases/gcc-10 branch
> in commit 6e64bbec7dead374628abe525b6f56e20f9507c3, see attached.

Thanks. This makes the logs "denser", as there isn't the extra line
break between each test. I think I can live with it ;-) but if anybody
objects we can add a \n into that string.

Re: [PATCH] Avoid illegal argument to verbose in dg-test callback

2021-08-17 Thread Thomas Schwinge

Hi!

On 2020-04-16T15:21:44+0200, Matthias Kretz  wrote:
> If extra_tool_flags starts with a dash, an error like 'ERROR: verbose:
> illegal argument: -march=native -O2 -std=c++17' is printed. This is
> easily fixed by inserting a double dash before the variable.

> --- a/libstdc++-v3/testsuite/lib/libstdc++.exp
> +++ b/libstdc++-v3/testsuite/lib/libstdc++.exp
> @@ -408,7 +408,7 @@ proc libstdc++-dg-test { prog do_what extra_tool_flags } {
>  set options ""
>  if { $extra_tool_flags != "" } {
>   verbose -log "extra_tool_flags are:"
> - verbose -log $extra_tool_flags
> + verbose -log -- $extra_tool_flags

I'm confirming the original problem, but on one system, it's not
resolved by this change, because instead we get:

extra_tool_flags are:
ERROR: tcl error sourcing 
[...]/libstdc++-v3/testsuite/libstdc++-dg/conformance.exp.
ERROR: usage: send [args] string
while executing
"send_log "$message\n""
(procedure "verbose" line 48)
invoked from within
"verbose -log -- $extra_tool_flags"
(procedure "libstdc++-dg-test" line 45)
invoked from within
"${tool}-dg-test $prog [lindex ${dg-do-what} 0] "$tool_flags 
${dg-extra-tool-flags}""
(procedure "saved-dg-test" line 115)
invoked from within
[...]

That's Ubuntu's dejagnu 1.5-3ubuntu1 being so old that it doesn't include
DejaGnu commit 57c22601afe43d2c2b8819df4f2ecacb034516fd "Protect from leading
dash in message".  (I suppose that's what'd make this work, but have not
verified.)

Thus, as obvious, pushed "libstdc++: Avoid illegal argument to verbose in
dg-test callback, continued" to master branch in commit
60b94d8bd2280837e980a53cf81bdf902d726e61, and cherry-picked into
releases/gcc-11 branch in commit
112bbc8d1d81c1b6375ea3cfb083cdeb0a06ea3a, and into releases/gcc-10 branch
in commit 6e64bbec7dead374628abe525b6f56e20f9507c3, see attached.

Grüße
 Thomas

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From 60b94d8bd2280837e980a53cf81bdf902d726e61 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Tue, 17 Aug 2021 17:58:30 +0200
Subject: [PATCH] libstdc++: Avoid illegal argument to verbose in dg-test
 callback, continued

This is a follow-up to commit 697b94cfaef4a958132faf0cf4b35b15dfb29acc
"libstdc++: Avoid illegal argument to verbose in dg-test callback".
I'm confirming the original problem, but on one system, it's not
resolved by this change, because instead we get:

extra_tool_flags are:
ERROR: tcl error sourcing [...]/libstdc++-v3/testsuite/libstdc++-dg/conformance.exp.
ERROR: usage: send [args] string
while executing
"send_log "$message\n""
(procedure "verbose" line 48)
invoked from within
"verbose -log -- $extra_tool_flags"
(procedure "libstdc++-dg-test" line 45)
invoked from within
"${tool}-dg-test $prog [lindex ${dg-do-what} 0] "$tool_flags ${dg-extra-tool-flags}""
(procedure "saved-dg-test" line 115)
invoked from within
[...]

That's Ubuntu's dejagnu 1.5-3ubuntu1 being so old that it doesn't include
DejaGnu commit 57c22601afe43d2c2b8819df4f2ecacb034516fd "Protect from leading
dash in message".  (I suppose that's what'd make this work, but have not
verified.)

	libstdc++-v3/
	* testsuite/lib/libstdc++.exp: Avoid illegal argument to verbose,
	continued.
---
 libstdc++-v3/testsuite/lib/libstdc++.exp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/libstdc++-v3/testsuite/lib/libstdc++.exp b/libstdc++-v3/testsuite/lib/libstdc++.exp
index 73b202ce212..35ccbe47c39 100644
--- a/libstdc++-v3/testsuite/lib/libstdc++.exp
+++ b/libstdc++-v3/testsuite/lib/libstdc++.exp
@@ -410,8 +410,7 @@ proc libstdc++-dg-test { prog do_what extra_tool_flags } {
 set select_compile "v3_target_compile"
 set options ""
 if { $extra_tool_flags != "" } {
-	verbose -log "extra_tool_flags are:"
-	verbose -log -- $extra_tool_flags
+	verbose -log "extra_tool_flags are: $extra_tool_flags"
 	if { [string first "-x c" $extra_tool_flags ] != -1 } {
 	verbose -log "compiling and executing as C, not C++"
 	set edit_tool_flags $extra_tool_flags
-- 
2.30.2

>From 112bbc8d1d81c1b6375ea3cfb083cdeb0a06ea3a Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Tue, 17 Aug 2021 17:58:30 +0200
Subject: [PATCH] libstdc++: Avoid illegal argument to verbose in dg-test
 callback, continued

This is a follow-up to commit 697b94cfaef4a958132faf0cf4b35b15dfb29acc
"libstdc++: Avoid illegal argument to verbose in dg-test callback".
I'm confirming the original problem, but on one system, it's not
resolved by this change, because instead we get:

extra_tool_flags are:
ERROR: tcl error sourcing [...]/libstdc++-v3/testsuite/libstdc++-dg/conformance.exp.
ERROR: usage

[pushed] Objective-C: Default flag_objc_sjlj_exceptions off for NeXT ABI >= 2.

2021-08-17 Thread Iain Sandoe

Hi,

This patch from Matt adjusts the default exception model for
all targets that might use NeXT ABI 2 (currently, Darwin had
handled this locally).

tested on x86_64-darwin, i686-darwin, x86_64-linux,
pushed to master, thanks,
Iain

=

Signed-off-by: Matt Jacobson 

gcc/c-family/ChangeLog:

* c-opts.c (c_common_post_options): Default to
flag_objc_sjlj_exceptions = 1 only when flag_objc_abi < 2.

gcc/objc/ChangeLog:

* objc-next-runtime-abi-02.c
(objc_next_runtime_abi_02_init): Warn about and reset
flag_objc_sjlj_exceptions regardless of flag_objc_exceptions.
(next_runtime_02_initialize): Use a checking assert that
flag_objc_sjlj_exceptions is off.
---
 gcc/c-family/c-opts.c   | 4 ++--
 gcc/objc/objc-next-runtime-abi-02.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
index 1c4e832c7ed..373af0cf06f 100644
--- a/gcc/c-family/c-opts.c
+++ b/gcc/c-family/c-opts.c
@@ -852,9 +852,9 @@ c_common_post_options (const char **pfilename)
   else if (!flag_gnu89_inline && !flag_isoc99)
 error ("%<-fno-gnu89-inline%> is only supported in GNU99 or C99 mode");
 
-  /* Default to ObjC sjlj exception handling if NeXT runtime.  */
+  /* Default to ObjC sjlj exception handling if NeXT runtime < v2.  */
   if (flag_objc_sjlj_exceptions < 0)
-flag_objc_sjlj_exceptions = flag_next_runtime;
+flag_objc_sjlj_exceptions = (flag_next_runtime && flag_objc_abi < 2);
   if (flag_objc_exceptions && !flag_objc_sjlj_exceptions)
 flag_exceptions = 1;
 
diff --git a/gcc/objc/objc-next-runtime-abi-02.c 
b/gcc/objc/objc-next-runtime-abi-02.c
index 963d1bf1ad8..c552013ab27 100644
--- a/gcc/objc/objc-next-runtime-abi-02.c
+++ b/gcc/objc/objc-next-runtime-abi-02.c
@@ -245,7 +245,7 @@ objc_next_runtime_abi_02_init (objc_runtime_hooks *rthooks)
 {
   extern_names = ggc_cleared_vec_alloc (SIZEHASHTABLE);
 
-  if (flag_objc_exceptions && flag_objc_sjlj_exceptions)
+  if (flag_objc_sjlj_exceptions)
 {
   inform (UNKNOWN_LOCATION,
  "%<-fobjc-sjlj-exceptions%> is ignored for "
@@ -507,7 +507,7 @@ static void next_runtime_02_initialize (void)
   objc_getPropertyStruct_decl = NULL_TREE;
   objc_setPropertyStruct_decl = NULL_TREE;
 
-  gcc_assert (!flag_objc_sjlj_exceptions);
+  gcc_checking_assert (!flag_objc_sjlj_exceptions);
 
   /* Although we warn that fobjc-exceptions is required for exceptions
  code, we carry on and create it anyway.  */
-- 
2.24.3 (Apple Git-128)

[pushed] Darwin, libcc1: Handle hosts with mdynamic-no-pic support.

2021-08-17 Thread Iain Sandoe

Hi,

The default for building host-side binaries for mdynamic-no-pic
hosts is to enable this.  However, it is not compatible with
dynamic libraries, so must be switched off for libcc1.

tested on i686-darwin9, x86_64-darwin, x86_64-linux,
pushed to master, thanks,
Iain

Signed-off-by: Iain Sandoe 

libcc1/ChangeLog:

* Makefile.am: Switch mdynamic-no-pic to fPIC.
* Makefile.in: Regenerated.
---
 libcc1/Makefile.am | 1 +
 libcc1/Makefile.in | 1 +
 2 files changed, 2 insertions(+)

diff --git a/libcc1/Makefile.am b/libcc1/Makefile.am
index 9ec021030e2..49a9543ea7d 100644
--- a/libcc1/Makefile.am
+++ b/libcc1/Makefile.am
@@ -27,6 +27,7 @@ AM_CXXFLAGS = $(WARN_FLAGS) $(WERROR) $(visibility) 
$(CET_HOST_FLAGS)
 if DARWIN_DYNAMIC_LOOKUP
 AM_CXXFLAGS += -Wl,-undefined,dynamic_lookup
 endif
+override CXXFLAGS := $(subst -mdynamic-no-pic,-fPIC,$(CXXFLAGS))
 override CXXFLAGS := $(filter-out -fsanitize=address,$(CXXFLAGS))
 override LDFLAGS := $(filter-out -fsanitize=address,$(LDFLAGS))
 # Can be simplified when libiberty becomes a normal convenience library.

[PATCH] configure, jit: Allow for 'make check-gcc-jit'.

2021-08-17 Thread Iain Sandoe

Hi,

For those of us who habitually build Ada, it’s convenient to 
have a way of running individual test suites without invoking
the acats tests…

being able to do “make check-gcc-jit” from the top level is very
useful when debugging jit testsuite issues.

One can do "cd gcc ; make check-jit" - but this doesn’t seem 100%
identical since the invocations from the top level set the host
exports first.

… the patch itself is trivial / obvious - I am just curious as to
whether there was a reason for omitting it so far?

If not, 

OK for master?

thanks
Iain




This is a convenience feature that allows the user to
do "make check-gcc-jit" at the top level of the build
to check that facility in isolation from others.

Signed-off-by: Iain Sandoe 

ChangeLog:

* Makefile.def: Add a jit check target for the jit
language.
* Makefile.in: Regenerate.
---
 Makefile.def | 1 +
 Makefile.in  | 8 
 2 files changed, 9 insertions(+)

diff --git a/Makefile.def b/Makefile.def
index fbfdb6fee08..7cbeca5b181 100644
--- a/Makefile.def
+++ b/Makefile.def
@@ -654,6 +654,7 @@ languages = { language=go;  gcc-check-target=check-go;
lib-check-target=check-gotools; };
 languages = { language=d;  gcc-check-target=check-d;
lib-check-target=check-target-libphobos; };
+languages = { language=jit;gcc-check-target=check-jit; };
 
 // Toplevel bootstrap
 bootstrap_stage = { id=1 ; };

[pushed] Darwin: Reset section names table at the end of compile.

2021-08-17 Thread Iain Sandoe

Hi,

This fixes a fail seen when using JIT on Darwin where we fail
to label the start of debug sections properly.  This operation is
controlled by a vector of tokens that is lazily allocated.

For a single use (typical compile) this vector will be reclaimed
as GGC.  For JIT this is not sufficient since it does not reset
the pointer to NULL (and thus we think the vector is already
allocated when a context is reused).

This clears the vector and sets the pointer to NULL at the end
of object output.

tested on x86_64, i686-darwin (and x86_64-linux).
pushed to master, thanks
Iain

Signed-off-by: Iain Sandoe 

gcc/ChangeLog:

* config/darwin.c (darwin_file_end): Reset and reclaim the
section names table at the end of compile.
---
 gcc/config/darwin.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c
index b160c23ea56..5d1d13c80aa 100644
--- a/gcc/config/darwin.c
+++ b/gcc/config/darwin.c
@@ -3129,6 +3129,14 @@ darwin_file_end (void)
  re-arranging data.  */
   if (!DARWIN_SECTION_ANCHORS || !flag_section_anchors)
 fprintf (asm_out_file, "\t.subsections_via_symbols\n");
+
+  /* We rely on this being NULL at the start of compilation; reset it here
+ so that JIT can reuse a context.  */
+  if (dwarf_sect_names_table != NULL)
+{
+  dwarf_sect_names_table->truncate (0);
+  dwarf_sect_names_table = NULL;
+}
 }
 
 /* TODO: Add a language hook for identifying if a decl is a vtable.  */
-- 
2.24.3 (Apple Git-128)

[pushed] Darwin, X86, config: Adjust 'as' command lines [PR100340].

2021-08-17 Thread Iain Sandoe

Hi,

Versions of the assembler using clang from XCode 12.5/12.5.1
have a bug which produces different code layout between debug and
non-debug input, leading to a compare fail for default configure
parameters.

This is a workaround fix to disable the optimisation that is
responsible for the bug.

tested on x86_64-darwin20, x86_64-darwin17, i686-darwin9 and on
x86_64-linux.

Pushed to master, thanks
Iain

Signed-off-by: Iain Sandoe 

PR target/100340 - Bootstrap fails with Clang 12.0.5 (XCode 12.5)

PR target/100340

gcc/ChangeLog:

* config.in: Regenerate.
* config/i386/darwin.h (EXTRA_ASM_OPTS): New
(ASM_SPEC): Pass options to disable branch shortening where
needed.
* configure: Regenerate.
* configure.ac: Detect versions of 'as' that support the
optimisation which has the bug.
---
 gcc/config.in|  7 +++
 gcc/config/i386/darwin.h | 10 +-
 gcc/configure| 35 +++
 gcc/configure.ac |  9 +
 4 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/gcc/config.in b/gcc/config.in
index 7f5b01fad76..d8a810bbc91 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -616,6 +616,13 @@
 #endif
 
 
+/* Define if your Mac OS X assembler supports -mllvm -x86-pad-for-align=false.
+   */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_MLLVM_X86_PAD_FOR_ALIGN
+#endif
+
+
 /* Define if your Mac OS X assembler supports the -mmacos-version-min option.
*/
 #ifndef USED_FOR_TARGET
diff --git a/gcc/config/i386/darwin.h b/gcc/config/i386/darwin.h
index bac32197e83..73b06e2307d 100644
--- a/gcc/config/i386/darwin.h
+++ b/gcc/config/i386/darwin.h
@@ -125,10 +125,18 @@ along with GCC; see the file COPYING3.  If not see
   %{mfentry*:%eDarwin does not support -mfentry or associated options}" \
   DARWIN_CC1_SPEC
 
+/* This is a workaround for a tool bug: see PR100340.  */
+
+#ifdef HAVE_AS_MLLVM_X86_PAD_FOR_ALIGN
+#define EXTRA_ASM_OPTS " -mllvm -x86-pad-for-align=false"
+#else
+#define EXTRA_ASM_OPTS ""
+#endif
+
 #undef ASM_SPEC
 #define ASM_SPEC "-arch %(darwin_arch) \
   " ASM_OPTIONS " -force_cpusubtype_ALL \
-  %{static}" ASM_MMACOSX_VERSION_MIN_SPEC
+  %{static}" ASM_MMACOSX_VERSION_MIN_SPEC EXTRA_ASM_OPTS
 
 #undef ENDFILE_SPEC
 #define ENDFILE_SPEC \
diff --git a/gcc/configure b/gcc/configure
index 08c286764e0..a2d1003a0f5 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -27082,6 +27082,41 @@ $as_echo "$as_me: WARNING: LTO for $target requires 
binutils >= 2.20.1, but vers
fi
;;
 esac
+case $target_os in
+   darwin2[0-9]* | darwin19*)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for llvm 
assembler x86-pad-for-align option" >&5
+$as_echo_n "checking assembler for llvm assembler x86-pad-for-align option... 
" >&6; }
+if ${gcc_cv_as_mllvm_x86_pad_for_align+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  gcc_cv_as_mllvm_x86_pad_for_align=no
+  if test x$gcc_cv_as != x; then
+$as_echo '.text' > conftest.s
+if { ac_try='$gcc_cv_as $gcc_cv_as_flags -mllvm -x86-pad-for-align=false 
-o conftest.o conftest.s >&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }
+then
+   gcc_cv_as_mllvm_x86_pad_for_align=yes
+else
+  echo "configure: failed program was" >&5
+  cat conftest.s >&5
+fi
+rm -f conftest.o conftest.s
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 
$gcc_cv_as_mllvm_x86_pad_for_align" >&5
+$as_echo "$gcc_cv_as_mllvm_x86_pad_for_align" >&6; }
+if test $gcc_cv_as_mllvm_x86_pad_for_align = yes; then
+
+$as_echo "#define HAVE_AS_MLLVM_X86_PAD_FOR_ALIGN 1" >>confdefs.h
+
+fi
+
+   ;;
+esac
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for 
-xbrace_comment" >&5
 $as_echo_n "checking assembler for -xbrace_comment... " >&6; }
diff --git a/gcc/configure.ac b/gcc/configure.ac
index 653a1cc561d..ad8fa5a4604 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -4799,6 +4799,15 @@ foo: nop
fi
;;
 esac
+case $target_os in
+   darwin2[[0-9]]* | darwin19*)
+gcc_GAS_CHECK_FEATURE([llvm assembler x86-pad-for-align option],
+  gcc_cv_as_mllvm_x86_pad_for_align,,
+  [-mllvm -x86-pad-for-align=false], [.text],,
+  [AC_DEFINE(HAVE_AS_MLLVM_X86_PAD_FOR_ALIGN, 1,
+   [Define if your Mac OS X assembler supports -mllvm 
-x86-pad-for-align=false.])])
+   ;;
+esac
 
 gcc_GAS_CHECK_FEATURE([-xbrace_comment], gcc_cv_as_ix86_xbrace_comment,,
   [-xbrace_comment=no], [.text],,
-- 
2.24.3 (Apple Git-128)

Re: [PATCH v4] c++: Add gnu::diagnose_as attribute

2021-08-17 Thread Jason Merrill via Gcc-patches


On 7/23/21 4:58 AM, Matthias Kretz wrote:

Hi Jason,


Hi, thanks for your patience; I've been out on PTO a lot in the last 
month, and will be again this week.



I found a few regressions from the last patch in the meantime. Version 4 of
the patch is attached.

Questions:

1. I simplified the condition for calling dump_template_parms in
dump_function_name. !DECL_FRIEND_PSEUDO_TEMPLATE_INSTANTIATION (t) is
equivalent to DECL_USE_TEMPLATE (t) in this context; implying that
dump_template_parms is unconditionally called with `primary = false`. Or am I
missing something?


Ah, good catch.  That suggests that 
DECL_FRIEND_PSEUDO_TEMPLATE_INSTANTIATION gives false positives; 
DECL_USE_TEMPLATE is also 0 for template patterns themselves, which 
would be why for


template 
void f() { foo; }

the error refers to 'void f()' rather than 'void f()'

The macro should probably also check DECL_FRIEND_CONTEXT.

And then the use of !DECL_USE_TEMPLATE is intending to check whether t 
is a primary template pattern, and pass true in that case.  This would 
be more correct if it also checked instantiates_primary_template_p.




2. Given a DECL_TI_ARGS tree, can I query whether an argument was deduced or
explicitly specified? I'm asking because I still consider diagnostics of
function templates unfortunate. `template  void f()` is fine, as is
`void f(T) [with T = float]`, but `void f() [with T = float]` could be better.
I.e. if the template parameter appears somewhere in the function parameter
list, dump_template_parms would only produce noise. If, however, the template
parameter was given explicitly, it would be nice if it could show up
accordingly in diagnostics.


NON_DEFAULT_TEMPLATE_ARGS_COUNT has that information, though there are 
some issues with it.  Attached is my WIP from May to improve it 
somewhat, if that's interesting.



3. When parsing tentatively and the parse is rejected, input_location is not
reset, correct? In the attached patch I therefore made
cp_parser_namespace_alias_definition reset input_location on a failed
tentative parse. But it feels wrong. Shouldn't input_location be restored on
cp_parser_parse_definitely?


Makes sense, I guess cp_lexer_rollback_tokens should call 
cp_lexer_set_source_position_from_token.


I'll look at the patch soon.


--

This attribute overrides the diagnostics output string for the entity it
appertains to. The motivation is to improve QoI for library TS
implementations, where diagnostics have a very bad signal-to-noise ratio
due to the long namespaces involved.

With the attribute, it is possible to solve PR89370 and make
std::__cxx11::basic_string<_CharT, _Traits, _Alloc> appear as
std::string in diagnostic output without extra hacks to recognize the
type in the C++ frontend.

Signed-off-by: Matthias Kretz 

gcc/ChangeLog:

 PR c++/89370
 * doc/extend.texi: Document the diagnose_as attribute.
 * doc/invoke.texi: Document -fno-diagnostics-use-aliases.

gcc/c-family/ChangeLog:

 PR c++/89370
 * c.opt (fdiagnostics-use-aliases): New diagnostics flag.

gcc/cp/ChangeLog:

 PR c++/89370
 * cp-tree.h: Add is_alias_template_p declaration.
 * decl2.c (is_alias_template_p): New function. Determines
 whether a given TYPE_DECL is actually an alias template that is
 still missing its template_info.
 (is_late_template_attribute): Decls with diagnose_as attribute
 are early attributes only if they are alias templates.
 * error.c (dump_scope): When printing the name of a namespace,
 look for the diagnose_as attribute. If found, print the
 associated string instead of calling dump_decl.
 (dump_decl_name_or_diagnose_as): New function to replace
 dump_decl (pp, DECL_NAME(t), flags) and inspect the tree for the
 diagnose_as attribute before printing the DECL_NAME.
 (dump_template_scope): New function. Prints the scope of a
 template instance correctly applying diagnose_as attributes and
 adjusting the list of template parms accordingly.
 (dump_aggr_type): If the type has a diagnose_as attribute, print
 the associated string instead of printing the original type
 name. Print template parms only if the attribute was not applied
 to the instantiation / full specialization. Delay call to
 dump_scope until the diagnose_as attribute is found. If the
 attribute has a second argument, use it to override the context
 passed to dump_scope.
 (dump_simple_decl): Call dump_decl_name_or_diagnose_as instead
 of dump_decl.
 (dump_decl): Ditto.
 (lang_decl_name): Ditto.
 (dump_function_decl): Walk the functions context list to
 determine whether a call to dump_template_scope is required.
 Ensure function templates diagnosed with pretty templates set
 TFF_TEMPLATE_NAME to skip dump_template_parms.
 (dump_function_nam

Re: [PATCH, V2 2/3] targhooks: New target hook for CTF/BTF debug info emission

2021-08-17 Thread Indu Bhagat via Gcc-patches


On 8/17/21 1:04 AM, Richard Biener wrote:

On Mon, Aug 16, 2021 at 7:39 PM Indu Bhagat  wrote:


On 8/10/21 4:54 AM, Richard Biener wrote:

On Thu, Aug 5, 2021 at 2:52 AM Indu Bhagat via Gcc-patches
 wrote:


This patch adds a new target hook to detect if the CTF container can allow the
emission of CTF/BTF debug info at DWARF debug info early finish time. Some
backends, e.g., BPF when generating code for CO-RE usecase, may need to emit
the CTF/BTF debug info sections around the time when late DWARF debug is
finalized (dwarf2out_finish).


Without looking at the dwarf2out.c usage in the next patch - I think
the CTF part
should be always emitted from dwarf2out_early_finish, the "hooks" should somehow
arrange for the alternate output specific data to be preserved until
dwarf2out_finish
time so the late BTF data can be emitted from there.

Lumping everything together now just makes it harder to see what info
is required
to persist and thus make LTO support more intrusive than necessary.


In principle, I agree the approach to split generate/emit CTF/BTF like
you mention is ideal.  But, the BTF CO-RE relocations format is such
that the .BTF section cannot be finalized until .BTF.ext contents are
all fully known (David Faust summarizes this issue in the other thread
"[PATCH, V2 3/3] dwarf2out: Emit BTF in dwarf2out_finish for BPF CO-RE
usecase".)

In summary, the .BTF.ext section refers to strings in the .BTF section.
These strings are added at the time the CO-RE relocations are added.
Recall that the .BTF section's header has information about the .BTF
string table start offset and length. So, this means the "CTF part" (or
the .BTF section) cannot simply be emitted in the dwarf2out_early_finish
because it's not ready yet. If it is still unclear, please let me know.

My judgement here is that the BTF format itself is not amenable to split
early/late emission like DWARF. BTF has no linker support yet either.


But are the strings used for the CO-RE relocations not all present already?
Or does the "CTF part" have only "foo", "bar" and "baz" while the CO-RE
part wants to output sth like "foo->bar.baz" (which IMHO would be quite
stupid also for size purposes)?



Yes, the latter ("foo->bar.baz") is closer to what the format does for 
CO-RE relocations!



That said, fix the format.

Alternatively hand the CO-RE part its own string table (what's the fuss
with re-using the CTF string table if there's nothing to share ...)



BTF and .BTF.ext formats are specified already by implementations in the 
kernel, libbpf, and LLVM. For that matter, I should add BPF CO-RE to the 
mix and say that BPF CO-RE capability _and_ .BTF/.BTF.ext debug formats 
have been defined already by the BPF kernel developers/associated 
entities. At this time, we as GCC developers simply extending the BPF 
backend/BTF generation support in GCC, cannot fix the format. That ship 
has sailed.


Thanks for reviewing and voicing your concerns.
Indu



Richard.




gcc/ChangeLog:

  * config/bpf/bpf.c (ctfc_debuginfo_early_finish_p): New definition.
  (TARGET_CTFC_DEBUGINFO_EARLY_FINISH_P): Undefine and override.
  * doc/tm.texi: Regenerated.
  * doc/tm.texi.in: Document the new hook.
  * target.def: Add a new hook.
  * targhooks.c (default_ctfc_debuginfo_early_finish_p): Likewise.
  * targhooks.h (default_ctfc_debuginfo_early_finish_p): Likewise.
---
   gcc/config/bpf/bpf.c | 14 ++
   gcc/doc/tm.texi  |  6 ++
   gcc/doc/tm.texi.in   |  2 ++
   gcc/target.def   | 10 ++
   gcc/targhooks.c  |  6 ++
   gcc/targhooks.h  |  2 ++
   6 files changed, 40 insertions(+)

diff --git a/gcc/config/bpf/bpf.c b/gcc/config/bpf/bpf.c
index 028013e..85f6b76 100644
--- a/gcc/config/bpf/bpf.c
+++ b/gcc/config/bpf/bpf.c
@@ -178,6 +178,20 @@ bpf_option_override (void)
   #undef TARGET_OPTION_OVERRIDE
   #define TARGET_OPTION_OVERRIDE bpf_option_override

+/* Return FALSE iff -mcore has been specified.  */
+
+static bool
+ctfc_debuginfo_early_finish_p (void)
+{
+  if (TARGET_BPF_CORE)
+return false;
+  else
+return true;
+}
+
+#undef TARGET_CTFC_DEBUGINFO_EARLY_FINISH_P
+#define TARGET_CTFC_DEBUGINFO_EARLY_FINISH_P ctfc_debuginfo_early_finish_p
+
   /* Define target-specific CPP macros.  This function in used in the
  definition of TARGET_CPU_CPP_BUILTINS in bpf.h */

diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index cb01528..2d5ff05 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -10400,6 +10400,12 @@ Define this macro if GCC should produce debugging 
output in BTF debug
   format in response to the @option{-gbtf} option.
   @end defmac

+@deftypefn {Target Hook} bool TARGET_CTFC_DEBUGINFO_EARLY_FINISH_P (void)
+This target hook returns nonzero if the CTF Container can allow the
+ emission of the CTF/BTF debug info at the DWARF debuginfo early finish
+ time.
+@end deftypefn
+
   @node Floating Point
   @section Cross Compilation and Floating Point
   @cin

Re: [PATCH] Fall back to masked_gather_load/masked_scatter_store

2021-08-17 Thread Richard Biener via Gcc-patches

On Tue, 17 Aug 2021, Richard Sandiford wrote:

> Richard Biener via Gcc-patches  writes:
> > This adds a fallback to the masked_ variants for gather_load
> > and scatter_store if the latter are not available.
> >
> > Bootstrap / regtest running on x86_64-unknown-linux-gnu.
> 
> LGTM FWIW.  I don't know the history behind the TREE_CODE (*mask) != SSA_NAME
> check.

I've traced it up to 045c12782cc8c but that just moved it as well.

> I guess we could probably remove the unmasked SVE optabs with this.

Yeah, it will make my life simpler, since I won't need to write expanders
for something the CPU cannot do on x86.

The patch tested OK on x86_64-unknown-linux-gnu so I pushed it now.

Richard.

> Thanks,
> Richard
> 
> >
> > 2021-08-17  Richard Biener  
> >
> > * optabs-query.c (supports_vec_gather_load_p): Also check
> > for masked optabs.
> > (supports_vec_scatter_store_p): Likewise.
> > * tree-vect-data-refs.c (vect_gather_scatter_fn_p): Fall
> > back to masked variants if non-masked are not supported.
> > * tree-vect-patterns.c (vect_recog_gather_scatter_pattern):
> > When we need to use masked gather/scatter but do not have
> > a mask set up a constant true one.
> > * tree-vect-stmts.c (vect_check_scalar_mask): Also allow
> > non-SSA_NAME masks.
> > ---
> >  gcc/optabs-query.c|  6 --
> >  gcc/tree-vect-data-refs.c | 22 +++---
> >  gcc/tree-vect-patterns.c  |  7 +--
> >  gcc/tree-vect-stmts.c |  8 
> >  4 files changed, 28 insertions(+), 15 deletions(-)
> >
> > diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
> > index 05ee5f517da..a6dd0fed610 100644
> > --- a/gcc/optabs-query.c
> > +++ b/gcc/optabs-query.c
> > @@ -740,7 +740,8 @@ supports_vec_gather_load_p ()
> >this_fn_optabs->supports_vec_gather_load_cached = true;
> >  
> >this_fn_optabs->supports_vec_gather_load
> > -= supports_vec_convert_optab_p (gather_load_optab);
> > += (supports_vec_convert_optab_p (gather_load_optab)
> > +   || supports_vec_convert_optab_p (mask_gather_load_optab));
> >  
> >return this_fn_optabs->supports_vec_gather_load;
> >  }
> > @@ -757,7 +758,8 @@ supports_vec_scatter_store_p ()
> >this_fn_optabs->supports_vec_scatter_store_cached = true;
> >  
> >this_fn_optabs->supports_vec_scatter_store
> > -= supports_vec_convert_optab_p (scatter_store_optab);
> > += (supports_vec_convert_optab_p (scatter_store_optab)
> > +   || supports_vec_convert_optab_p (mask_scatter_store_optab));
> >  
> >return this_fn_optabs->supports_vec_scatter_store;
> >  }
> > diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
> > index c4c14d74065..97745a830a2 100644
> > --- a/gcc/tree-vect-data-refs.c
> > +++ b/gcc/tree-vect-data-refs.c
> > @@ -3735,11 +3735,17 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool 
> > read_p, bool masked_p,
> >  return false;
> >  
> >/* Work out which function we need.  */
> > -  internal_fn ifn;
> > +  internal_fn ifn, alt_ifn;
> >if (read_p)
> > -ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
> > +{
> > +  ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
> > +  alt_ifn = IFN_MASK_GATHER_LOAD;
> > +}
> >else
> > -ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
> > +{
> > +  ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
> > +  alt_ifn = IFN_MASK_SCATTER_STORE;
> > +}
> >  
> >for (;;)
> >  {
> > @@ -3755,6 +3761,16 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool 
> > read_p, bool masked_p,
> >   *offset_vectype_out = offset_vectype;
> >   return true;
> > }
> > +  else if (!masked_p
> > +  && internal_gather_scatter_fn_supported_p (alt_ifn, vectype,
> > + memory_type,
> > + offset_vectype,
> > + scale))
> > +   {
> > + *ifn_out = alt_ifn;
> > + *offset_vectype_out = offset_vectype;
> > + return true;
> > +   }
> >  
> >if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
> >   && TYPE_PRECISION (offset_type) >= element_bits)
> > diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
> > index 25de97bd9b0..899734005ce 100644
> > --- a/gcc/tree-vect-patterns.c
> > +++ b/gcc/tree-vect-patterns.c
> > @@ -4820,6 +4820,9 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
> >if (mask)
> >  mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
> >   loop_vinfo);
> > +  else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
> > +  || gs_info.ifn == IFN_MASK_GATHER_LOAD)
> > +mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
> >  
> >/* Get the invariant base and non-invariant offset, converting the
> >   latter to the same width as the vector elements.  */
> > @@ -4847,11 +4850,11 @@ vect_r

Re: [PATCH] libcpp: __VA_OPT__ p1042r1 placemarker changes [PR101488]

2021-08-17 Thread Jakub Jelinek via Gcc-patches

On Tue, Aug 17, 2021 at 08:32:50AM -0700, Jason Merrill wrote:
> > We want to remove the latter  but not the former one, and
> > the patch adds the vaopt_padding_tokens counter for it to control
> > how many placemarkers are removed on vaopt_state::END.
> > As can be seen in #c1 and #c2 of the PR, I've tried various approaches,
> > but neither worked out for all the cases except the posted one.
> 
> I notice that the second placemarker you mention is avoid_paste, which seems
> relevant.  This seems to also work, at least it doesn't seem to break any of
> the va_opt tests.  Thoughts?

I've verified my patch + your incremental patch works not just on the
va-opt* tests in gcc testsuite, but also behaves the same as without the
incremental patch on the clang testcases (I think it is all covered now in
our testsuite, checked just to make sure).

So, looks just fine to me.  I can include your patch in my bootstrap/regtest
tonight.

> >From d6cc54280e1c4dba91e883721e05ab0037f4a896 Mon Sep 17 00:00:00 2001
> From: Jason Merrill 
> Date: Tue, 17 Aug 2021 08:12:02 -0700
> Subject: [PATCH] libcpp: __VA_OPT__ tweak
> To: gcc-patches@gcc.gnu.org
> 
> libcpp/ChangeLog:
> 
>   * macro.c (replace_args): When __VA_OPT__ is on the LHS of ##,
>   remove trailing avoid_paste tokens.
> ---
>  libcpp/macro.c | 24 
>  1 file changed, 8 insertions(+), 16 deletions(-)
> 
> diff --git a/libcpp/macro.c b/libcpp/macro.c
> index 35eaae383a7..acdbe6ab14f 100644
> --- a/libcpp/macro.c
> +++ b/libcpp/macro.c
> @@ -2025,7 +2025,6 @@ replace_args (cpp_reader *pfile, cpp_hashnode *node, 
> cpp_macro *macro,
>i = 0;
>vaopt_state vaopt_tracker (pfile, macro->variadic, &args[macro->paramc - 
> 1]);
>const cpp_token **vaopt_start = NULL;
> -  unsigned vaopt_padding_tokens = 0;
>for (src = macro->exp.tokens; src < limit; src++)
>  {
>unsigned int arg_tokens_count;
> @@ -2058,16 +2057,7 @@ replace_args (cpp_reader *pfile, cpp_hashnode *node, 
> cpp_macro *macro,
> const cpp_token **start = vaopt_start;
> vaopt_start = NULL;
>  
> -   /* Remove any tail padding from inside the __VA_OPT__.  */
> paste_flag = tokens_buff_last_token_ptr (buff);
> -   while (vaopt_padding_tokens--
> -  && paste_flag
> -  && paste_flag != start
> -  && (*paste_flag)->type == CPP_PADDING)
> - {
> -   tokens_buff_remove_last_token (buff);
> -   paste_flag = tokens_buff_last_token_ptr (buff);
> - }
>  
> if (vaopt_tracker.stringify ())
>   {
> @@ -2088,6 +2078,14 @@ replace_args (cpp_reader *pfile, cpp_hashnode *node, 
> cpp_macro *macro,
>   }
> else if (src->flags & PASTE_LEFT)
>   {
> +   /* Don't avoid paste after all.  */
> +   while (paste_flag && paste_flag != start
> +  && *paste_flag == &pfile->avoid_paste)
> + {
> +   tokens_buff_remove_last_token (buff);
> +   paste_flag = tokens_buff_last_token_ptr (buff);
> + }
> +
> /* With a non-empty __VA_OPT__ on the LHS of ##, the last
>token should be flagged PASTE_LEFT.  */
> if (paste_flag && (*paste_flag)->type != CPP_PADDING)
> @@ -2106,7 +2104,6 @@ replace_args (cpp_reader *pfile, cpp_hashnode *node, 
> cpp_macro *macro,
> continue;
>   }
>  
> -  vaopt_padding_tokens = 0;
>if (src->type != CPP_MACRO_ARG)
>   {
> /* Allocate a virtual location for token SRC, and add that
> @@ -2261,10 +2258,6 @@ replace_args (cpp_reader *pfile, cpp_hashnode *node, 
> cpp_macro *macro,
>  
> index = expanded_token_index (pfile, macro, src, token_index);
> const cpp_token *tok = macro_arg_token_iter_get_token (&from);
> -   if (tok->type == CPP_PADDING)
> - vaopt_padding_tokens++;
> -   else
> - vaopt_padding_tokens = 0;
> tokens_buff_add_token (buff, virt_locs, tok,
>macro_arg_token_iter_get_location (&from),
>src->src_loc, map, index);
> @@ -2311,7 +2304,6 @@ replace_args (cpp_reader *pfile, cpp_hashnode *node, 
> cpp_macro *macro,
> tokens_buff_add_token (buff, virt_locs,
>t, t->src_loc, t->src_loc,
>NULL, 0);
> -   vaopt_padding_tokens++;
>   }
>  
>/* Add a new paste flag, or remove an unwanted one.  */
> -- 
> 2.27.0
> 


Jakub

Re: [PATCH] Optimize seed_seq construction

2021-08-17 Thread Jonathan Wakely via Gcc-patches

On Tue, 17 Aug 2021 at 14:40, Antony Polukhin  wrote:
>
> вт, 17 авг. 2021 г. в 16:37, Jonathan Wakely :
> <...>
> > Thanks, this is a nice improvement. We can avoid tag dispatching to
> > make it simpler though:
> >
> > @@ -3248,6 +3249,9 @@ namespace __detail
> >   template
> > seed_seq::seed_seq(_InputIterator __begin, _InputIterator __end)
> > {
> > +  if _GLIBCXX17_CONSTEXPR
> > (__is_random_access_iter<_InputIterator>::value)
> > +   _M_v.reserve(std::distance(__begin, __end));
> > +
> >   for (_InputIterator __iter = __begin; __iter != __end; ++__iter)
> >_M_v.push_back(__detail::__mod >   __detail::_Shift::__value>(*__iter));
> >
> > The call to std::distance is well-formed for input iterators, but we
> > won't actually call it unless we have random access iterators.
> >
> > Unless you see a problem with this that I'm missing, I'll go with that 
> > version.
>
> Looks much better. Thanks!

Here's what I've tested and pushed to trunk.

Thanks again!
commit 174f9257a75dec93221eca26c236e0a6346c9dfd
Author: Antony Polukhin 
Date:   Tue Aug 17 13:50:53 2021

libstdc++: Optimize std::seed_seq construction

When std::seed_seq is constructed from random access iterators we can
detect the internal vector size in O(1). Reserving memory for elements
in such cases may avoid multiple memory allocations.

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

* include/bits/random.tcc (seed_seq::seed_seq): Reserve capacity
if distance is O(1).
* testsuite/26_numerics/random/pr60037-neg.cc: Adjust dg-error
line number.

Co-authored-by: Jonathan Wakely 

diff --git a/libstdc++-v3/include/bits/random.tcc 
b/libstdc++-v3/include/bits/random.tcc
index 0be50d90e8a..023fded7f5d 100644
--- a/libstdc++-v3/include/bits/random.tcc
+++ b/libstdc++-v3/include/bits/random.tcc
@@ -3240,6 +3240,7 @@ namespace __detail
   template
 seed_seq::seed_seq(std::initializer_list<_IntType> __il)
 {
+  _M_v.reserve(__il.size());
   for (auto __iter = __il.begin(); __iter != __il.end(); ++__iter)
_M_v.push_back(__detail::__mod::__value>(*__iter));
@@ -3248,6 +3249,9 @@ namespace __detail
   template
 seed_seq::seed_seq(_InputIterator __begin, _InputIterator __end)
 {
+  if _GLIBCXX17_CONSTEXPR (__is_random_access_iter<_InputIterator>::value)
+   _M_v.reserve(std::distance(__begin, __end));
+
   for (_InputIterator __iter = __begin; __iter != __end; ++__iter)
_M_v.push_back(__detail::__mod::__value>(*__iter));
diff --git a/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc 
b/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc
index 8fba7144d8a..3ab9c44232e 100644
--- a/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc
+++ b/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc
@@ -12,4 +12,4 @@ auto x = std::generate_canonical

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-17 Thread Qing Zhao via Gcc-patches




> On Aug 17, 2021, at 9:50 AM, Qing Zhao via Gcc-patches 
>  wrote:
> 
> 
> 
>> On Aug 17, 2021, at 3:29 AM, Richard Biener  wrote:
>> 
>> On Mon, 16 Aug 2021, Qing Zhao wrote:
>> 
>>> My current code for expand_DEFERRED_INIT is like the following, could you 
>>> check and see whether there is any issue for it:
>>> 
>>> #define INIT_PATTERN_VALUE  0xFE
>>> static void
>>> expand_DEFERRED_INIT (internal_fn, gcall *stmt)
>>> {
>>> tree lhs = gimple_call_lhs (stmt);
>>> tree var_size = gimple_call_arg (stmt, 0);
>>> enum auto_init_type init_type
>>>   = (enum auto_init_type) TREE_INT_CST_LOW (gimple_call_arg (stmt, 1));
>>> bool is_vla = (bool) TREE_INT_CST_LOW (gimple_call_arg (stmt, 2));
>>> 
>>> tree var_type = TREE_TYPE (lhs);
>>> gcc_assert (init_type > AUTO_INIT_UNINITIALIZED);
>>> 
>>> if (is_vla || (!use_register_for_decl (lhs)))
>>>   {
>>> if (TREE_CODE (lhs) == SSA_NAME)
>>>   lhs = SSA_NAME_VAR (lhs);
>> 
>> this should not be necessary (in fact you shouldn't see a SSA_NAME
>> here, if you do then using SSA_NAME_VAR is wrong)
> You mean during RTL expansion phase, all SSA_NAMEs are gone already?

Actually, the lhs can be an SSA_NAME here:

Breakpoint 1, expand_DEFERRED_INIT (stmt=0x7fffe96ae348) at 
../../latest-gcc/gcc/internal-fn.c:3021
3021  mark_addressable (lhs);
(gdb) call debug_tree(lhs)
 
unit-size 
align:32 warn_if_not_align:0 symtab:0 alias-set 2 canonical-type 
0x7fffe959b2a0 precision:32
pointer_to_this >
visited var 
def_stmt temp1_5 = .DEFERRED_INIT (4, 2, 0, &"temp1"[0]);
version:5>

When I deleted:

 if (TREE_CODE (lhs) == SSA_NAME)
   lhs = SSA_NAME_VAR (lhs);

many test cases failed with an internal compiler error:

/home/opc/Work/GCC/latest-gcc/gcc/testsuite/c-c++-common/auto-init-3.c:9:9: 
internal compiler error: in expand_expr_addr_expr_1, at expr.c:8437
0xe237aa expand_expr_addr_expr_1
../../latest-gcc/gcc/expr.c:8437
0xe24059 expand_expr_addr_expr
../../latest-gcc/gcc/expr.c:8525
0xe32b56 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, 
expand_modifier, rtx_def**, bool)
../../latest-gcc/gcc/expr.c:11741
0xe2da52 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, 
expand_modifier, rtx_def**, bool)
../../latest-gcc/gcc/expr.c:10777
0xe24706 expand_expr_real(tree_node*, rtx_def*, machine_mode, expand_modifier, 
rtx_def**, bool)
../../latest-gcc/gcc/expr.c:8713
0xc13f15 expand_expr
../../latest-gcc/gcc/expr.h:301
0xc17acb get_memory_rtx
../../latest-gcc/gcc/builtins.c:1370
0xc2223d expand_builtin_memset_args
../../latest-gcc/gcc/builtins.c:4102
0xc21a20 expand_builtin_memset(tree_node*, rtx_def*, machine_mode)
../../latest-gcc/gcc/builtins.c:3886
0xfb5c85 expand_DEFERRED_INIT
../../latest-gcc/gcc/internal-fn.c:3031


So, did I do anything wrong?

Qing

RE: [PATCH] [MIPS] Hazard barrier return support

2021-08-17 Thread Dragan Mladjenovic via Gcc-patches



> -Original Message-
> From: Dragan Mladjenovic
> Sent: 16 August 2021 22:40
> To: 'Andrew Pinski' 
> Cc: gcc-patches@gcc.gnu.org
> Subject: RE: [PATCH] [MIPS] Hazard barrier return support
> 
> 
> 
> > -Original Message-
> > From: Andrew Pinski [mailto:pins...@gmail.com]
> > Sent: 16 August 2021 21:17
> > To: Dragan Mladjenovic 
> > Cc: gcc-patches@gcc.gnu.org
> > Subject: Re: [PATCH] [MIPS] Hazard barrier return support
> >
> > On Mon, Aug 16, 2021 at 7:43 AM Dragan Mladjenovic via Gcc-patches
> >  wrote:
> > >
> > > This patch allows a function to request clearing of all instruction
> > > and execution hazards upon normal return via __attribute__
> > ((use_hazard_barrier_return)).
> > >
> > > 2017-04-25  Prachi Godbole  
> > >
> > > gcc/
> > > * config/mips/mips.h (machine_function): New variable
> > > use_hazard_barrier_return_p.
> > > * config/mips/mips.md (UNSPEC_JRHB): New unspec.
> > > (mips_hb_return_internal): New insn pattern.
> > > * config/mips/mips.c (mips_attribute_table): Add attribute
> > > use_hazard_barrier_return.
> > > (mips_use_hazard_barrier_return_p): New static function.
> > > (mips_function_attr_inlinable_p): Likewise.
> > > (mips_compute_frame_info): Set use_hazard_barrier_return_p.
> > > Emit error for unsupported architecture choice.
> > > (mips_function_ok_for_sibcall, mips_can_use_return_insn):
> > > Return false for use_hazard_barrier_return.
> > > (mips_expand_epilogue): Emit hazard barrier return.
> > > * doc/extend.texi: Document use_hazard_barrier_return.
> > >
> > > gcc/testsuite/
> > > * gcc.target/mips/hazard-barrier-return-attribute.c: New test.
> > > ---
> > > Rehash of original patch posted by Prachi with minimal changes.
> > > Tested against mips-mti-elf with mips32r2/-EB and mips32r2/-EB/-
> micromips.
> > >
> > >  gcc/config/mips/mips.c| 58 +--
> > >  gcc/config/mips/mips.h|  3 +
> > >  gcc/config/mips/mips.md   | 15 +
> > >  gcc/doc/extend.texi   |  6 ++
> > >  .../mips/hazard-barrier-return-attribute.c| 20 +++
> > >  5 files changed, 98 insertions(+), 4 deletions(-)  create mode
> > > 100644
> > > gcc/testsuite/gcc.target/mips/hazard-barrier-return-attribute.c
> > >
> > > diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index
> > > 89d1be6cea6..6ce12fce52e 100644
> > > --- a/gcc/config/mips/mips.c
> > > +++ b/gcc/config/mips/mips.c
> > > @@ -630,6 +630,7 @@ static const struct attribute_spec
> > mips_attribute_table[] = {
> > >  mips_handle_use_shadow_register_set_attr, NULL },
> > >{ "keep_interrupts_masked",  0, 0, false, true,  true, false, NULL, 
> > > NULL },
> > >{ "use_debug_exception_return", 0, 0, false, true, true, false,
> > > NULL, NULL },
> > > +  { "use_hazard_barrier_return", 0, 0, true, false, false, false,
> > > + NULL, NULL },
> > >{ NULL, 0, 0, false, false, false, false, NULL, NULL }
> > >  };
> > >
> > > @@ -1309,6 +1310,16 @@ mips_use_debug_exception_return_p (tree
> > type)
> > >TYPE_ATTRIBUTES (type)) != NULL;  }
> > >
> > > +/* Check if the attribute to use hazard barrier return is set for
> > > +   the function declaration DECL.  */
> > > +
> > > +static bool
> > > +mips_use_hazard_barrier_return_p (const_tree decl) {
> > > +  return lookup_attribute ("use_hazard_barrier_return",
> > > +  DECL_ATTRIBUTES (decl)) != NULL; }
> > > +
> > >  /* Return the set of compression modes that are explicitly required
> > > by the attributes in ATTRIBUTES.  */
> > >
> > > @@ -1494,6 +1505,19 @@ mips_can_inline_p (tree caller, tree callee)
> > >return default_target_can_inline_p (caller, callee);  }
> > >
> > > +/* Implement TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P.
> > > +
> > > +   A function requesting clearing of all instruction and execution 
> > > hazards
> > > +   before returning cannot be inlined - thereby not clearing any hazards.
> > > +   All our other function attributes are related to how out-of-line 
> > > copies
> > > +   should be compiled or called.  They don't in themselves prevent
> > > + inlining.  */
> > > +
> > > +static bool
> > > +mips_function_attr_inlinable_p (const_tree decl) {
> > > +  return !mips_use_hazard_barrier_return_p (decl); }
> > > +
> > >  /* Handle an "interrupt" attribute with an optional argument.  */
> > >
> > >  static tree
> > > @@ -7921,6 +7945,11 @@ mips_function_ok_for_sibcall (tree decl, tree
> > exp ATTRIBUTE_UNUSED)
> > >&& !targetm.binds_local_p (decl))
> > >  return false;
> > >
> > > +  /* Can't generate sibling calls if returning from current function 
> > > using
> > > + hazard barrier return.  */
> > > +  if (mips_use_hazard_barrier_return_p (current_function_decl))
> > > +return false;
> > > +
> > >/* Otherwise OK.  */
> >

Re: [PATCH v2] libstdc++: Fix testsuite for skipping gdb tests on remote/non-native target

2021-08-17 Thread Jonathan Wakely via Gcc-patches

On Mon, 9 Aug 2021 at 12:47, Luc Michel wrote:
>
> This fixes an incorrect invocation of gdb on remote targets where
> DejaGNU would try to run host's gdb in remote target simulator.
> gdb-test skips the testing when target is remote or non native but the
> gdb version check function does not.
>
> Co-authored-by: Marc Poulhies 
> Suggested-by: Jonathan Wakely 
> Signed-off-by: Luc Michel 
> ---
> Hi
>
> Marc has now left Kalray so I'll carry on with this patch. I applied
> Jonathan's suggestion. Let me know if things are OK.
>
> v2 changes:
> * Put the check in gdb_version_check/gdb_version_check_xmethods
>   instead of gdb_batch_check [Jonathan]
>
> Thanks.

Thanks for the updated patch, I've tested it and pushed it to trunk now.


> Luc
> ---
>  libstdc++-v3/testsuite/lib/gdb-test.exp | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/libstdc++-v3/testsuite/lib/gdb-test.exp 
> b/libstdc++-v3/testsuite/lib/gdb-test.exp
> index af20c85e5a0..f993355c2b4 100644
> --- a/libstdc++-v3/testsuite/lib/gdb-test.exp
> +++ b/libstdc++-v3/testsuite/lib/gdb-test.exp
> @@ -278,18 +278,22 @@ proc gdb_batch_check {command pattern} {
>  # require gdb 7.3, but we don't want to test versions, so instead we
>  # check for the python "lookup_global_symbol" method, which is in 7.3
>  # but not earlier versions.
>  # Return 1 if the version is ok, 0 otherwise.
>  proc gdb_version_check {} {
> +if { ![isnative] || [is_remote target] } { return 0 }
> +
>  return [gdb_batch_check "python print(gdb.lookup_global_symbol)" \
>   ""]
>  }
>
>  # Check for a version of gdb which supports xmethod tests.  It is done
>  # in a manner similar to the check for a version of gdb which supports the
>  # pretty-printer tests below.
>  proc gdb_version_check_xmethods {} {
> +if { ![isnative] || [is_remote target] } { return 0 }
> +
>  return [gdb_batch_check \
>   "python import gdb.xmethod; print(gdb.xmethod.XMethod)" \
>   ""]
>  }
>
> --
> 2.17.1
>

Re: [PATCH] c++, v3: Implement P0466R5 __cpp_lib_is_layout_compatible compiler helpers [PR101539]

2021-08-17 Thread Jason Merrill via Gcc-patches


On 8/17/21 10:55 AM, Jakub Jelinek wrote:

On Tue, Aug 17, 2021 at 07:10:28AM -0700, Jason Merrill wrote:

Looks good, thanks.  I think you didn't see that I also asked for some added
comments; OK with those added.


Oops, I've indeed missed them, sorry.

On Mon, Aug 16, 2021 at 03:57:21PM -0400, Jason Merrill wrote:

Add a comment that discussion in core suggests that we might move toward
treating multiple union fields of the same type as the same field, so this
constraint might get dropped in the future.


Just fields of the same type, or even any fields with layout-compatible types?
Anyway, either of those would require further changes in the code.


Just same type.


So that I don't repost the whole large patch, here is just incremental
diff with the added comments:


Looks good, thanks.


--- gcc/cp/semantics.c  2021-08-17 11:36:44.024227609 +0200
+++ gcc/cp/semantics.c  2021-08-17 16:41:57.070923754 +0200
@@ -10923,6 +10923,16 @@
   basetype2, membertype2, arg2);
if (TREE_TYPE (ret) == boolean_type_node)
  return ret;
+  /* If both arg1 and arg2 are INTEGER_CSTs, is_corresponding_member_aggr
+ already returns boolean_{true,false}_node whether those particular
+ members are corresponding members or not.  Otherwise, if only
+ one of them is INTEGER_CST (canonicalized to first being INTEGER_CST
+ above), it returns boolean_false_node if it is certainly not a
+ corresponding member and otherwise we need to do a runtime check that
+ those two OFFSET_TYPE offsets are equal.
+ If neither of the operands is INTEGER_CST, is_corresponding_member_aggr
+ returns the largest offset at which the members would be corresponding
+ members, so perform arg1 <= ret && arg1 == arg2 runtime check.  */
gcc_assert (TREE_CODE (arg2) != INTEGER_CST);
if (TREE_CODE (arg1) == INTEGER_CST)
  return fold_build2 (EQ_EXPR, boolean_type_node, arg1,
--- gcc/cp/typeck.c 2021-08-17 11:18:53.271850970 +0200
+++ gcc/cp/typeck.c 2021-08-17 16:48:56.165115017 +0200
@@ -1727,6 +1727,15 @@
  field2 = DECL_CHAIN (field2);
}
}
+  /* Otherwise both types must be union types.
+The standard says:
+"Two standard-layout unions are layout-compatible if they have
+the same number of non-static data members and corresponding
+non-static data members (in any order) have layout-compatible
+types."
+but the code anticipates that bitfield vs. non-bitfield,
+different bitfield widths or presence/absence of
+[[no_unique_address]] should be checked as well.  */
auto_vec vec;
unsigned int count = 0;
for (; field1; field1 = DECL_CHAIN (field1))
@@ -1735,6 +1744,9 @@
for (; field2; field2 = DECL_CHAIN (field2))
if (TREE_CODE (field2) == FIELD_DECL)
  vec.safe_push (field2);
+  /* Discussions on core lean towards treating multiple union fields
+of the same type as the same field, so this might need changing
+in the future.  */
if (count != vec.length ())
return false;
for (field1 = TYPE_FIELDS (type1); field1; field1 = DECL_CHAIN (field1))

Jakub

Re: [PATCH] libcpp: __VA_OPT__ p1042r1 placemarker changes [PR101488]

2021-08-17 Thread Jason Merrill via Gcc-patches


On 8/17/21 4:25 AM, Jakub Jelinek wrote:

On Mon, Aug 16, 2021 at 06:07:57PM -0400, Jason Merrill wrote:

It is unclear if it would be enough
to remove just one or if all padding tokens should be removed.
Anyway, the previous removal of all padding tokens at the end of
__VA_OPT__ is undesirable, as it also eats, e.g., the padding tokens needed
for the H4 example from the paper.


Hmm, I don't see why.  Looking at the H4 example, it seems that the
expansion of __VA_OPT__ should be

  a <placemarker>

so when we paste to b, b is pasted to the placemarker, leaving a as a
separate token.


#define H4(X, ...) __VA_OPT__(a X ## X) ## b
H4(, 1)  // replaced by a b

With vanilla trunk we actually get
   a <placemarker> <placemarker>
where the former comes from:
2216  /* Padding on the left of an argument (unless RHS of ##).  */
2217  if ((!pfile->state.in_directive || 
pfile->state.directive_wants_padding)
2218  && src != macro->exp.tokens && !(src[-1].flags & PASTE_LEFT)
2219  && !last_token_is (buff, vaopt_start))
2220{
2221  const cpp_token *t = padding_token (pfile, src);
2222  unsigned index = expanded_token_index (pfile, macro, src, i);
2223  /* Allocate a virtual location for the padding token and
2224 append the token and its location to BUFF and
2225 VIRT_LOCS.   */
2226  tokens_buff_add_token (buff, virt_locs, t,
2227 t->src_loc, t->src_loc,
2228 map, index);
2229}
and the latter one is added at
2303  /* Avoid paste on RHS (even case count == 0).  */
2304  if (!pfile->state.in_directive && !(src->flags & PASTE_LEFT)
2305  && !last_token_is (buff, vaopt_start))
2306{
2307  const cpp_token *t = &pfile->avoid_paste;
2308  tokens_buff_add_token (buff, virt_locs,
2309 t, t->src_loc, t->src_loc,
2310 NULL, 0);
2311}
and trunk eats both <placemarker>s in:
   /* Remove any tail padding from inside the __VA_OPT__.  */
   paste_flag = tokens_buff_last_token_ptr (buff);
   while (paste_flag && paste_flag != start
  && (*paste_flag)->type == CPP_PADDING)
 {
   tokens_buff_remove_last_token (buff);
   paste_flag = tokens_buff_last_token_ptr (buff);
 }
and thus H4(, 1) is replaced by ab instead of the right a b.

We want to remove the latter <placemarker> but not the former one, and
the patch adds the vaopt_padding_tokens counter for it to control
how many placemarkers are removed on vaopt_state::END.
As can be seen in #c1 and #c2 of the PR, I've tried various approaches,
but none of them worked for all the cases except the posted one.


I notice that the second placemarker you mention is avoid_paste, which 
seems relevant.  This seems to also work, at least it doesn't seem to 
break any of the va_opt tests.  Thoughts?


Jason
>From d6cc54280e1c4dba91e883721e05ab0037f4a896 Mon Sep 17 00:00:00 2001
From: Jason Merrill 
Date: Tue, 17 Aug 2021 08:12:02 -0700
Subject: [PATCH] libcpp: __VA_OPT__ tweak
To: gcc-patches@gcc.gnu.org

libcpp/ChangeLog:

	* macro.c (replace_args): When __VA_OPT__ is on the LHS of ##,
	remove trailing avoid_paste tokens.
---
 libcpp/macro.c | 24 
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/libcpp/macro.c b/libcpp/macro.c
index 35eaae383a7..acdbe6ab14f 100644
--- a/libcpp/macro.c
+++ b/libcpp/macro.c
@@ -2025,7 +2025,6 @@ replace_args (cpp_reader *pfile, cpp_hashnode *node, cpp_macro *macro,
   i = 0;
   vaopt_state vaopt_tracker (pfile, macro->variadic, &args[macro->paramc - 1]);
   const cpp_token **vaopt_start = NULL;
-  unsigned vaopt_padding_tokens = 0;
   for (src = macro->exp.tokens; src < limit; src++)
 {
   unsigned int arg_tokens_count;
@@ -2058,16 +2057,7 @@ replace_args (cpp_reader *pfile, cpp_hashnode *node, cpp_macro *macro,
 	  const cpp_token **start = vaopt_start;
 	  vaopt_start = NULL;
 
-	  /* Remove any tail padding from inside the __VA_OPT__.  */
 	  paste_flag = tokens_buff_last_token_ptr (buff);
-	  while (vaopt_padding_tokens--
-		 && paste_flag
-		 && paste_flag != start
-		 && (*paste_flag)->type == CPP_PADDING)
-		{
-		  tokens_buff_remove_last_token (buff);
-		  paste_flag = tokens_buff_last_token_ptr (buff);
-		}
 
 	  if (vaopt_tracker.stringify ())
 		{
@@ -2088,6 +2078,14 @@ replace_args (cpp_reader *pfile, cpp_hashnode *node, cpp_macro *macro,
 		}
 	  else if (src->flags & PASTE_LEFT)
 		{
+		  /* Don't avoid paste after all.  */
+		  while (paste_flag && paste_flag != start
+			 && *paste_flag == &pfile->avoid_paste)
+		{
+		  tokens_buff_remove_last_token (buff);
+		  paste_flag = tokens_buff_last_token_ptr (buff);
+		}
+
 		  /* With a non-

Re: [PATCH 1/2] analyzer: detect and analyze calls via function pointer (GSoC)

2021-08-17 Thread Ankur Saini via Gcc-patches

Here is the final patch after fixing all the nits mentioned. 

Successfully bootstrapped and completed regress tests on x86_64-linux-gnu.

- - -


vfunc.patch
Description: Binary data


Thanks 
- Ankur

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-17 Thread Qing Zhao via Gcc-patches



> On Aug 16, 2021, at 11:48 AM, Qing Zhao via Gcc-patches 
>  wrote:
> 
>>> From the above IR file after “FRE”, we can see that the major issue with 
>>> this IR is:
>>> 
>>> The address taken auto variable “alt_reloc” has been completely replaced by 
>>> the temporary variable “_1” in all
>>> the uses of the original “alt_reloc”. 
>> 
>> Well, this can happen with regular code as well, there's no need for
>> .DEFERRED_INIT.  This is the usual problem with reporting uninitialized
>> uses late.
>> 
>> IMHO this shouldn't be a blocker.  The goal of zero "regressions" wrt
>> -Wuninitialized isn't really achievable.
> 
> Okay. Sounds reasonable to me too.
> 
>> 
>>> The major problem with such IR is,  during uninitialized analysis phase, 
>>> the original use of “alt_reloc” disappeared completely.
>>> So, the warning cannot be reported.
>>> 
>>> 
>>> My questions:
>>> 
>>> 1. Is it possible to get the original “alt_reloc” through the temporary 
>>> variable “_1” with some available information recorded in the IR?
>>> 2. If not, then we have to record the relationship between “alt_reloc” and 
>>> “_1” when the original “alt_reloc” is replaced by “_1” and get such 
>>> relationship during
>>>   Uninitialized analysis phase.  Is this doable?
>> 
>> Well, you could add a fake argument to .DEFERRED_INIT for the purpose of
>> diagnostics.  The difficulty is to avoid tracking it as actual use so
>> you could for example pass a string with the declarations name though
>> this wouldn't give the association with the actual decl.
> Good suggestion, I can try this a little bit. 

I tried this yesterday, added the 4th argument to .DEFERRED_INIT as:

1st argument: SIZE of the DECL;
2nd argument: INIT_TYPE;
3rd argument: IS_VLA, 0 NO, 1 YES;
+   4th argument: The NAME for the DECL;
 
-   as LHS = DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA)
+   as LHS = DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA, NAME)

+  tree name_node
+= build_string_literal (IDENTIFIER_LENGTH (DECL_NAME (decl)),
+   IDENTIFIER_POINTER (DECL_NAME (decl)));
 
   tree call = build_call_expr_internal_loc (UNKNOWN_LOCATION, 
IFN_DEFERRED_INIT,
-   TREE_TYPE (decl), 3,
+   TREE_TYPE (decl), 4,
decl_size, init_type_node,
-   is_vla_node);
+   is_vla_node, name_node);


And got the following IR in .uninit1 dump:


….

  _1 = .DEFERRED_INIT (4, 2, 0, &"alt_reloc"[0]);
  if (_1 != 0)
….


My questions:

1. Is “build_string_literal” the correct utility routine to use for this new 
argument? 
2. Will such string literal nodes have any other potential impact?

Qing

> 
>> 
>>> 3. Looks like that for “address taken” auto variable, if we have to 
>>> introduce a new temporary variable and split the call to .DEFERRED_INIT 
>>> into two:
>>> 
>>> temp = .DEFERRED_INIT (4, 2, 0);
>>> alt_reloc = temp;
>>> 
>>>  More issues might possible.
>>> 
>>> Any comments and suggestions on this issue?
>> 
>> I don't see any good possibilities that would not make optimizing code
>> as good as w/o .DEFERRED_INIT more difficult.  My stake here is always
>> that GCC is an optimizing compiler, not a static analysis engine and
>> thus I side with "broken" diagnostics and better optimization.
> That’s true and reasonable, too.
> 
> thanks.
> 
> Qing
>> 
>> Richard.
>> 
>>> Qing
>>> 
>>> j
 On Aug 11, 2021, at 11:55 AM, Richard Biener  wrote:
 
 On August 11, 2021 6:22:00 PM GMT+02:00, Qing Zhao  
 wrote:
> 
> 
>> On Aug 11, 2021, at 10:53 AM, Richard Biener  wrote:
>> 
>> On August 11, 2021 5:30:40 PM GMT+02:00, Qing Zhao 
>>  wrote:
>>> I modified the routine “gimple_add_init_for_auto_var” as the following:
>>> 
>>> /* Generate initialization to automatic variable DECL based on 
>>> INIT_TYPE.
>>> Build a call to internal const function DEFERRED_INIT:
>>> 1st argument: SIZE of the DECL;
>>> 2nd argument: INIT_TYPE;
>>> 3rd argument: IS_VLA, 0 NO, 1 YES;
>>> 
>>> as DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA).  */
>>> static void
>>> gimple_add_init_for_auto_var (tree decl,
>>>  enum auto_init_type init_type,
>>>  bool is_vla,
>>>  gimple_seq *seq_p)
>>> {
>>> gcc_assert (VAR_P (decl) && !DECL_EXTERNAL (decl) && !TREE_STATIC 
>>> (decl));
>>> gcc_assert (init_type > AUTO_INIT_UNINITIALIZED);
>>> tree decl_size = TYPE_SIZE_UNIT (TREE_TYPE (decl));
>>> 
>>> tree init_type_node
>>> = build_int_cst (integer_type_node, (int) init_type);
>>> tree is_vla_node
>>> = build_int_cst (integer_type_node, (int) is_vla);
>>> 
>>> tree call = build_call_expr_internal_loc (UNKNOWN_LOCATION, 
>>> IFN_DEFERRED_INIT,

Re: Expensive selftests

2021-08-17 Thread Martin Sebor via Gcc-patches


On 8/17/21 12:40 AM, Thomas Schwinge wrote:

Hi!

On 2021-08-16T14:10:00-0600, Martin Sebor  wrote:

On 8/16/21 6:44 AM, Thomas Schwinge wrote:

[...], to document the current behavior, I propose to
"Add more self-tests for 'hash_map' with Value type with non-trivial
constructor/destructor", see attached.  OK to push to master branch?
(Also cherry-pick into release branches, eventually?)


(Attached again, for easy reference.)


Adding more tests sounds like an excellent idea.  I'm not sure about
the idea of adding loopy selftests that iterate as many times as in
the patch (looks like 1234 times two?)


Correct, and I agree it's a sensible concern, generally.

The current 1234 times two iterations is really arbitrary (should
document that in the test case), just so that we trigger a few hash table
expansions.

For 'selftest-c', we've got originally:

 -fself-test: 74775 pass(es) in 0.309299 seconds
 -fself-test: 74775 pass(es) in 0.366041 seconds
 -fself-test: 74775 pass(es) in 0.356663 seconds
 -fself-test: 74775 pass(es) in 0.355009 seconds
 -fself-test: 74775 pass(es) in 0.367575 seconds
 -fself-test: 74775 pass(es) in 0.320406 seconds

..., and with my changes we've got:

 -fself-test: 94519 pass(es) in 0.327755 seconds
 -fself-test: 94519 pass(es) in 0.369522 seconds
 -fself-test: 94519 pass(es) in 0.355531 seconds
 -fself-test: 94519 pass(es) in 0.362179 seconds
 -fself-test: 94519 pass(es) in 0.363176 seconds
 -fself-test: 94519 pass(es) in 0.318930 seconds

So it really seems to be all in the noise?

Yet:


Selftests run each time GCC
builds (i.e., even during day to day development).  It seems to me
that it might be better to run such selftests only as part of
the bootstrap process.


I'd rather have thought about a '--param self-test-expensive' (or
similar), and then invoke the selftests via a new
'gcc/testsuite/selftests/expensive.exp' (or similar).

Or, adapt 'gcc/testsuite/gcc.dg/plugin/expensive_selftests_plugin.c',
that is, invoke them via the GCC plugin mechanism, which also seems to be
easy enough?

I don't have a strong opinion about where/when these tests get run, so
will happily take any suggestions.


I think the right design is to move all these basic building blocks
(at a minimum, all containers, but ultimately even higher level
general-purpose APIs) into a standalone library with its own unit
tests run independently of GCC.

I'm fine with adding these tests if no one else is concerned about
the overhead, especially with a lower number of iterations like
Richard suggests (as long as it still exercises the expansion,
of course).

Thanks
Martin




Grüße
  Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955

Re: [PATCH] c++, v3: Implement P0466R5 __cpp_lib_is_layout_compatible compiler helpers [PR101539]

2021-08-17 Thread Jakub Jelinek via Gcc-patches

On Tue, Aug 17, 2021 at 07:10:28AM -0700, Jason Merrill wrote:
> Looks good, thanks.  I think you didn't see that I also asked for some added
> comments; OK with those added.

Oops, I've indeed missed them, sorry.

On Mon, Aug 16, 2021 at 03:57:21PM -0400, Jason Merrill wrote:
> Add a comment that discussion in core suggests that we might move toward
> treating multiple union fields of the same type as the same field, so this
> constraint might get dropped in the future.

Just same type fields, or even any fields with layout compatible types?
Anyway, either of those would require further changes in the code.


So that I don't repost the whole large patch, here is just the incremental
diff with the added comments:

--- gcc/cp/semantics.c  2021-08-17 11:36:44.024227609 +0200
+++ gcc/cp/semantics.c  2021-08-17 16:41:57.070923754 +0200
@@ -10923,6 +10923,16 @@
   basetype2, membertype2, arg2);
   if (TREE_TYPE (ret) == boolean_type_node)
 return ret;
+  /* If both arg1 and arg2 are INTEGER_CSTs, is_corresponding_member_aggr
+ already returns boolean_{true,false}_node whether those particular
+ members are corresponding members or not.  Otherwise, if only
+ one of them is INTEGER_CST (canonicalized to first being INTEGER_CST
+ above), it returns boolean_false_node if it is certainly not a
+ corresponding member and otherwise we need to do a runtime check that
+ those two OFFSET_TYPE offsets are equal.
+ If neither of the operands is INTEGER_CST, is_corresponding_member_aggr
+ returns the largest offset at which the members would be corresponding
+ members, so perform arg1 <= ret && arg1 == arg2 runtime check.  */
   gcc_assert (TREE_CODE (arg2) != INTEGER_CST);
   if (TREE_CODE (arg1) == INTEGER_CST)
 return fold_build2 (EQ_EXPR, boolean_type_node, arg1,
--- gcc/cp/typeck.c 2021-08-17 11:18:53.271850970 +0200
+++ gcc/cp/typeck.c 2021-08-17 16:48:56.165115017 +0200
@@ -1727,6 +1727,15 @@
  field2 = DECL_CHAIN (field2);
}
}
+  /* Otherwise both types must be union types.
+The standard says:
+"Two standard-layout unions are layout-compatible if they have
+the same number of non-static data members and corresponding
+non-static data members (in any order) have layout-compatible
+types."
+but the code anticipates that bitfield vs. non-bitfield,
+different bitfield widths or presence/absence of
+[[no_unique_address]] should be checked as well.  */
   auto_vec vec;
   unsigned int count = 0;
   for (; field1; field1 = DECL_CHAIN (field1))
@@ -1735,6 +1744,9 @@
   for (; field2; field2 = DECL_CHAIN (field2))
if (TREE_CODE (field2) == FIELD_DECL)
  vec.safe_push (field2);
+  /* Discussions on core lean towards treating multiple union fields
+of the same type as the same field, so this might need changing
+in the future.  */
   if (count != vec.length ())
return false;
   for (field1 = TYPE_FIELDS (type1); field1; field1 = DECL_CHAIN (field1))

Jakub

Re: [PATCH] Fall back to masked_gather_load/masked_scatter_store

2021-08-17 Thread Richard Sandiford via Gcc-patches

Richard Biener via Gcc-patches  writes:
> This adds a fallback to the masked_ variants for gather_load
> and scatter_store if the latter are not available.
>
> Bootstrap / regtest running on x86_64-unknown-linux-gnu.

LGTM FWIW.  I don't know the history behind the TREE_CODE (*mask) != SSA_NAME
check.

I guess we could probably remove the unmasked SVE optabs with this.

Thanks,
Richard

>
> 2021-08-17  Richard Biener  
>
>   * optabs-query.c (supports_vec_gather_load_p): Also check
>   for masked optabs.
>   (supports_vec_scatter_store_p): Likewise.
>   * tree-vect-data-refs.c (vect_gather_scatter_fn_p): Fall
>   back to masked variants if non-masked are not supported.
>   * tree-vect-patterns.c (vect_recog_gather_scatter_pattern):
>   When we need to use masked gather/scatter but do not have
>   a mask set up a constant true one.
>   * tree-vect-stmts.c (vect_check_scalar_mask): Also allow
>   non-SSA_NAME masks.
> ---
>  gcc/optabs-query.c|  6 --
>  gcc/tree-vect-data-refs.c | 22 +++---
>  gcc/tree-vect-patterns.c  |  7 +--
>  gcc/tree-vect-stmts.c |  8 
>  4 files changed, 28 insertions(+), 15 deletions(-)
>
> diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
> index 05ee5f517da..a6dd0fed610 100644
> --- a/gcc/optabs-query.c
> +++ b/gcc/optabs-query.c
> @@ -740,7 +740,8 @@ supports_vec_gather_load_p ()
>this_fn_optabs->supports_vec_gather_load_cached = true;
>  
>this_fn_optabs->supports_vec_gather_load
> -= supports_vec_convert_optab_p (gather_load_optab);
> += (supports_vec_convert_optab_p (gather_load_optab)
> +   || supports_vec_convert_optab_p (mask_gather_load_optab));
>  
>return this_fn_optabs->supports_vec_gather_load;
>  }
> @@ -757,7 +758,8 @@ supports_vec_scatter_store_p ()
>this_fn_optabs->supports_vec_scatter_store_cached = true;
>  
>this_fn_optabs->supports_vec_scatter_store
> -= supports_vec_convert_optab_p (scatter_store_optab);
> += (supports_vec_convert_optab_p (scatter_store_optab)
> +   || supports_vec_convert_optab_p (mask_scatter_store_optab));
>  
>return this_fn_optabs->supports_vec_scatter_store;
>  }
> diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
> index c4c14d74065..97745a830a2 100644
> --- a/gcc/tree-vect-data-refs.c
> +++ b/gcc/tree-vect-data-refs.c
> @@ -3735,11 +3735,17 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool 
> read_p, bool masked_p,
>  return false;
>  
>/* Work out which function we need.  */
> -  internal_fn ifn;
> +  internal_fn ifn, alt_ifn;
>if (read_p)
> -ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
> +{
> +  ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
> +  alt_ifn = IFN_MASK_GATHER_LOAD;
> +}
>else
> -ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
> +{
> +  ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
> +  alt_ifn = IFN_MASK_SCATTER_STORE;
> +}
>  
>for (;;)
>  {
> @@ -3755,6 +3761,16 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool 
> read_p, bool masked_p,
> *offset_vectype_out = offset_vectype;
> return true;
>   }
> +  else if (!masked_p
> +&& internal_gather_scatter_fn_supported_p (alt_ifn, vectype,
> +   memory_type,
> +   offset_vectype,
> +   scale))
> + {
> +   *ifn_out = alt_ifn;
> +   *offset_vectype_out = offset_vectype;
> +   return true;
> + }
>  
>if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
> && TYPE_PRECISION (offset_type) >= element_bits)
> diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
> index 25de97bd9b0..899734005ce 100644
> --- a/gcc/tree-vect-patterns.c
> +++ b/gcc/tree-vect-patterns.c
> @@ -4820,6 +4820,9 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
>if (mask)
>  mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
> loop_vinfo);
> +  else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
> +|| gs_info.ifn == IFN_MASK_GATHER_LOAD)
> +mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
>  
>/* Get the invariant base and non-invariant offset, converting the
>   latter to the same width as the vector elements.  */
> @@ -4847,11 +4850,11 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
>  {
>tree rhs = vect_get_store_rhs (stmt_info);
>if (mask != NULL)
> - pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
> + pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
>  base, offset, scale, rhs,
>  mask);
>else
> - pattern_stmt = gimple_build_call_internal

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-17 Thread Qing Zhao via Gcc-patches



> On Aug 17, 2021, at 9:45 AM, Richard Biener  wrote:
> 
> On Tue, 17 Aug 2021, Qing Zhao wrote:
> 
>> 
>> 
>>> On Aug 17, 2021, at 3:43 AM, Richard Biener  wrote:
>>> 
>>> On Mon, 16 Aug 2021, Qing Zhao wrote:
>>> 
 
 
> On Aug 16, 2021, at 2:40 AM, Richard Biener  wrote:
> 
> On Thu, 12 Aug 2021, Qing Zhao wrote:
> 
>> Hi, Richard,
>> 
>> For RTL expansion of call to .DEFERRED_INIT, I changed my code per your 
>> suggestions like following:
>> 
>> ==
>> #define INIT_PATTERN_VALUE  0xFE
>> static void
>> expand_DEFERRED_INIT (internal_fn, gcall *stmt)
>> {
>> tree lhs = gimple_call_lhs (stmt);
>> tree var_size = gimple_call_arg (stmt, 0);
>> enum auto_init_type init_type
>>  = (enum auto_init_type) TREE_INT_CST_LOW (gimple_call_arg (stmt, 1));
>> bool is_vla = (bool) TREE_INT_CST_LOW (gimple_call_arg (stmt, 2));
>> 
>> tree var_type = TREE_TYPE (lhs);
>> gcc_assert (init_type > AUTO_INIT_UNINITIALIZED);
>> 
>> if (is_vla || (!can_native_interpret_type_p (var_type)))
>>  {
>>  /* If this is a VLA or the type of the variable cannot be natively
>> interpreted, expand to a memset to initialize it.  */
>>if (TREE_CODE (lhs) == SSA_NAME)
>>  lhs = SSA_NAME_VAR (lhs);
>>tree var_addr = NULL_TREE;
>>if (is_vla)
>>  var_addr = TREE_OPERAND (lhs, 0);
>>else
>>  {
>>   TREE_ADDRESSABLE (lhs) = 1;
>>   var_addr = build_fold_addr_expr (lhs);
>>  }
>>tree value = (init_type == AUTO_INIT_PATTERN) ?
>>  build_int_cst (unsigned_char_type_node,
>> INIT_PATTERN_VALUE) :
>>  build_zero_cst (unsigned_char_type_node);
>>tree m_call = build_call_expr (builtin_decl_implicit 
>> (BUILT_IN_MEMSET),
>>   3, var_addr, value, var_size);
>>/* Expand this memset call.  */
>>expand_builtin_memset (m_call, NULL_RTX, TYPE_MODE (var_type));
>>  }
>> else
>>  {
>>  /* If this is not a VLA and the type of the variable can be natively 
>> interpreted, expand to assignment to generate better code.  */
>>tree pattern = NULL_TREE;
>>unsigned HOST_WIDE_INT total_bytes
>>  = tree_to_uhwi (TYPE_SIZE_UNIT (var_type));
>> 
>>if (init_type == AUTO_INIT_PATTERN)
>>  {
>>unsigned char *buf = (unsigned char *) xmalloc (total_bytes);
>>memset (buf, INIT_PATTERN_VALUE, total_bytes);
>>pattern = native_interpret_expr (var_type, buf, total_bytes);
>>gcc_assert (pattern);
>>  }
>> 
>>tree init = (init_type == AUTO_INIT_PATTERN) ?
>> pattern :
>> build_zero_cst (var_type);
>>expand_assignment (lhs, init, false);
>>  }
>> }
>> ===
>> 
>> Now, I used “can_native_interpret_type_p (var_type)” instead of 
>> “use_register_for_decl (lhs)” to decide 
>> whether to use “memset” or use “assign” to expand this function.
>> 
>> However, this exposed an bug that is very hard to be addressed:
>> 
>> ***For the testing case: test suite/gcc.dg/uninit-I.c:
>> 
>> /* { dg-do compile } */
>> /* { dg-options "-O2 -Wuninitialized" } */
>> 
>> int sys_msgctl (void)
>> {
>> struct { int mode; } setbuf;
>> return setbuf.mode;  /* { dg-warning "'setbuf\.mode' is used" } */
>> ==
>> 
>> **the above auto var “setbuf” has “struct” type, which 
>> “can_native_interpret_type_p(var_type)” is false, therefore, 
>> Expanding this .DEFERRED_INIT call went down the “memset” expansion 
>> route. 
>> 
>> However, this structure type can be fitted into a register, therefore 
>> cannot be taken address anymore at this stage, even though I tried:
>> 
>>   TREE_ADDRESSABLE (lhs) = 1;
>>   var_addr = build_fold_addr_expr (lhs);
>> 
>> To create an address variable for it, the expansion still failed at 
>> expr.c: line 8412:
>> during RTL pass: expand
>> /home/opc/Work/GCC/latest-gcc/gcc/testsuite/gcc.dg/auto-init-uninit-I.c:6:24:
>>  internal compiler error: in expand_expr_addr_expr_1, at expr.c:8412
>> 0xd04104 expand_expr_addr_expr_1
>>  ../../latest-gcc/gcc/expr.c:8412
>> 0xd04a95 expand_expr_addr_expr
>>  ../../latest-gcc/gcc/expr.c:8525
>> 0xd13592 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, 
>> expand_modifier, rtx_def**, bool)
>>  ../../latest-gcc/gcc/expr.c:11741
>> 0xd05142 expand_expr_real(tree_node*, rtx_def*, machine_mode, 
>> expand_modifier, rtx_def**, bool)
>>  ../../latest-gcc/gcc/expr.c:8713
>> 0xaed1d3 expand_expr
>>  ../../latest-gcc/gcc/expr.h:301
>> 0xaf0d89 get_memory_rtx
>>  ../../lat

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-17 Thread Qing Zhao via Gcc-patches



> On Aug 17, 2021, at 3:29 AM, Richard Biener  wrote:
> 
> On Mon, 16 Aug 2021, Qing Zhao wrote:
> 
>> My current code for expand_DEFERRED_INIT is like the following, could you 
>> check and see whether there is any issue for it:
>> 
>> #define INIT_PATTERN_VALUE  0xFE
>> static void
>> expand_DEFERRED_INIT (internal_fn, gcall *stmt)
>> {
>>  tree lhs = gimple_call_lhs (stmt);
>>  tree var_size = gimple_call_arg (stmt, 0);
>>  enum auto_init_type init_type
>>= (enum auto_init_type) TREE_INT_CST_LOW (gimple_call_arg (stmt, 1));
>>  bool is_vla = (bool) TREE_INT_CST_LOW (gimple_call_arg (stmt, 2));
>> 
>>  tree var_type = TREE_TYPE (lhs);
>>  gcc_assert (init_type > AUTO_INIT_UNINITIALIZED);
>> 
>>  if (is_vla || (!use_register_for_decl (lhs)))
>>{
>>  if (TREE_CODE (lhs) == SSA_NAME)
>>lhs = SSA_NAME_VAR (lhs);
> 
> this should not be necessary (in fact you shouldn't see a SSA_NAME
> here, if you do then using SSA_NAME_VAR is wrong)
You mean during RTL expansion phase, all SSA_NAMEs are gone already?
> 
>>/* If this is a VLA or the variable is not in register,
>>   expand to a memset to initialize it.  */
>>  tree var_addr = NULL_TREE;
>>  if (is_vla)
>>var_addr = TREE_OPERAND (lhs, 0);
>>  else
>>{
>>  TREE_ADDRESSABLE (lhs) = 1;
>>  var_addr = build_fold_addr_expr (lhs);
>>}
> 
> use, independent of is_vla
> 
> mark_addressable (lhs);
> var_addr = build_fold_addr_expr (lhs);
Okay.
> 
>> 
>>  tree value = (init_type == AUTO_INIT_PATTERN) ?
>>build_int_cst (unsigned_char_type_node,
>>   INIT_PATTERN_VALUE) :
>>build_zero_cst (unsigned_char_type_node);
> 
> since memset has an integer argument for the value use
> integer_zero_node for the zero case and build_int_cst (integer_type_node, 
> ...) for the pattern case

Okay.

> 
>>  tree m_call = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMSET),
>> 3, var_addr, value, var_size);
>>  /* Expand this memset call.  */
>>  expand_builtin_memset (m_call, NULL_RTX, TYPE_MODE (var_type));
>>}
>>  else
>>{
>>/* If this variable is in a register, use expand_assignment might
>>   generate better code.  */
>>  tree pattern = NULL_TREE;
>>  unsigned HOST_WIDE_INT total_bytes
>>= tree_to_uhwi (TYPE_SIZE_UNIT (var_type));
>> 
>>  if (init_type == AUTO_INIT_PATTERN)
>>{
>>  if (can_native_interpret_type_p (var_type))
>>{
>>  unsigned char *buf = (unsigned char *) xmalloc (total_bytes);
>>  memset (buf, INIT_PATTERN_VALUE, total_bytes);
>>  pattern = native_interpret_expr (var_type, buf, total_bytes);
>>  gcc_assert (pattern);
>>}
>>  else
>>{
>>  tree index_type = build_index_type (size_int (total_bytes - 1));
>>  tree array_type = build_array_type (unsigned_char_type_node,
>>  index_type);
>>  tree element = build_int_cst (unsigned_char_type_node,
>>INIT_PATTERN_VALUE);
>>  vec *elts = NULL;
>>  for (unsigned int i = 0; i < total_bytes; i++)
>>CONSTRUCTOR_APPEND_ELT (elts, NULL_TREE, element);
>>  pattern = build_constructor (array_type, elts);
>>  pattern = build1 (VIEW_CONVERT_EXPR, var_type, pattern);
>>}
>>}
>> 
>>  tree init = (init_type == AUTO_INIT_PATTERN) ?
>>   pattern :
>>   build_zero_cst (var_type);
> 
> maybe conditionally initialize init instead of pattern and init?
> Thus replace pattern by init and do
> 
>else
>  init = build_zero_cst (var_type);

You mean the following:


tree init = pattern;
if (init_type != AUTO_INIT_PATTERN)
  init = build_zero_cst (var_type);

Or something else?

> 
> 
> the above should work, as said the RTL expansion part can possibly
> be improved but we can do this as followup as well.

Okay.

Qing
> 
>>  expand_assignment (lhs, init, false);
>>}
>> }
>> 
>> Thanks.
>> 
>> Qing
>> 
>> 
>> 
>> 
> 
> -- 
> Richard Biener 
> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
> Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-17 Thread Richard Biener via Gcc-patches

On Tue, 17 Aug 2021, Qing Zhao wrote:

> 
> 
> > On Aug 17, 2021, at 3:43 AM, Richard Biener  wrote:
> > 
> > On Mon, 16 Aug 2021, Qing Zhao wrote:
> > 
> >> 
> >> 
> >>> On Aug 16, 2021, at 2:40 AM, Richard Biener  wrote:
> >>> 
> >>> On Thu, 12 Aug 2021, Qing Zhao wrote:
> >>> 
>  Hi, Richard,
>  
>  For RTL expansion of call to .DEFERRED_INIT, I changed my code per your 
>  suggestions like following:
>  
>  ==
>  #define INIT_PATTERN_VALUE  0xFE
>  static void
>  expand_DEFERRED_INIT (internal_fn, gcall *stmt)
>  {
>  tree lhs = gimple_call_lhs (stmt);
>  tree var_size = gimple_call_arg (stmt, 0);
>  enum auto_init_type init_type
>    = (enum auto_init_type) TREE_INT_CST_LOW (gimple_call_arg (stmt, 1));
>  bool is_vla = (bool) TREE_INT_CST_LOW (gimple_call_arg (stmt, 2));
>  
>  tree var_type = TREE_TYPE (lhs);
>  gcc_assert (init_type > AUTO_INIT_UNINITIALIZED);
>  
>  if (is_vla || (!can_native_interpret_type_p (var_type)))
>    {
>    /* If this is a VLA or the type of the variable cannot be natively
>   interpreted, expand to a memset to initialize it.  */
>  if (TREE_CODE (lhs) == SSA_NAME)
>    lhs = SSA_NAME_VAR (lhs);
>  tree var_addr = NULL_TREE;
>  if (is_vla)
>    var_addr = TREE_OPERAND (lhs, 0);
>  else
>    {
> TREE_ADDRESSABLE (lhs) = 1;
> var_addr = build_fold_addr_expr (lhs);
>    }
>  tree value = (init_type == AUTO_INIT_PATTERN) ?
>    build_int_cst (unsigned_char_type_node,
>   INIT_PATTERN_VALUE) :
>    build_zero_cst (unsigned_char_type_node);
>  tree m_call = build_call_expr (builtin_decl_implicit 
>  (BUILT_IN_MEMSET),
> 3, var_addr, value, var_size);
>  /* Expand this memset call.  */
>  expand_builtin_memset (m_call, NULL_RTX, TYPE_MODE (var_type));
>    }
>  else
>    {
>    /* If this is not a VLA and the type of the variable can be natively 
>   interpreted, expand to assignment to generate better code.  */
>  tree pattern = NULL_TREE;
>  unsigned HOST_WIDE_INT total_bytes
>    = tree_to_uhwi (TYPE_SIZE_UNIT (var_type));
>  
>  if (init_type == AUTO_INIT_PATTERN)
>    {
>  unsigned char *buf = (unsigned char *) xmalloc (total_bytes);
>  memset (buf, INIT_PATTERN_VALUE, total_bytes);
>  pattern = native_interpret_expr (var_type, buf, total_bytes);
>  gcc_assert (pattern);
>    }
>  
>  tree init = (init_type == AUTO_INIT_PATTERN) ?
>   pattern :
>   build_zero_cst (var_type);
>  expand_assignment (lhs, init, false);
>    }
>  }
>  ===
>  
>  Now, I used “can_native_interpret_type_p (var_type)” instead of 
>  “use_register_for_decl (lhs)” to decide 
>  whether to use “memset” or use “assign” to expand this function.
>  
>  However, this exposed an bug that is very hard to be addressed:
>  
>  ***For the testing case: test suite/gcc.dg/uninit-I.c:
>  
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -Wuninitialized" } */
>  
>  int sys_msgctl (void)
>  {
>  struct { int mode; } setbuf;
>  return setbuf.mode;  /* { dg-warning "'setbuf\.mode' is used" } */
>  ==
>  
>  **the above auto var “setbuf” has “struct” type, which 
>  “can_native_interpret_type_p(var_type)” is false, therefore, 
>  Expanding this .DEFERRED_INIT call went down the “memset” expansion 
>  route. 
>  
>  However, this structure type can be fitted into a register, therefore 
>  cannot be taken address anymore at this stage, even though I tried:
>  
> TREE_ADDRESSABLE (lhs) = 1;
> var_addr = build_fold_addr_expr (lhs);
>  
>  To create an address variable for it, the expansion still failed at 
>  expr.c: line 8412:
>  during RTL pass: expand
>  /home/opc/Work/GCC/latest-gcc/gcc/testsuite/gcc.dg/auto-init-uninit-I.c:6:24:
>   internal compiler error: in expand_expr_addr_expr_1, at expr.c:8412
>  0xd04104 expand_expr_addr_expr_1
>   ../../latest-gcc/gcc/expr.c:8412
>  0xd04a95 expand_expr_addr_expr
>   ../../latest-gcc/gcc/expr.c:8525
>  0xd13592 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, 
>  expand_modifier, rtx_def**, bool)
>   ../../latest-gcc/gcc/expr.c:11741
>  0xd05142 expand_expr_real(tree_node*, rtx_def*, machine_mode, 
>  expand_modifier, rtx_def**, bool)
>   ../../latest-gcc/gcc/expr.c:8713
>  0xaed1d3 expand_expr
>   ../../latest-gcc/gcc/expr.h:301
>  0xaf0d89 get_memory_rtx
>   ../../latest-gcc/gcc/builtins.c

Re: [PATCH] move x86 to use gather/scatter internal functions

2021-08-17 Thread Richard Biener via Gcc-patches

On Tue, Aug 17, 2021 at 3:29 PM Richard Biener via Gcc-patches
 wrote:
>
> This is an attempt to start moving the x86 backend to use
> standard pattern names for [mask_]gather_load and [mask_]scatter_store
> rather than using the builtin_{gather,scatter} target hooks.
>
> I've started with AVX2 gathers and given x86 only supports masked
> gather I only implemented mask_gather_load.  Note while for
> the builtin_gather case the vectorizer will provide an all-true
> mask operand for non-masked gathers this capability does not
> exist for the IFN path yet, so only testcases with actual masked
> gathers will work.
>
> If this looks reasonable on the backend side I'll see to first
> complete the vectorizer part, ripping out the target hook and
> arranging for the missing pieces.  Another one is the support
> for SImode indices with DFmode data which requires unpacking
> the index vector and actually recognizing the IFN.
>
> 2021-08-17  Richard Biener  
>
> * tree-vect-data-refs.c (vect_check_gather_scatter):
> Always use internal functions.
> * config/i386/sse.md
> (mask_gather_load): New expander.
> (mask_gather_load): Likewise.
> ---
>  gcc/config/i386/sse.md| 56 +++
>  gcc/tree-vect-data-refs.c |  4 +--
>  2 files changed, 57 insertions(+), 3 deletions(-)
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 3957c86c3df..40bec98d9f7 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -23232,12 +23232,22 @@
>(V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
>(V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
>(V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
> +(define_mode_attr vec_gather_idxsi
> + [(V2DI "v4si") (V4DI "v4si") (V8DI "v8si")
> +  (V2DF "v4si") (V4DF "v4si") (V8DF "v8si")
> +  (V4SI "v4si") (V8SI "v8si") (V16SI "v16si")
> +  (V4SF "v4si") (V8SF "v8si") (V16SF "v16si")])
>
>  (define_mode_attr VEC_GATHER_IDXDI
>   [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
>(V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
>(V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
>(V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
> +(define_mode_attr vec_gather_idxdi
> + [(V2DI "v2di") (V4DI "v4di") (V8DI "v8di")
> +  (V2DF "v2di") (V4DF "v4di") (V8DF "v8di")
> +  (V4SI "v2di") (V8SI "v4di") (V16SI "v8di")
> +  (V4SF "v2di") (V8SF "v4di") (V16SF "v8di")])
>
>  (define_mode_attr VEC_GATHER_SRCDI
>   [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
> @@ -23245,6 +23255,29 @@
>(V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
>(V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
>
> +(define_expand "mask_gather_load"
> +  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
> +  (unspec:VEC_GATHER_MODE
> +[(pc)
> + (mem:
> +   (match_par_dup 6
> + [(match_operand 1 "vsib_address_operand")
> +  (match_operand:
> + 2 "register_operand")
> +  (match_operand:SI 4 "const1248_operand")
> +  (match_operand:SI 3 "const0_operand")]))
> + (mem:BLK (scratch))
> + (match_operand: 5 "register_operand")]

One problem with these is that when AVX512[VL] is enabled we get an AVX512 mask
mode here, and while the internal function expansion check succeeds (it never
checks the mask operand!), RTL expansion fails unexpectedly because of this
mismatch.

I suppose a more complicated define_mode_attr might do the trick or do I
need to add && !TARGET_AVX512F to these expanders?

I've meanwhile posted a patch to make the vectorizer fall back to
masked_ when non-masked_ variants are not available and that seems to work
fine at least.

Richard.

> +UNSPEC_GATHER))
> + (clobber (match_scratch:VEC_GATHER_MODE 7))])]
> +  "TARGET_AVX2 && TARGET_USE_GATHER"
> +{
> +  operands[5] = gen_lowpart_SUBREG (mode, operands[5]);
> +  operands[6]
> += gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
> +   operands[5]), UNSPEC_VSIBADDR);
> +})
> +
>  (define_expand "avx2_gathersi"
>[(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
>(unspec:VEC_GATHER_MODE
> @@ -23306,6 +23339,29 @@
> (set_attr "prefix" "vex")
> (set_attr "mode" "")])
>
> +(define_expand "mask_gather_load"
> +  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
> +  (unspec:VEC_GATHER_MODE
> +[(pc)
> +

[PATCH] Fall back to masked_gather_load/masked_scatter_store

2021-08-17 Thread Richard Biener via Gcc-patches

This adds a fallback to the masked_ variants for gather_load
and scatter_store if the latter are not available.

Bootstrap / regtest running on x86_64-unknown-linux-gnu.

2021-08-17  Richard Biener  

* optabs-query.c (supports_vec_gather_load_p): Also check
for masked optabs.
(supports_vec_scatter_store_p): Likewise.
* tree-vect-data-refs.c (vect_gather_scatter_fn_p): Fall
back to masked variants if non-masked are not supported.
* tree-vect-patterns.c (vect_recog_gather_scatter_pattern):
When we need to use masked gather/scatter but do not have
a mask set up a constant true one.
* tree-vect-stmts.c (vect_check_scalar_mask): Also allow
non-SSA_NAME masks.
---
 gcc/optabs-query.c|  6 --
 gcc/tree-vect-data-refs.c | 22 +++---
 gcc/tree-vect-patterns.c  |  7 +--
 gcc/tree-vect-stmts.c |  8 
 4 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
index 05ee5f517da..a6dd0fed610 100644
--- a/gcc/optabs-query.c
+++ b/gcc/optabs-query.c
@@ -740,7 +740,8 @@ supports_vec_gather_load_p ()
   this_fn_optabs->supports_vec_gather_load_cached = true;
 
   this_fn_optabs->supports_vec_gather_load
-= supports_vec_convert_optab_p (gather_load_optab);
+= (supports_vec_convert_optab_p (gather_load_optab)
+   || supports_vec_convert_optab_p (mask_gather_load_optab));
 
   return this_fn_optabs->supports_vec_gather_load;
 }
@@ -757,7 +758,8 @@ supports_vec_scatter_store_p ()
   this_fn_optabs->supports_vec_scatter_store_cached = true;
 
   this_fn_optabs->supports_vec_scatter_store
-= supports_vec_convert_optab_p (scatter_store_optab);
+= (supports_vec_convert_optab_p (scatter_store_optab)
+   || supports_vec_convert_optab_p (mask_scatter_store_optab));
 
   return this_fn_optabs->supports_vec_scatter_store;
 }
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index c4c14d74065..97745a830a2 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -3735,11 +3735,17 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, 
bool masked_p,
 return false;
 
   /* Work out which function we need.  */
-  internal_fn ifn;
+  internal_fn ifn, alt_ifn;
   if (read_p)
-ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
+{
+  ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
+  alt_ifn = IFN_MASK_GATHER_LOAD;
+}
   else
-ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
+{
+  ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
+  alt_ifn = IFN_MASK_SCATTER_STORE;
+}
 
   for (;;)
 {
@@ -3755,6 +3761,16 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, 
bool masked_p,
  *offset_vectype_out = offset_vectype;
  return true;
}
+  else if (!masked_p
+  && internal_gather_scatter_fn_supported_p (alt_ifn, vectype,
+ memory_type,
+ offset_vectype,
+ scale))
+   {
+ *ifn_out = alt_ifn;
+ *offset_vectype_out = offset_vectype;
+ return true;
+   }
 
   if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
  && TYPE_PRECISION (offset_type) >= element_bits)
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index 25de97bd9b0..899734005ce 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -4820,6 +4820,9 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
   if (mask)
 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
  loop_vinfo);
+  else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
+  || gs_info.ifn == IFN_MASK_GATHER_LOAD)
+mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
 
   /* Get the invariant base and non-invariant offset, converting the
  latter to the same width as the vector elements.  */
@@ -4847,11 +4850,11 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
 {
   tree rhs = vect_get_store_rhs (stmt_info);
   if (mask != NULL)
-   pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
+   pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
   base, offset, scale, rhs,
   mask);
   else
-   pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
+   pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
   base, offset, scale, rhs);
 }
   gimple_call_set_nothrow (pattern_stmt, true);
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index e356056be93..59100965d5e 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -

Re: [PATCH 8/8] aarch64: Add -mtune=neoverse-512tvb

2021-08-17 Thread Richard Sandiford via Gcc-patches

Richard Sandiford  writes:
> This patch adds an option to tune for Neoverse cores that have
> a total vector bandwidth of 512 bits (4x128 for Advanced SIMD
> and a vector-length-dependent equivalent for SVE).  This is intended
> to be a compromise between tuning aggressively for a single core like
> Neoverse V1 (which can be too narrow) and tuning for AArch64 cores
> in general (which can be too wide).
>
> -mcpu=neoverse-512tvb is equivalent to -mcpu=neoverse-v1
> -mtune=neoverse-512tvb.
>
> gcc/
>   * doc/invoke.texi: Document -mtune=neoverse-512tvb and
>   -mcpu=neoverse-512tvb.
>   * config/aarch64/aarch64-cores.def (neoverse-512tvb): New entry.
>   * config/aarch64/aarch64-tune.md: Regenerate.
>   * config/aarch64/aarch64.c (neoverse512tvb_sve_vector_cost)
>   (neoverse512tvb_sve_issue_info, neoverse512tvb_vec_issue_info)
>   (neoverse512tvb_vector_cost, neoverse512tvb_tunings): New structures.
>   (aarch64_adjust_body_cost_sve): Handle -mtune=neoverse-512tvb.
>   (aarch64_adjust_body_cost): Likewise.

I've backported this cut-down version to GCC 10 and 9, so that the
option is at least recognised there too.

gcc/
* doc/invoke.texi: Document -mtune=neoverse-512tvb and
-mcpu=neoverse-512tvb.
* config/aarch64/aarch64-cores.def (neoverse-512tvb): New entry.
* config/aarch64/aarch64-tune.md: Regenerate.

---
 gcc/config/aarch64/aarch64-cores.def |  1 +
 gcc/config/aarch64/aarch64-tune.md   |  2 +-
 gcc/doc/invoke.texi  | 24 ++--
 3 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index 9c290292479..fc60e2ae1ac 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -135,6 +135,7 @@ AARCH64_CORE("thunderx3t110",  thunderx3t110,  
thunderx3t110, 8_3A,  AARCH64_FL_
 /* Arm ('A') cores.  */
 AARCH64_CORE("zeus", zeus, cortexa57, 8_4A,  AARCH64_FL_FOR_ARCH8_4 | 
AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | 
AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, 
neoversev1, 0x41, 0xd40, -1)
 AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A,  
AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | 
AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | 
AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
+AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A,  
AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | 
AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | 
AARCH64_FL_RNG, neoversev1, INVALID_IMP, INVALID_CORE, -1)
 
 /* Qualcomm ('Q') cores. */
 AARCH64_CORE("saphira", saphira,saphira,8_4A,  
AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira,   0x51, 
0xC01, -1)
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index 7fda2294b8a..aa68d67bdf4 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,saphira,neoversen2,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
+   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,neoversen2,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index eabeec944e7..72d995cd0cc 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -16994,8 +16994,9 @@ performance of the code.  Permissible values for this 
option are:
 @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},
 @samp{cortex-a76}, @samp{cortex-a76ae}, @samp{cortex-a77},
 @samp{cortex-a65}, @samp{cortex-a65ae}, @samp{cortex-a34},
-@samp{are

[committed] libstdc++: Optimize std::function move constructor [PR101923]

2021-08-17 Thread Jonathan Wakely via Gcc-patches

PR 101923 points out that the unconditional swap in the std::function
move constructor makes it slower than copying an empty std::function.
The copy constructor has to check for the empty case before doing
anything, and that makes it very fast for the empty case.

Adding the same check to the move constructor avoids copying the
_Any_data POD when we don't need to. We can also inline the effects of
swap, by copying each member and then zeroing the pointer members.

This makes moving an empty object at least as fast as copying an empty
object.

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

PR libstdc++/101923
* include/bits/std_function.h (function(function&&)): Check for
non-empty parameter before doing any work.

Tested powerpc64le-linux. Committed to trunk.

commit 0808b0df9c4d31f4c362b9c85fb538b6aafcb517
Author: Jonathan Wakely 
Date:   Tue Aug 17 11:30:56 2021

libstdc++: Optimize std::function move constructor [PR101923]

PR 101923 points out that the unconditional swap in the std::function
move constructor makes it slower than copying an empty std::function.
The copy constructor has to check for the empty case before doing
anything, and that makes it very fast for the empty case.

Adding the same check to the move constructor avoids copying the
_Any_data POD when we don't need to. We can also inline the effects of
swap, by copying each member and then zeroing the pointer members.

This makes moving an empty object at least as fast as copying an empty
object.

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

PR libstdc++/101923
* include/bits/std_function.h (function(function&&)): Check for
non-empty parameter before doing any work.

diff --git a/libstdc++-v3/include/bits/std_function.h 
b/libstdc++-v3/include/bits/std_function.h
index c08484465c9..fb86ff1c5f8 100644
--- a/libstdc++-v3/include/bits/std_function.h
+++ b/libstdc++-v3/include/bits/std_function.h
@@ -389,8 +389,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*  (if it has one).
*/
   function(function&& __x) noexcept
-  : _Function_base()
-  { __x.swap(*this); }
+  : _Function_base(), _M_invoker(__x._M_invoker)
+  {
+   if (static_cast<bool>(__x))
+ {
+   _M_functor = __x._M_functor;
+   _M_manager = __x._M_manager;
+   __x._M_manager = nullptr;
+   __x._M_invoker = nullptr;
+ }
+  }
 
   /**
*  @brief Builds a %function that targets a copy of the incoming

Re: [PATCH] c++, v3: Implement P0466R5 __cpp_lib_is_layout_compatible compiler helpers [PR101539]

2021-08-17 Thread Jason Merrill via Gcc-patches


On 8/17/21 6:44 AM, Jakub Jelinek wrote:

On Mon, Aug 16, 2021 at 03:57:21PM -0400, Jason Merrill wrote:

+static tree
+is_corresponding_member_aggr (location_t loc, tree basetype1, tree membertype1,
+ tree arg1, tree basetype2, tree membertype2,
+ tree arg2)
+{
+  tree field1 = TYPE_FIELDS (basetype1);
+  tree field2 = TYPE_FIELDS (basetype2);
+  tree ret = boolean_false_node;
+  while (1)
+{


Can we share more of the code between this function and
layout_compatible_type_p?  I'm thinking of a function something like

bool next_common_initial_seqence (tree &mem1, tree &mem2)

that would update mem1/mem2 to the next entities of the common initial
sequence (or null at the end) and return true, or return false if the next
fields are not compatible.


Ok, here it is, tested with make check-c++-all RUNTESTFLAGS=dg.exp=cpp2a/*
so far (full bootstrap/regtests are scheduled).

I had to repeat in the anonymous struct is_corresponding_member_aggr case
the [[no_unique_address]] and bit_position checks, because we want to
recurse into the anonymous structs even when they aren't layout compatible
but just have some non-empty common initial sequence.
And the old code for non-empty DECL_FIELD_IS_BASE recursed, while the
new function just continues iterating on TYPE_FIELDS of the base.
That changes behavior of (added to the test):
struct A1 { int a; };
struct B1 { signed int b; };
struct alignas (16) C1 : public A1 {};
struct alignas (16) D1 : public B1 {};
static_assert (std::is_layout_compatible_v<C1, D1>);
which failed the assertion previously (because it temporarily tested if
A1 is layout compatible with D1, which it is not), but that actually is
a good thing, I think C1 and D1 are clearly layout compatible.
Or (not in the testsuite):
struct A1 { int a; };
struct B1 { signed int b; };
struct alignas (8) E1 : public A1 {};
struct F1 : public B1 {};
struct alignas (16) G1 : public E1 {};
struct alignas (16) H1 : public F1 {};
static_assert (std::is_layout_compatible_v);
static_assert (!std::is_layout_compatible_v);
static_assert (std::is_layout_compatible_v);
(previously the last assertion would fail).


Looks good, thanks.  I think you didn't see that I also asked for some 
added comments; OK with those added.



2021-08-17  Jakub Jelinek  

PR c++/101539
gcc/c-family/
* c-common.h (enum rid): Add RID_IS_LAYOUT_COMPATIBLE.
* c-common.c (c_common_reswords): Add __is_layout_compatible.
gcc/cp/
* cp-tree.h (enum cp_trait_kind): Add CPTK_IS_LAYOUT_COMPATIBLE.
(enum cp_built_in_function): Add CP_BUILT_IN_IS_CORRESPONDING_MEMBER.
(fold_builtin_is_corresponding_member, next_common_initial_seqence,
layout_compatible_type_p): Declare.
* parser.c (cp_parser_primary_expression): Handle
RID_IS_LAYOUT_COMPATIBLE.
(cp_parser_trait_expr): Likewise.
* cp-objcp-common.c (names_builtin_p): Likewise.
* constraint.cc (diagnose_trait_expr): Handle
CPTK_IS_LAYOUT_COMPATIBLE.
* decl.c (cxx_init_decl_processing): Register
__builtin_is_corresponding_member builtin.
* constexpr.c (cxx_eval_builtin_function_call): Handle
CP_BUILT_IN_IS_CORRESPONDING_MEMBER builtin.
* semantics.c (is_corresponding_member_union,
is_corresponding_member_aggr, fold_builtin_is_corresponding_member):
New functions.
(trait_expr_value): Handle CPTK_IS_LAYOUT_COMPATIBLE.
(finish_trait_expr): Likewise.
* typeck.c (next_common_initial_seqence, layout_compatible_type_p):
New functions.
* cp-gimplify.c (cp_gimplify_expr): Fold
CP_BUILT_IN_IS_CORRESPONDING_MEMBER.
(cp_fold): Likewise.
* tree.c (builtin_valid_in_constant_expr_p): Handle
CP_BUILT_IN_IS_CORRESPONDING_MEMBER.
* cxx-pretty-print.c (pp_cxx_trait_expression): Handle
CPTK_IS_LAYOUT_COMPATIBLE.
* class.c (remove_zero_width_bit_fields): Remove.
(layout_class_type): Don't call it.
gcc/testsuite/
* g++.dg/cpp2a/is-corresponding-member1.C: New test.
* g++.dg/cpp2a/is-corresponding-member2.C: New test.
* g++.dg/cpp2a/is-corresponding-member3.C: New test.
* g++.dg/cpp2a/is-corresponding-member4.C: New test.
* g++.dg/cpp2a/is-corresponding-member5.C: New test.
* g++.dg/cpp2a/is-corresponding-member6.C: New test.
* g++.dg/cpp2a/is-corresponding-member7.C: New test.
* g++.dg/cpp2a/is-corresponding-member8.C: New test.
* g++.dg/cpp2a/is-layout-compatible1.C: New test.
* g++.dg/cpp2a/is-layout-compatible2.C: New test.
* g++.dg/cpp2a/is-layout-compatible3.C: New test.

--- gcc/c-family/c-common.h.jj  2021-08-12 22:40:49.040646930 +0200
+++ gcc/c-family/c-common.h 2021-08-17 10:51:16.976465135 +0200
@@ -173,7 +173,8 @@ enum rid
RID_IS_ABSTRACT, RID_IS_AGGREGATE,
RID_IS_BASE_OF,  RID_IS_CLASS,

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-17 Thread Qing Zhao via Gcc-patches

> On Aug 17, 2021, at 3:43 AM, Richard Biener  wrote:
> 
> On Mon, 16 Aug 2021, Qing Zhao wrote:
> 
>> 
>> 
>>> On Aug 16, 2021, at 2:40 AM, Richard Biener  wrote:
>>> 
>>> On Thu, 12 Aug 2021, Qing Zhao wrote:
>>> 
 Hi, Richard,

 For RTL expansion of call to .DEFERRED_INIT, I changed my code per your 
 suggestions like following:

 ==
 #define INIT_PATTERN_VALUE  0xFE
 static void
 expand_DEFERRED_INIT (internal_fn, gcall *stmt)
 {
 tree lhs = gimple_call_lhs (stmt);
 tree var_size = gimple_call_arg (stmt, 0);
 enum auto_init_type init_type
   = (enum auto_init_type) TREE_INT_CST_LOW (gimple_call_arg (stmt, 1));
 bool is_vla = (bool) TREE_INT_CST_LOW (gimple_call_arg (stmt, 2));

 tree var_type = TREE_TYPE (lhs);
 gcc_assert (init_type > AUTO_INIT_UNINITIALIZED);

 if (is_vla || (!can_native_interpret_type_p (var_type)))
   {
   /* If this is a VLA or the type of the variable cannot be natively
  interpreted, expand to a memset to initialize it.  */
 if (TREE_CODE (lhs) == SSA_NAME)
   lhs = SSA_NAME_VAR (lhs);
 tree var_addr = NULL_TREE;
 if (is_vla)
   var_addr = TREE_OPERAND (lhs, 0);
 else
   {
TREE_ADDRESSABLE (lhs) = 1;
var_addr = build_fold_addr_expr (lhs);
   }
 tree value = (init_type == AUTO_INIT_PATTERN) ?
   build_int_cst (unsigned_char_type_node,
  INIT_PATTERN_VALUE) :
   build_zero_cst (unsigned_char_type_node);
 tree m_call = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMSET),
3, var_addr, value, var_size);
 /* Expand this memset call.  */
 expand_builtin_memset (m_call, NULL_RTX, TYPE_MODE (var_type));
   }
 else
   {
   /* If this is not a VLA and the type of the variable can be natively 
  interpreted, expand to assignment to generate better code.  */
 tree pattern = NULL_TREE;
 unsigned HOST_WIDE_INT total_bytes
   = tree_to_uhwi (TYPE_SIZE_UNIT (var_type));

 if (init_type == AUTO_INIT_PATTERN)
   {
 unsigned char *buf = (unsigned char *) xmalloc (total_bytes);
 memset (buf, INIT_PATTERN_VALUE, total_bytes);
 pattern = native_interpret_expr (var_type, buf, total_bytes);
 gcc_assert (pattern);
   }

 tree init = (init_type == AUTO_INIT_PATTERN) ?
  pattern :
  build_zero_cst (var_type);
 expand_assignment (lhs, init, false);
   }
 }
 ===

 Now, I used “can_native_interpret_type_p (var_type)” instead of 
 “use_register_for_decl (lhs)” to decide 
 whether to use “memset” or use “assign” to expand this function.

 However, this exposed a bug that is very hard to address:

 ***For the testing case: testsuite/gcc.dg/uninit-I.c:

 /* { dg-do compile } */
 /* { dg-options "-O2 -Wuninitialized" } */

 int sys_msgctl (void)
 {
 struct { int mode; } setbuf;
 return setbuf.mode;  /* { dg-warning "'setbuf\.mode' is used" } */
 ==

 **The above auto var “setbuf” has a “struct” type, for which 
 “can_native_interpret_type_p (var_type)” is false; therefore, 
 expanding this .DEFERRED_INIT call went down the “memset” expansion route. 

 However, this structure type fits into a register, so its address 
 cannot be taken anymore at this stage, even though I tried:

TREE_ADDRESSABLE (lhs) = 1;
var_addr = build_fold_addr_expr (lhs);

 To create an address variable for it, the expansion still failed at 
 expr.c: line 8412:
 during RTL pass: expand
 /home/opc/Work/GCC/latest-gcc/gcc/testsuite/gcc.dg/auto-init-uninit-I.c:6:24:
  internal compiler error: in expand_expr_addr_expr_1, at expr.c:8412
 0xd04104 expand_expr_addr_expr_1
../../latest-gcc/gcc/expr.c:8412
 0xd04a95 expand_expr_addr_expr
../../latest-gcc/gcc/expr.c:8525
 0xd13592 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, 
 expand_modifier, rtx_def**, bool)
../../latest-gcc/gcc/expr.c:11741
 0xd05142 expand_expr_real(tree_node*, rtx_def*, machine_mode, 
 expand_modifier, rtx_def**, bool)
../../latest-gcc/gcc/expr.c:8713
 0xaed1d3 expand_expr
../../latest-gcc/gcc/expr.h:301
 0xaf0d89 get_memory_rtx
../../latest-gcc/gcc/builtins.c:1370
 0xafb4fb expand_builtin_memset_args
../../latest-gcc/gcc/builtins.c:4102
 0xafacde expand_builtin_memset(tree_node*, rtx_def*, machine_mode)
../../latest-gcc/gcc/builtins.c:3886
 0xe97fb3 expand_DEFERRED_INIT

 **That’s the major reason why I

Re: [PATCH] Optimize seed_seq construction

2021-08-17 Thread Antony Polukhin via Gcc-patches

вт, 17 авг. 2021 г. в 16:37, Jonathan Wakely :
<...>
> Thanks, this is a nice improvement. We can avoid tag dispatching to
> make it simpler though:
>
> @@ -3248,6 +3249,9 @@ namespace __detail
>   template<typename _InputIterator>
> seed_seq::seed_seq(_InputIterator __begin, _InputIterator __end)
> {
> +  if _GLIBCXX17_CONSTEXPR
> (__is_random_access_iter<_InputIterator>::value)
> +   _M_v.reserve(std::distance(__begin, __end));
> +
>   for (_InputIterator __iter = __begin; __iter != __end; ++__iter)
>_M_v.push_back(__detail::__mod<result_type, __detail::_Shift<result_type, 32>::__value>(*__iter));
>
> The call to std::distance is well-formed for input iterators, but we
> won't actually call it unless we have random access iterators.
>
> Unless you see a problem with this that I'm missing, I'll go with that 
> version.

Looks much better. Thanks!

-- 
Best regards,
Antony Polukhin

Re: [PATCH] Optimize seed_seq construction

2021-08-17 Thread Jonathan Wakely via Gcc-patches

On Tue, 17 Aug 2021 at 09:42, Antony Polukhin wrote:
>
> When std::seed_seq is constructed from random access iterators we can
> detect the internal vector size in O(1). Reserving memory for elements
> in such cases may avoid multiple memory allocations.
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/random.tcc: Optimize seed_seq construction.

Thanks, this is a nice improvement. We can avoid tag dispatching to
make it simpler though:

@@ -3248,6 +3249,9 @@ namespace __detail
  template<typename _InputIterator>
seed_seq::seed_seq(_InputIterator __begin, _InputIterator __end)
{
+  if _GLIBCXX17_CONSTEXPR
(__is_random_access_iter<_InputIterator>::value)
+   _M_v.reserve(std::distance(__begin, __end));
+
  for (_InputIterator __iter = __begin; __iter != __end; ++__iter)
   _M_v.push_back(__detail::__mod<result_type, __detail::_Shift<result_type, 32>::__value>(*__iter));

The call to std::distance is well-formed for input iterators, but we
won't actually call it unless we have random access iterators.

Unless you see a problem with this that I'm missing, I'll go with that version.

Re: Fortran: Implement OpenMP 5.1 scope construct (was: Re: openmp: Implement OpenMP 5.1 scope construct)

2021-08-17 Thread Jakub Jelinek via Gcc-patches

On Tue, Aug 17, 2021 at 02:55:56PM +0200, Tobias Burnus wrote:
> On 17.08.21 09:47, Jakub Jelinek wrote:
> > This patch implements the OpenMP 5.1 scope construct, which is similar
> > to worksharing constructs in many regards, but isn't one of them.
> 
> And the attached patch does the same for Fortran.
> 
> I took the opportunity to convert some additional C/C++ testcases to Fortran 
> ones.
> 
> The latter is rather mechanical but took surprisingly much longer than the
> actual FE change. On the way, I improved the error message for
> 'omp end  junk' and 'omp cancellation '.
> And I encountered one issue with the reductions and the 'task' modifier,
> https://gcc.gnu.org/PR101948

Commented on the PR.

> Otherwise, the patch is straightforward and, hence, a bit boring.
> 
> Tobias
> 
> PS: I noted that
>  if (.false.) &
>!$omp cancellation do
> gives an error in C/C++ FE but not in the Fortran FE. With .true. it
> reaches the ME and does give a ME error about an orphaned construct.
> I did not file a PR – but if someone thinks it should print an error,
> feel free to open a PR.

This isn't just Fortran, e.g. C++
void
foo (void)
{
  if constexpr (0)
{
  #pragma omp cancellation point parallel
}
}
isn't reported either.  That is the disadvantage of late diagnostics
of issues, on the other side having to duplicate everything for all the
FEs...  And one can include there anything that is reported during
gimplification, omp lowering (e.g. nesting of regions diagnostics there,
...) etc.  Say default(none) parallel mentioning vars not being explicitly
privatized if it appears in discarded stmts etc.
Perhaps Fortran if (.false.) and C++ constexpr if is something we should
discuss in the committee at some point.

> gcc/fortran/ChangeLog:
> 
>   * dump-parse-tree.c (show_omp_node, show_code_node): Handle
>   EXEC_OMP_SCOPE.
>   * gfortran.h (enum gfc_statement): Add ST_OMP_(END_)SCOPE.
>   (enum gfc_exec_op): Add EXEC_OMP_SCOPE.
>   * match.h (gfc_match_omp_scope): New.
>   * openmp.c (OMP_SCOPE_CLAUSES): Define.
>   (gfc_match_omp_scope): New.
>   (gfc_match_omp_cancellation_point, gfc_match_omp_end_nowait):
>   Improve error diagnostic.
>   (omp_code_to_statement): Handle ST_OMP_SCOPE.
>   (gfc_resolve_omp_directive): Handle EXEC_OMP_SCOPE.
>   * parse.c (decode_omp_directive, next_statement,
>   gfc_ascii_statement, parse_omp_structured_block,
>   parse_executable): Handle OpenMP's scope construct.
>   * resolve.c (gfc_resolve_blocks): Likewise.
>   * st.c (gfc_free_statement): Likewise.
>   * trans-openmp.c (gfc_trans_omp_scope): New.
>   (gfc_trans_omp_directive): Call it.
>   * trans.c (trans_code): Handle EXEC_OMP_SCOPE.
> 
> libgomp/ChangeLog:
> 
>   * testsuite/libgomp.fortran/scope-1.f90: New test.
>   * testsuite/libgomp.fortran/task-reduction-16.f90: New test.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gfortran.dg/gomp/scan-1.f90:
>   * gfortran.dg/gomp/cancel-1.f90: New test.
>   * gfortran.dg/gomp/cancel-4.f90: New test.
>   * gfortran.dg/gomp/loop-4.f90: New test.
>   * gfortran.dg/gomp/nesting-1.f90: New test.
>   * gfortran.dg/gomp/nesting-2.f90: New test.
>   * gfortran.dg/gomp/nesting-3.f90: New test.
>   * gfortran.dg/gomp/nowait-1.f90: New test.
>   * gfortran.dg/gomp/reduction-task-1.f90: New test.
>   * gfortran.dg/gomp/reduction-task-2.f90: New test.
>   * gfortran.dg/gomp/reduction-task-2a.f90: New test.
>   * gfortran.dg/gomp/reduction-task-3.f90: New test.
>   * gfortran.dg/gomp/scope-1.f90: New test.
>   * gfortran.dg/gomp/scope-2.f90: New test.

LGTM, thanks.

Jakub

Re: [PATCH] commit-mklog: Add --co argument.

2021-08-17 Thread Martin Liška


On 8/17/21 3:09 PM, Martin Liška wrote:

On 8/17/21 2:59 PM, Martin Liška wrote:

with --trailer='Signed-off-by=Mona Lisa Octocat '.


This should be of course:
--trailer='Co-Authored-By=Mona Lisa Octocat '


Updated version where mklog.py skips Co-Authored-By lines, ignoring case.

Martin
>From 68797b65eedabeb0712588409933cb69809c5fbc Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Tue, 17 Aug 2021 14:57:40 +0200
Subject: [PATCH] commit-mklog: Add --co argument.

The argument can be used for addition of Co-Authored-By lines
with --trailer='Co-Authored-By=Mona Lisa Octocat '.

contrib/ChangeLog:

	* gcc-git-customization.sh: Wrap $@ in quotes.
	* git-commit-mklog.py: Add new argument --co.
	* mklog.py: Skip the Co-Authored-By lines.
---
 contrib/gcc-git-customization.sh | 2 +-
 contrib/git-commit-mklog.py  | 6 ++
 contrib/mklog.py | 9 -
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/contrib/gcc-git-customization.sh b/contrib/gcc-git-customization.sh
index 6f8f23deebf..aca61b781ff 100755
--- a/contrib/gcc-git-customization.sh
+++ b/contrib/gcc-git-customization.sh
@@ -28,7 +28,7 @@ git config alias.gcc-undescr \!"f() { o=\$(git config --get gcc-config.upstream)
 git config alias.gcc-verify '!f() { "`git rev-parse --show-toplevel`/contrib/gcc-changelog/git_check_commit.py" $@; } ; f'
 git config alias.gcc-backport '!f() { "`git rev-parse --show-toplevel`/contrib/git-backport.py" $@; } ; f'
 git config alias.gcc-mklog '!f() { "`git rev-parse --show-toplevel`/contrib/mklog.py" $@; } ; f'
-git config alias.gcc-commit-mklog '!f() { "`git rev-parse --show-toplevel`/contrib/git-commit-mklog.py" $@; }; f'
+git config alias.gcc-commit-mklog '!f() { "`git rev-parse --show-toplevel`/contrib/git-commit-mklog.py" "$@"; }; f'
 
 # Make diff on MD files use "(define" as a function marker.
 # Use this in conjunction with a .gitattributes file containing
diff --git a/contrib/git-commit-mklog.py b/contrib/git-commit-mklog.py
index 9c59fb97809..eda3fc4a892 100755
--- a/contrib/git-commit-mklog.py
+++ b/contrib/git-commit-mklog.py
@@ -37,6 +37,8 @@ if __name__ == '__main__':
 help='Add the specified PRs (comma separated)')
 parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
 help='Download title of mentioned PRs')
+parser.add_argument('--co',
+help='Add Co-Authored-By trailer (comma separated)')
 args, unknown_args = parser.parse_known_args()
 
 myenv['GCC_FORCE_MKLOG'] = '1'
@@ -49,5 +51,9 @@ if __name__ == '__main__':
 if mklog_args:
 myenv['GCC_MKLOG_ARGS'] = ' '.join(mklog_args)
 
+if args.co:
+for author in args.co.split(','):
+unknown_args.append(f'--trailer "Co-Authored-By: {author}"')
+
 commit_args = ' '.join(unknown_args)
 subprocess.run(f'git commit {commit_args}', shell=True, env=myenv)
diff --git a/contrib/mklog.py b/contrib/mklog.py
index d2aea85c7cc..d362be5ab10 100755
--- a/contrib/mklog.py
+++ b/contrib/mklog.py
@@ -40,6 +40,7 @@ from unidiff import PatchSet
 
 LINE_LIMIT = 100
 TAB_WIDTH = 8
+CO_AUTHORED_BY_PREFIX = 'co-authored-by: '
 
 pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
 prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
@@ -317,6 +318,12 @@ def update_copyright(data):
 f.write(content)
 
 
+def skip_line_in_changelog(line):
+if line.lower().startswith(CO_AUTHORED_BY_PREFIX) or line.startswith('#'):
+return False
+return True
+
+
 if __name__ == '__main__':
 parser = argparse.ArgumentParser(description=help_message)
 parser.add_argument('input', nargs='?',
@@ -350,7 +357,7 @@ if __name__ == '__main__':
 args.fill_up_bug_titles, args.pr_numbers)
 if args.changelog:
 lines = open(args.changelog).read().split('\n')
-start = list(takewhile(lambda l: not l.startswith('#'), lines))
+start = list(takewhile(skip_line_in_changelog, lines))
 end = lines[len(start):]
 with open(args.changelog, 'w') as f:
 if not start or not start[0]:
-- 
2.32.0

[committed] libstdc++: Test std::seed_seq construction from input iterators

2021-08-17 Thread Jonathan Wakely via Gcc-patches

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

* testsuite/26_numerics/random/seed_seq/cons/range.cc: Check
construction from input iterators.

Tested powerpc64le-linux. Committed to trunk.

commit 20698ec5b681e23fa3404ed0ef78e3367b28e16d
Author: Jonathan Wakely 
Date:   Tue Aug 17 14:18:58 2021

libstdc++: Test std::seed_seq construction from input iterators

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

* testsuite/26_numerics/random/seed_seq/cons/range.cc: Check
construction from input iterators.

diff --git a/libstdc++-v3/testsuite/26_numerics/random/seed_seq/cons/range.cc 
b/libstdc++-v3/testsuite/26_numerics/random/seed_seq/cons/range.cc
index 8ea87342002..63233c893c0 100644
--- a/libstdc++-v3/testsuite/26_numerics/random/seed_seq/cons/range.cc
+++ b/libstdc++-v3/testsuite/26_numerics/random/seed_seq/cons/range.cc
@@ -24,6 +24,7 @@
 
 #include <random>
 #include <testsuite_hooks.h>
+#include <testsuite_iterators.h>
 
 void
 test01()
@@ -38,9 +39,22 @@ test01()
   //VERIFY();
 }
 
+void
+test02()
+{
+  unsigned arr[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+  __gnu_test::input_container<unsigned> in(arr);
+  std::seed_seq seq(in.begin(), in.end());
+
+  std::vector<unsigned> foo(1);
+  seq.generate(foo.begin(), foo.end());
+
+  VERIFY( seq.size() == 10 );
+}
+
 int
 main()
 {
   test01();
-  return 0;
+  test02();
 }

Re: [committed] libstdc++: Add pretty printer for std::error_code and std::error_condition

2021-08-17 Thread Jonathan Wakely via Gcc-patches

On Tue, 17 Aug 2021 at 14:27, Jonathan Wakely wrote:
>
> Signed-off-by: Jonathan Wakely 
>
> libstdc++-v3/ChangeLog:
>
> * python/libstdcxx/v6/printers.py (StdErrorCodePrinter): Define.
> (build_libstdcxx_dictionary): Register printer for
> std::error_code and std::error_condition.
> * testsuite/libstdc++-prettyprinters/cxx11.cc: Test it.
>
> Tested powerpc64le-linux. Committed to trunk.

Oops, that patch included a std::error_category printer which doesn't
work, and wasn't supposed to be committed. Removed with this patch.
commit 8ea0fadc1b39bded236a7eebd1bae78f0443875a
Author: Jonathan Wakely 
Date:   Tue Aug 17 14:29:53 2021

libstdc++: Remove pretty printer committed by mistake

The std::error_category printer wasn't meant to be part of the commit
adding std::error_code and std::error_condition printers.

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

* python/libstdcxx/v6/printers.py (StdErrorCatPrinter): Remove.

diff --git a/libstdc++-v3/python/libstdcxx/v6/printers.py 
b/libstdc++-v3/python/libstdcxx/v6/printers.py
index e027a69ded9..82d262de8c7 100644
--- a/libstdc++-v3/python/libstdcxx/v6/printers.py
+++ b/libstdc++-v3/python/libstdcxx/v6/printers.py
@@ -1484,18 +1484,6 @@ class StdCmpCatPrinter:
 name = names[int(self.val)]
 return 'std::{}::{}'.format(self.typename, name)
 
-class StdErrorCatPrinter:
-"Print an object derived from std::error_category"
-
-def __init__ (self, typename, val):
-self.val = val
-self.typename = typename
-
-def to_string (self):
-gdb.set_convenience_variable('__cat', self.val)
-name = gdb.parse_and_eval('$__cat->name()').string()
-return 'error category = "{}"'.format(name)
-
 class StdErrorCodePrinter:
 "Print a std::error_code or std::error_condition"

Re: Better memory statistics, take 2

2021-08-17 Thread David Malcolm via Gcc-patches

On Tue, 2021-08-17 at 11:17 +0200, Thomas Schwinge wrote:
> Hi!
> 
> On 2004-09-02T18:47:01+0200, Jan Hubicka  wrote:
> > *** ggc-common.c  9 Aug 2004 20:19:29 -   1.88
> > --- ggc-common.c  2 Sep 2004 16:08:50 -
> 
> > + /* When set, ggc_collect will do collection.  */
> > + bool ggc_force_collect;
> 
> > *** void dump_ggc_loc_statistics (void)
> 
> > +   ggc_force_collect = true;
> > +   ggc_collect ();
> 
> > *** ggc_collect (void)
> 
> > !   if (G.allocated < allocated_last_gc + min_expand)
> 
> > !   if (G.allocated < allocated_last_gc + min_expand &&
> > !       !ggc_force_collect)
> 
> > *** ggc.h 2 Sep 2004 02:39:15 -   1.68
> > --- ggc.h 2 Sep 2004 16:08:50 -
> 
> > + /* When set, ggc_collect will do collection.  */
> > + extern bool ggc_force_collect;
> 
> This has later acquired another use in the GCC selftests.
> 
> I wonder if we shouldn't simplify the interface per the attached
> "Turn
> global 'ggc_force_collect' variable into 'force_collect' parameter to
> 'ggc_collect'"?  OK to push to master branch after bootstrap testing?

Looks good to me, but bool params can be unclear - maybe introduce an
enum to make the meaning more explicit to the reader of the code?

e.g.

enum ggc_collect_when
{
  GGC_COLLECT_UNDER_MEMORY_PRESSURE,
  GGC_COLLECT_ALWAYS
};

or somesuch???

Dave

[committed] libstdc++: Only define basic_string::contains for C++23

2021-08-17 Thread Jonathan Wakely via Gcc-patches

The new contains member of the COW string is defined for non-strict
gnu++20 mode as well as for C++23 modes. I think that was left in the
committed patch unintentionally. It is inconsistent with the SSO string,
and doesn't actually compile because it uses the
basic_string_view::contains member which is only defined for C++23.

This makes it only defined for C++23.

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

* include/bits/cow_string.h (basic_string::contains): Do not
define for -std=gnu++20.

Tested powerpc64le-linux. Committed to trunk.

commit 3b3f2f7c265ef9f176cb811a8049b24538d954d9
Author: Jonathan Wakely 
Date:   Mon Aug 16 20:42:54 2021

libstdc++: Only define basic_string::contains for C++23

The new contains member of the COW string is defined for non-strict
gnu++20 mode as well as for C++23 modes. I think that was left in the
committed patch unintentionally. It is inconsistent with the SSO string,
and doesn't actually compile because it uses the
basic_string_view::contains member which is only defined for C++23.

This makes it only defined for C++23.

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

* include/bits/cow_string.h (basic_string::contains): Do not
define for -std=gnu++20.

diff --git a/libstdc++-v3/include/bits/cow_string.h 
b/libstdc++-v3/include/bits/cow_string.h
index 5b09cc2a896..61edaa85484 100644
--- a/libstdc++-v3/include/bits/cow_string.h
+++ b/libstdc++-v3/include/bits/cow_string.h
@@ -2963,8 +2963,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return __sv_type(this->data(), this->size()).ends_with(__x); }
 #endif // C++20
 
-#if __cplusplus >= 202011L \
-  || (__cplusplus == 202002L && !defined __STRICT_ANSI__)
+#if __cplusplus > 202011L
   bool
   contains(basic_string_view<_CharT, _Traits> __x) const noexcept
   { return __sv_type(this->data(), this->size()).contains(__x); }

[committed] libstdc++: Rename detail::not_same_as helper

2021-08-17 Thread Jonathan Wakely via Gcc-patches

This is done to match an editorial change in the working draft, to
rename the exposition-only not-same-as helper to different-from.

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

* include/bits/ranges_util.h (__not_same_as): Rename to
__different_from.
* include/std/ranges (__not_same_as): Likewise.

Tested powerpc64le-linux. Committed to trunk.

commit c09cabb239179fcc8ed70f033f214d98a49eb378
Author: Jonathan Wakely 
Date:   Mon Aug 16 18:10:04 2021

libstdc++: Rename __detail::__not_same_as helper

This is done to match an editorial change in the working draft, to
rename the exposition-only not-same-as helper to different-from.

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

* include/bits/ranges_util.h (__not_same_as): Rename to
__different_from.
* include/std/ranges (__not_same_as): Likewise.

diff --git a/libstdc++-v3/include/bits/ranges_util.h 
b/libstdc++-v3/include/bits/ranges_util.h
index 0ca203dd4b0..4e87cfc6ef1 100644
--- a/libstdc++-v3/include/bits/ranges_util.h
+++ b/libstdc++-v3/include/bits/ranges_util.h
@@ -54,7 +54,7 @@ namespace ranges
&& (is_pointer_v<_It> || requires(_It __it) { __it.operator->(); });
 
 template
-  concept __not_same_as
+  concept __different_from
= !same_as, remove_cvref_t<_Up>>;
   } // namespace __detail
 
@@ -187,8 +187,8 @@ namespace ranges
 template
   concept __convertible_to_non_slicing = convertible_to<_From, _To>
&& !(is_pointer_v> && is_pointer_v>
-   && __not_same_as>,
-remove_pointer_t>>);
+   && __different_from>,
+   remove_pointer_t>>);
 
 template
   concept __pair_like
@@ -264,7 +264,7 @@ namespace ranges
  _M_size._M_size = __n;
   }
 
-  template<__detail::__not_same_as _Rng>
+  template<__detail::__different_from _Rng>
requires borrowed_range<_Rng>
  && __detail::__convertible_to_non_slicing, _It>
  && convertible_to, _Sent>
@@ -275,7 +275,7 @@ namespace ranges
: subrange(__r, ranges::size(__r))
{ }
 
-  template<__detail::__not_same_as _Rng>
+  template<__detail::__different_from _Rng>
requires borrowed_range<_Rng>
  && __detail::__convertible_to_non_slicing, _It>
  && convertible_to, _Sent>
@@ -296,7 +296,7 @@ namespace ranges
: subrange{ranges::begin(__r), ranges::end(__r), __n}
{ }
 
-  template<__detail::__not_same_as _PairLike>
+  template<__detail::__different_from _PairLike>
requires __detail::__pair_like_convertible_from<_PairLike, const _It&,
const _Sent&>
constexpr
diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
index fb8905fab08..3d49a26ee79 100644
--- a/libstdc++-v3/include/std/ranges
+++ b/libstdc++-v3/include/std/ranges
@@ -1057,7 +1057,7 @@ namespace views::__adaptor
   static void _S_fun(_Range&&) = delete;
 
 public:
-  template<__detail::__not_same_as _Tp>
+  template<__detail::__different_from _Tp>
requires convertible_to<_Tp, _Range&>
  && requires { _S_fun(declval<_Tp>()); }
constexpr

[committed] libstdc++: Add conditional noexcept to std::exchange

2021-08-17 Thread Jonathan Wakely via Gcc-patches

This is not required by the standard, but seems useful.

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

* include/std/utility (exchange): Add noexcept-specifier.
* testsuite/20_util/exchange/noexcept.cc: New test.

Tested powerpc64le-linux. Committed to trunk.

commit 42cfa1bd6c05f2dc0d6269155950d16064310f56
Author: Jonathan Wakely 
Date:   Mon Aug 16 18:00:08 2021

libstdc++: Add conditional noexcept to std::exchange

This is not required by the standard, but seems useful.

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

* include/std/utility (exchange): Add noexcept-specifier.
* testsuite/20_util/exchange/noexcept.cc: New test.

diff --git a/libstdc++-v3/include/std/utility b/libstdc++-v3/include/std/utility
index c2697f87dc5..69d274ff194 100644
--- a/libstdc++-v3/include/std/utility
+++ b/libstdc++-v3/include/std/utility
@@ -91,6 +91,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 _GLIBCXX20_CONSTEXPR
 inline _Tp
 exchange(_Tp& __obj, _Up&& __new_val)
+noexcept(__and_<is_nothrow_move_constructible<_Tp>,
+   is_nothrow_assignable<_Tp&, _Up>>::value)
 { return std::__exchange(__obj, std::forward<_Up>(__new_val)); }
 
 #if __cplusplus >= 201703L
diff --git a/libstdc++-v3/testsuite/20_util/exchange/noexcept.cc 
b/libstdc++-v3/testsuite/20_util/exchange/noexcept.cc
new file mode 100644
index 000..6363a4d4c06
--- /dev/null
+++ b/libstdc++-v3/testsuite/20_util/exchange/noexcept.cc
@@ -0,0 +1,39 @@
+// { dg-options "-std=gnu++2a" }
+// { dg-do compile { target c++2a } }
+
+#include 
+
+// This is a GCC extension. std::exchange is not required to be noexcept.
+
+static_assert( noexcept( std::exchange(std::declval(), 1) ) );
+
+struct X
+{
+  X(const X&);
+  X(X&&) noexcept;
+  X& operator=(const X&);
+  X& operator=(X&&) noexcept;
+  X& operator=(int);
+};
+
+extern X x, x2;
+static_assert( noexcept( std::exchange(x, std::move(x2)) ) );
+static_assert( ! noexcept( std::exchange(x, 1) ) );
+
+struct Y
+{
+  Y(Y&&) noexcept;
+  Y& operator=(Y&&);
+};
+
+extern Y y, y2;
+static_assert( ! noexcept( std::exchange(y, std::move(y2)) ) );
+
+struct Z
+{
+  Z(Z&&)noexcept;
+  Z& operator=(Z&&) ;
+};
+
+extern Z z, z2;
+static_assert( ! noexcept( std::exchange(z, std::move(z2)) ) );

[PATCH] move x86 to use gather/scatter internal functions

2021-08-17 Thread Richard Biener via Gcc-patches

This is an attempt to start moving the x86 backend to use
standard pattern names for [mask_]gather_load and [mask_]scatter_store
rather than using the builtin_{gather,scatter} target hooks.

I've started with AVX2 gathers and given x86 only supports masked
gather I only implemented mask_gather_load.  Note while for
the builtin_gather case the vectorizer will provide an all-true
mask operand for non-masked gathers this capability does not
exist for the IFN path yet, so only testcases with actual masked
gathers will work.

If this looks reasonable on the backend side I'll see to first
complete the vectorizer part, ripping out the target hook and
arranging for the missing pieces.  Another one is the support
for SImode indices with DFmode data which requires unpacking
the index vector and actually recognizing the IFN.

2021-08-17  Richard Biener  

* tree-vect-data-refs.c (vect_check_gather_scatter):
Always use internal functions.
* config/i386/sse.md
(mask_gather_load): New expander.
(mask_gather_load): Likewise.
---
 gcc/config/i386/sse.md| 56 +++
 gcc/tree-vect-data-refs.c |  4 +--
 2 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 3957c86c3df..40bec98d9f7 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -23232,12 +23232,22 @@
   (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
   (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
   (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
+(define_mode_attr vec_gather_idxsi
+ [(V2DI "v4si") (V4DI "v4si") (V8DI "v8si")
+  (V2DF "v4si") (V4DF "v4si") (V8DF "v8si")
+  (V4SI "v4si") (V8SI "v8si") (V16SI "v16si")
+  (V4SF "v4si") (V8SF "v8si") (V16SF "v16si")])
 
 (define_mode_attr VEC_GATHER_IDXDI
  [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
   (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
   (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
   (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
+(define_mode_attr vec_gather_idxdi
+ [(V2DI "v2di") (V4DI "v4di") (V8DI "v8di")
+  (V2DF "v2di") (V4DF "v4di") (V8DF "v8di")
+  (V4SI "v2di") (V8SI "v4di") (V16SI "v8di")
+  (V4SF "v2di") (V8SF "v4di") (V16SF "v8di")])
 
 (define_mode_attr VEC_GATHER_SRCDI
  [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
@@ -23245,6 +23255,29 @@
   (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
   (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
 
+(define_expand "mask_gather_load"
+  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
+  (unspec:VEC_GATHER_MODE
+[(pc)
+ (mem:
+   (match_par_dup 6
+ [(match_operand 1 "vsib_address_operand")
+  (match_operand:
+ 2 "register_operand")
+  (match_operand:SI 4 "const1248_operand")
+  (match_operand:SI 3 "const0_operand")]))
+ (mem:BLK (scratch))
+ (match_operand: 5 "register_operand")]
+UNSPEC_GATHER))
+ (clobber (match_scratch:VEC_GATHER_MODE 7))])]
+  "TARGET_AVX2 && TARGET_USE_GATHER"
+{
+  operands[5] = gen_lowpart_SUBREG (mode, operands[5]);
+  operands[6]
+= gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[1], operands[2],
+   operands[4]), UNSPEC_VSIBADDR);
+})
+
 (define_expand "avx2_gathersi"
   [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
   (unspec:VEC_GATHER_MODE
@@ -23306,6 +23339,29 @@
(set_attr "prefix" "vex")
(set_attr "mode" "")])
 
+(define_expand "mask_gather_load"
+  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
+  (unspec:VEC_GATHER_MODE
+[(pc)
+ (mem:
+   (match_par_dup 6
+ [(match_operand 1 "vsib_address_operand")
+  (match_operand:
+ 2 "register_operand")
+  (match_operand:SI 4 "const1248_operand")
+  (match_operand:SI 3 "const0_operand")]))
+ (mem:BLK (scratch))
+ (match_operand: 5 "register_operand")]
+UNSPEC_GATHER))
+ (clobber (match_scratch:VEC_GATHER_MODE 7))])]
+  "TARGET_AVX2 && TARGET_USE_GATHER"
+{
+  operands[5] = gen_lowpart_SUBREG (mode, operands[5]);
+  operands[6]
+= gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[1], operands[2],
+   operan

[committed] libstdc++: Add pretty printer for std::error_code and std::error_condition

2021-08-17 Thread Jonathan Wakely via Gcc-patches

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

* python/libstdcxx/v6/printers.py (StdErrorCodePrinter): Define.
(build_libstdcxx_dictionary): Register printer for
std::error_code and std::error_condition.
* testsuite/libstdc++-prettyprinters/cxx11.cc: Test it.

Tested powerpc64le-linux. Committed to trunk.

commit 2db38d9fcacf522fe9b98ba847e79ba33abdcadc
Author: Jonathan Wakely 
Date:   Mon Aug 16 17:41:50 2021

libstdc++: Add pretty printer for std::error_code and std::error_condition

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

* python/libstdcxx/v6/printers.py (StdErrorCodePrinter): Define.
(build_libstdcxx_dictionary): Register printer for
std::error_code and std::error_condition.
* testsuite/libstdc++-prettyprinters/cxx11.cc: Test it.

diff --git a/libstdc++-v3/python/libstdcxx/v6/printers.py 
b/libstdc++-v3/python/libstdcxx/v6/printers.py
index 550e0ecdd22..e027a69ded9 100644
--- a/libstdc++-v3/python/libstdcxx/v6/printers.py
+++ b/libstdc++-v3/python/libstdcxx/v6/printers.py
@@ -18,7 +18,7 @@
 import gdb
 import itertools
 import re
-import sys
+import sys, os, errno
 
 ### Python 2 + Python 3 compatibility code
 
@@ -1484,6 +1484,57 @@ class StdCmpCatPrinter:
 name = names[int(self.val)]
 return 'std::{}::{}'.format(self.typename, name)
 
+class StdErrorCatPrinter:
+"Print an object derived from std::error_category"
+
+def __init__ (self, typename, val):
+self.val = val
+self.typename = typename
+
+def to_string (self):
+gdb.set_convenience_variable('__cat', self.val)
+name = gdb.parse_and_eval('$__cat->name()').string()
+return 'error category = "{}"'.format(name)
+
+class StdErrorCodePrinter:
+"Print a std::error_code or std::error_condition"
+
+_errno_categories = None # List of categories that use errno values
+
+def __init__ (self, typename, val):
+self.val = val
+self.typename = typename
+# Do this only once ...
+if StdErrorCodePrinter._errno_categories is None:
+StdErrorCodePrinter._errno_categories = ['generic']
+try:
+import posix
+StdErrorCodePrinter._errno_categories.append('system')
+except ImportError:
+pass
+
+@staticmethod
+def _category_name(cat):
+"Call the virtual function that overrides std::error_category::name()"
+gdb.set_convenience_variable('__cat', cat)
+return gdb.parse_and_eval('$__cat->name()').string()
+
+def to_string (self):
+value = self.val['_M_value']
+category = self._category_name(self.val['_M_cat'])
+strval = str(value)
+if value == 0:
+default_cats = {'error_code':'system', 'error_condition':'generic'}
+unqualified = self.typename.split('::')[-1]
+if category == default_cats[unqualified]:
+return self.typename + ' = { }' # default-constructed value
+if value > 0 and category in StdErrorCodePrinter._errno_categories:
+try:
+strval = errno.errorcode[int(value)]
+except:
+pass
+return '%s = {"%s": %s}' % (self.typename, category, strval)
+
 # A "regular expression" printer which conforms to the
 # "SubPrettyPrinter" protocol from gdb.printing.
 class RxPrinter(object):
@@ -1886,6 +1937,8 @@ def build_libstdcxx_dictionary ():
 libstdcxx_printer.add_version('std::__cxx11::', 'basic_string', 
StdStringPrinter)
 libstdcxx_printer.add_container('std::', 'bitset', StdBitsetPrinter)
 libstdcxx_printer.add_container('std::', 'deque', StdDequePrinter)
+libstdcxx_printer.add_version('std::', 'error_code', StdErrorCodePrinter)
+libstdcxx_printer.add_version('std::', 'error_condition', 
StdErrorCodePrinter)
 libstdcxx_printer.add_container('std::', 'list', StdListPrinter)
 libstdcxx_printer.add_container('std::__cxx11::', 'list', StdListPrinter)
 libstdcxx_printer.add_container('std::', 'map', StdMapPrinter)
diff --git a/libstdc++-v3/testsuite/libstdc++-prettyprinters/cxx11.cc 
b/libstdc++-v3/testsuite/libstdc++-prettyprinters/cxx11.cc
index 05950513ab0..637246b3c12 100644
--- a/libstdc++-v3/testsuite/libstdc++-prettyprinters/cxx11.cc
+++ b/libstdc++-v3/testsuite/libstdc++-prettyprinters/cxx11.cc
@@ -155,6 +155,25 @@ main()
 // { dg-final { note-test tpl {std::tuple containing = {[1] = 6, [2] = 7}} } }
   ExTuple &rtpl = tpl;
 // { dg-final { note-test rtpl {std::tuple containing = {[1] = 6, [2] = 7}} } }
+
+  std::error_code e0;
+  // { dg-final { note-test e0 {std::error_code = { }} } }
+  std::error_condition ec0;
+  // { dg-final { note-test ec0 {std::error_condition = { }} } }
+  std::error_code einval = std::make_error_code(std::errc::invalid_argument);
+  // { dg-final { note-test einval {std::error_code = {"generic": EINVAL}}

Re: [PATCH] commit-mklog: Add --co argument.

2021-08-17 Thread Martin Liška


On 8/17/21 2:59 PM, Martin Liška wrote:

with --trailer='Signed-off-by=Mona Lisa Octocat '.


This should be of course:
--trailer='Co-Authored-By=Mona Lisa Octocat '

Re: Valgrind '--show-leak-kinds=all'

2021-08-17 Thread Thomas Schwinge

Hi!

On 2021-08-06T17:10:36+0200, Richard Biener  wrote:
> On August 6, 2021 4:09:37 PM GMT+02:00, Thomas Schwinge 
>  wrote:
>>I'm working on plugging a memory leak in an entirely different
>>compartment of GCC, but also ran into this issue:
>>
>>On 2021-02-12T08:35:52+0100, Richard Biener via Gcc-patches 
>> wrote:
>>> On Thu, Feb 11, 2021 at 7:35 PM Martin Sebor  wrote:
 On 2/11/21 12:59 AM, Richard Biener wrote:
 > On Wed, Feb 10, 2021 at 6:16 PM Martin Sebor  wrote:
 >> [...] Valgrind shows more leaks in this code that
 >> I'm not sure what to do about:
 >>
 >> 1) A tree built by build_type_attribute_qual_variant() called from
 >>  attr_access::array_as_string() to build a temporary type only
 >>  for the purposes of formatting it.
 >>
 >> 2) A tree (an attribute list) built by tree_cons() called from
 >>  build_attr_access_from_parms() that's used only for the duration
 >>  of the caller.
 >>
 >> Do these temporary trees need to be released somehow or are the leaks
 >> expected?
 >
 > You should configure GCC with --enable-valgrind-annotations to make
 > it aware of our GC.

 I did configure with that option:

 $ /src/gcc/master/configure --enable-checking=yes
 --enable-languages=all,jit,lto --enable-host-shared
 --enable-valgrind-annotations
>>
 $ /build/gcc-master/gcc/xgcc -B /build/gcc-master/gcc -S -Wall
 /src/gcc/master/gcc/testsuite/gcc.dg/Wvla-parameter.c -wrapper
 valgrind,--leak-check=full,--show-leak-kinds=all,--track-origins=yes,--log-file=valgrind-out.txt

 Do you not see the same leaks?
>>
>>I do; also stuff like:
>>
>>56 bytes in 1 blocks are still reachable in loss record 152 of 875
>>   at 0x483DD99: calloc (vg_replace_malloc.c:762)
>>   by 0x1753240: xcalloc (xmalloc.c:162)
>>   by 0x669C83: ggc_internal_alloc(unsigned long, void (*)(void*), 
>> unsigned long, unsigned long) (ggc-page.c:918)
>>   by 0x89E07D: ggc_internal_cleared_alloc(unsigned long, void 
>> (*)(void*), unsigned long, unsigned long) (ggc-common.c:117)
>>   by 0xF65D0D: make_node(tree_code) (ggc.h:143)
>>   by 0xF6632B: build_decl(unsigned int, tree_code, tree_node*, 
>> tree_node*) (tree.c:5264)
>>   by 0xA28ADC: build_builtin_function(unsigned int, char const*, 
>> tree_node*, int, built_in_class, char const*, tree_node*) (langhooks.c:681)
>>   by 0xA29FDD: add_builtin_function(char const*, tree_node*, int, 
>> built_in_class, char const*, tree_node*) (langhooks.c:716)
>>   by 0x622BFB: def_builtin_1(built_in_function, char const*, 
>> built_in_class, tree_node*, tree_node*, bool, bool, bool, tree_node*, bool) 
>> [clone .constprop.25] (lto-lang.c:650)
>>   by 0x640709: lto_define_builtins(tree_node*, tree_node*) 
>> (omp-builtins.def:46)
>>   by 0x641EE3: lto_init() (lto-lang.c:1339)
>>   by 0x61E26A: toplev::main(int, char**) (toplev.c:1921)
>>
>>... and many, many more.
>>
>>> Err, well.  --show-leak-kinds=all is probably the cause.
>>
>>Before finding this email, I too had convinced myself that everything that
>>came by 'ggc_*' I may ignore, because:
>>
>>> We
>>> definitely do not force-release
>>> all reachable GC allocated memory at program end.
>>
>>... of this: these blocks simply had not been GCed at program end.
>>
>>It's however a bit tedious to filter, in my case, 11864 lines of Valgrind
>>output.

(Actually, might use something like the "mitigated as follows" that I've
added here: .)

>>> Not sure if
>>> valgrind annotations can
>>> make that obvious to valgrind.
>>
>>Or, if that's not feasible (I don't know much about Valgrind...), then
>>instead would it help to force a final GC at program end if we're running
>>in "valgrind mode"?  If that's a plausible thing to do, would guarding
>>that by GCC having been configured with '--enable-valgrind-annotations'
>>be OK, or do we need a '--param', or something else?
>
> Well, instead of a final GC we could explicitly release all GC managed memory.

Heh, of course, a "final GC at program end" doesn't help (much), given
that (most of) all the blocks are still reachable via the usual GC roots.

So I tried looking into how we might release all GCC memory
unconditionally, via adapting 'ggc_mark_roots' (to not add back roots via
'ggc_mark_root_tab'), 'clear_marks', 'sweep_pages', 'release_pages',
etc., but couldn't get this to work.  It doesn't help, of course, that I
don't know much about how the GC really works internally.  Possibly my
non-understanding of the "context depth" is highly relevant.

Anyway, this isn't really important for me right now, having otherwise
resolved my original issue, so I'm not intending to spend a lot more time
on this.


Calling 'memory_block_pool::trim (0);' at the end of 'gcc/main.c:main'
does have some effect, too, but isn't sufficient/useful on its own, of
course.


Grüße
 Thomas
-
Siemen

[PATCH] commit-mklog: Add --co argument.

2021-08-17 Thread Martin Liška


The argument can be used for addition of Co-Authored-By lines
with --trailer='Signed-off-by=Mona Lisa Octocat '.

Thoughts?
Thanks,
Martin

contrib/ChangeLog:

* gcc-git-customization.sh: Wrap $@ in quotes.
* git-commit-mklog.py: Add new argument --co.
* mklog.py: Skip the Co-Authored-By lines.
---
 contrib/gcc-git-customization.sh | 2 +-
 contrib/git-commit-mklog.py  | 6 ++
 contrib/mklog.py | 8 +++-
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/contrib/gcc-git-customization.sh b/contrib/gcc-git-customization.sh
index 6f8f23deebf..aca61b781ff 100755
--- a/contrib/gcc-git-customization.sh
+++ b/contrib/gcc-git-customization.sh
@@ -28,7 +28,7 @@ git config alias.gcc-undescr \!"f() { o=\$(git config --get 
gcc-config.upstream)
 git config alias.gcc-verify '!f() { "`git rev-parse 
--show-toplevel`/contrib/gcc-changelog/git_check_commit.py" $@; } ; f'
 git config alias.gcc-backport '!f() { "`git rev-parse 
--show-toplevel`/contrib/git-backport.py" $@; } ; f'
 git config alias.gcc-mklog '!f() { "`git rev-parse 
--show-toplevel`/contrib/mklog.py" $@; } ; f'
-git config alias.gcc-commit-mklog '!f() { "`git rev-parse 
--show-toplevel`/contrib/git-commit-mklog.py" $@; }; f'
+git config alias.gcc-commit-mklog '!f() { "`git rev-parse 
--show-toplevel`/contrib/git-commit-mklog.py" "$@"; }; f'
 
 # Make diff on MD files use "(define" as a function marker.

 # Use this in conjunction with a .gitattributes file containing
diff --git a/contrib/git-commit-mklog.py b/contrib/git-commit-mklog.py
index 9c59fb97809..eda3fc4a892 100755
--- a/contrib/git-commit-mklog.py
+++ b/contrib/git-commit-mklog.py
@@ -37,6 +37,8 @@ if __name__ == '__main__':
 help='Add the specified PRs (comma separated)')
 parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
 help='Download title of mentioned PRs')
+parser.add_argument('--co',
+help='Add Co-Authored-By trailer (comma separated)')
 args, unknown_args = parser.parse_known_args()
 
 myenv['GCC_FORCE_MKLOG'] = '1'

@@ -49,5 +51,9 @@ if __name__ == '__main__':
 if mklog_args:
 myenv['GCC_MKLOG_ARGS'] = ' '.join(mklog_args)
 
+if args.co:

+for author in args.co.split(','):
+unknown_args.append(f'--trailer "Co-Authored-By: {author}"')
+
 commit_args = ' '.join(unknown_args)
 subprocess.run(f'git commit {commit_args}', shell=True, env=myenv)
diff --git a/contrib/mklog.py b/contrib/mklog.py
index d2aea85c7cc..8430f65a879 100755
--- a/contrib/mklog.py
+++ b/contrib/mklog.py
@@ -317,6 +317,12 @@ def update_copyright(data):
 f.write(content)
 
 
+def skip_line_in_changelog(line):

+if line.startswith('Co-Authored-By:') or line.startswith('#'):
+return False
+return True
+
+
 if __name__ == '__main__':
 parser = argparse.ArgumentParser(description=help_message)
 parser.add_argument('input', nargs='?',
@@ -350,7 +356,7 @@ if __name__ == '__main__':
 args.fill_up_bug_titles, args.pr_numbers)
 if args.changelog:
 lines = open(args.changelog).read().split('\n')
-start = list(takewhile(lambda l: not l.startswith('#'), lines))
+start = list(takewhile(skip_line_in_changelog, lines))
 end = lines[len(start):]
 with open(args.changelog, 'w') as f:
 if not start or not start[0]:
--
2.32.0

Fortran: Implement OpenMP 5.1 scope construct (was: Re: openmp: Implement OpenMP 5.1 scope construct)

2021-08-17 Thread Tobias Burnus


On 17.08.21 09:47, Jakub Jelinek wrote:

This patch implements the OpenMP 5.1 scope construct, which is similar
to worksharing constructs in many regards, but isn't one of them.


And the attached patch does the same for Fortran.

I took the opportunity to convert some additional C/C++ testcases to Fortran 
ones.

The latter is rather mechanical but took surprisingly much longer than the
actual FE change. On the way, I improved the error message for
'omp end  junk' and 'omp cancellation '.
And I encountered one issue with the reductions and the 'task' modifier,
https://gcc.gnu.org/PR101948

Otherwise, the patch is straightforward and, hence, a bit boring.

Tobias

PS: I noted that
 if (.false.) &
   !$omp cancellation do
gives an error in C/C++ FE but not in the Fortran FE. With .true. it
reaches the ME and does give a ME error about an orphaned construct.
I did not file a PR – but if someone thinks it should print an error,
feel free to open a PR.

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
Fortran: Implement OpenMP 5.1 scope construct

Fortran version to commit e45483c7c4badc4bf2d6ced22360ce1ab172967f,
which implemented OpenMP's scope construct for C and C++.
Most testcases are based on the C testcases; it also contains some
testcases which existed previously but had no Fortran equivalent.

gcc/fortran/ChangeLog:

	* dump-parse-tree.c (show_omp_node, show_code_node): Handle
	EXEC_OMP_SCOPE.
	* gfortran.h (enum gfc_statement): Add ST_OMP_(END_)SCOPE.
	(enum gfc_exec_op): Add EXEC_OMP_SCOPE.
	* match.h (gfc_match_omp_scope): New.
	* openmp.c (OMP_SCOPE_CLAUSES): Define.
	(gfc_match_omp_scope): New.
	(gfc_match_omp_cancellation_point, gfc_match_omp_end_nowait):
	Improve error diagnostic.
	(omp_code_to_statement): Handle ST_OMP_SCOPE.
	(gfc_resolve_omp_directive): Handle EXEC_OMP_SCOPE.
	* parse.c (decode_omp_directive, next_statement,
	gfc_ascii_statement, parse_omp_structured_block,
	parse_executable): Handle OpenMP's scope construct.
	* resolve.c (gfc_resolve_blocks): Likewise.
	* st.c (gfc_free_statement): Likewise.
	* trans-openmp.c (gfc_trans_omp_scope): New.
	(gfc_trans_omp_directive): Call it.
	* trans.c (trans_code): Handle EXEC_OMP_SCOPE.

libgomp/ChangeLog:

	* testsuite/libgomp.fortran/scope-1.f90: New test.
	* testsuite/libgomp.fortran/task-reduction-16.f90: New test.

gcc/testsuite/ChangeLog:

	* gfortran.dg/gomp/scan-1.f90:
	* gfortran.dg/gomp/cancel-1.f90: New test.
	* gfortran.dg/gomp/cancel-4.f90: New test.
	* gfortran.dg/gomp/loop-4.f90: New test.
	* gfortran.dg/gomp/nesting-1.f90: New test.
	* gfortran.dg/gomp/nesting-2.f90: New test.
	* gfortran.dg/gomp/nesting-3.f90: New test.
	* gfortran.dg/gomp/nowait-1.f90: New test.
	* gfortran.dg/gomp/reduction-task-1.f90: New test.
	* gfortran.dg/gomp/reduction-task-2.f90: New test.
	* gfortran.dg/gomp/reduction-task-2a.f90: New test.
	* gfortran.dg/gomp/reduction-task-3.f90: New test.
	* gfortran.dg/gomp/scope-1.f90: New test.
	* gfortran.dg/gomp/scope-2.f90: New test.

 gcc/fortran/dump-parse-tree.c  |   3 +
 gcc/fortran/gfortran.h |   4 +-
 gcc/fortran/match.h|   1 +
 gcc/fortran/openmp.c   |  23 +-
 gcc/fortran/parse.c|  13 +-
 gcc/fortran/resolve.c  |   2 +
 gcc/fortran/st.c   |   1 +
 gcc/fortran/trans-openmp.c |  20 +
 gcc/fortran/trans.c|   1 +
 gcc/testsuite/gfortran.dg/gomp/cancel-1.f90| 539 +
 gcc/testsuite/gfortran.dg/gomp/cancel-4.f90|   9 +
 gcc/testsuite/gfortran.dg/gomp/loop-4.f90  | 279 +++
 gcc/testsuite/gfortran.dg/gomp/nesting-1.f90   |  68 +++
 gcc/testsuite/gfortran.dg/gomp/nesting-2.f90   | 165 +++
 gcc/testsuite/gfortran.dg/gomp/nesting-3.f90   | 347 +
 gcc/testsuite/gfortran.dg/gomp/nowait-1.f90|  19 +
 .../gfortran.dg/gomp/reduction-task-1.f90  | 112 +
 .../gfortran.dg/gomp/reduction-task-2.f90  |  45 ++
 .../gfortran.dg/gomp/reduction-task-2a.f90 |  30 ++
 .../gfortran.dg/gomp/reduction-task-3.f90  |  15 +
 gcc/testsuite/gfortran.dg/gomp/scan-1.f90  |   5 +
 gcc/testsuite/gfortran.dg/gomp/scope-1.f90 |  39 ++
 gcc/testsuite/gfortran.dg/gomp/scope-2.f90 |  40 ++
 libgomp/testsuite/libgomp.fortran/scope-1.f90  |  55 +++
 .../libgomp.fortran/task-reduction-16.f90  |  82 
 25 files changed, 1911 insertions(+), 6 deletions(-)

diff --git a/gcc/fortran/dump-parse-tree.c b/gcc/fortran/dump-parse-tree.c
index 53c49fe4d6f..92d9f9e054d 100644
--- a/gcc/fortran/dump-parse-tree.c
+++ b/gcc/fortran/dump-pa

Re: [PATCH] Revert "Add the member integer_to_sse to processor_cost as a cost simulation for movd/pinsrd. It will be used to calculate the cost of vec_construct."

2021-08-17 Thread H.J. Lu via Gcc-patches

On Tue, Aug 17, 2021 at 5:43 AM liuhongt via Gcc-patches
 wrote:
>
> This reverts commit 872da9a6f664a06d73c987aa0cb2e5b830158a10.
>
> PR target/101936
> PR target/101929
>
>   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}
>   Pushed to master.
>

I proposed a different approach earlier.   Will it make a difference?

-- 
H.J.

[PATCH] Revert "Add the member integer_to_sse to processor_cost as a cost simulation for movd/pinsrd. It will be used to calculate the cost of vec_construct."

2021-08-17 Thread liuhongt via Gcc-patches

This reverts commit 872da9a6f664a06d73c987aa0cb2e5b830158a10.

PR target/101936
PR target/101929

  Bootstrapped and regtested on x86_64-linux-gnu{-m32,}
  Pushed to master.

---
 gcc/config/i386/i386.c  |  6 +-
 gcc/config/i386/i386.h  |  1 -
 gcc/config/i386/x86-tune-costs.h| 26 -
 gcc/testsuite/gcc.target/i386/pr99881.c |  2 +-
 4 files changed, 2 insertions(+), 33 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 4d4ab6a03d6..46844fab08f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -22203,11 +22203,7 @@ ix86_builtin_vectorization_cost (enum 
vect_cost_for_stmt type_of_cost,
   case vec_construct:
{
  /* N element inserts into SSE vectors.  */
- int cost
-   = TYPE_VECTOR_SUBPARTS (vectype) * (fp ?
-   ix86_cost->sse_op
-   : ix86_cost->integer_to_sse);
-
+ int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op;
  /* One vinserti128 for combining two SSE vectors for AVX256.  */
  if (GET_MODE_BITSIZE (mode) == 256)
cost += ix86_vec_cost (mode, ix86_cost->addss);
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 21fe51bba40..f9241df3b3d 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -165,7 +165,6 @@ struct processor_costs {
   const int xmm_move, ymm_move, /* cost of moving XMM and YMM register.  */
zmm_move;
   const int sse_to_integer;/* cost of moving SSE register to integer.  */
-  const int integer_to_sse;/* cost of moving integer to SSE register.  */
   const int gather_static, gather_per_elt; /* Cost of gather load is computed
   as static + per_item * nelts. */
   const int scatter_static, scatter_per_elt; /* Cost of gather store is
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 67cfa006196..ffe810f2bcb 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -102,7 +102,6 @@ struct processor_costs ix86_size_cost = {/* costs for 
tuning for size */
   in 128bit, 256bit and 512bit */
   3, 3, 3, /* cost of moving XMM,YMM,ZMM register 
*/
   3,   /* cost of moving SSE register to 
integer.  */
-  COSTS_N_BYTES (2),   /* cost of moving integer to sse 
register.  */
   5, 0,/* Gather load static, per_elt. 
 */
   5, 0,/* Gather store static, 
per_elt.  */
   0,   /* size of l1 cache  */
@@ -212,7 +211,6 @@ struct processor_costs i386_cost = {/* 386 specific 
costs */
   {4, 8, 16, 32, 64},  /* cost of unaligned stores.  */
   2, 4, 8, /* cost of moving XMM,YMM,ZMM register 
*/
   3,   /* cost of moving SSE register to 
integer.  */
-  COSTS_N_INSNS (1),   /* cost of moving integer to sse 
register.  */
   4, 4,/* Gather load static, per_elt. 
 */
   4, 4,/* Gather store static, 
per_elt.  */
   0,   /* size of l1 cache  */
@@ -321,7 +319,6 @@ struct processor_costs i486_cost = {/* 486 specific 
costs */
   {4, 8, 16, 32, 64},  /* cost of unaligned stores.  */
   2, 4, 8, /* cost of moving XMM,YMM,ZMM register 
*/
   3,   /* cost of moving SSE register to 
integer.  */
-  COSTS_N_INSNS (1),   /* cost of moving integer to sse 
register.  */
   4, 4,/* Gather load static, per_elt. 
 */
   4, 4,/* Gather store static, 
per_elt.  */
   4,   /* size of l1 cache.  486 has 8kB cache
@@ -432,7 +429,6 @@ struct processor_costs pentium_cost = {
   {4, 8, 16, 32, 64},  /* cost of unaligned stores.  */
   2, 4, 8, /* cost of moving XMM,YMM,ZMM register 
*/
   3,   /* cost of moving SSE register to 
integer.  */
-  COSTS_N_INSNS (1),   /* cost of moving integer to sse 
register.  */
   4, 4,/* Gather load static, per_elt. 
 */
   4, 4,/* Gather store static, 
per_elt.  */
   8,   /* size of l1 cache.  */
@@ -534,7 +530,6 @@ struct processor_costs lakemont_cost = {
   {4, 8, 16, 32, 64},  /* cost of unaligned stores.  */
   2, 4, 8, /* cost of moving XMM,YMM,ZMM register 
*/
   3,

Re: ipa-modref: merge flags when adding escape

2021-08-17 Thread Alexandre Oliva

On Aug 11, 2021, Jan Hubicka  wrote:

> This is improved patch

Thanks for the proper fix!

-- 
Alexandre Oliva, happy hacker                https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about

Re: move unreachable user labels to entry point

2021-08-17 Thread Alexandre Oliva

On Jul 13, 2021, Richard Biener  wrote:

> The right OMP region suggests something wrt correctness

Yeah, as Jakub wrote, we have to choose a block that's in the same
region the label belongs to.  The proposed patch doesn't change that, it
just uses the entry block instead of the previous block, if it satisfies
the requirement.

I found it made the logic simpler, slightly more efficient, and more
predictable, but I'm not attached to the change.  Since Jakub objected
to it, let's leave it alone.

-- 
Alexandre Oliva, happy hacker                https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about

Re: don't access cfun in dump_function_to_file

2021-08-17 Thread Alexandre Oliva

On Jul 28, 2021, Richard Biener  wrote:

> OK.

Thanks, I've finally put this in as well.

>> * tree-cfg.c (dump_function_to_file): Use fun, not cfun.

-- 
Alexandre Oliva, happy hacker                https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about

Re: retain debug stmt order when moving to successors

2021-08-17 Thread Alexandre Oliva

On Jul 28, 2021, Richard Biener  wrote:

> OK.

Thanks, I've finally put this in.  Sorry about the delay.

> On Wed, Jul 28, 2021 at 10:12 AM Alexandre Oliva  wrote:
>> * tree-inline.c (maybe_move_debug_stmts_to_successors): Don't
>> reverse debug stmts.



-- 
Alexandre Oliva, happy hacker                https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about

[PATCH] c++, v3: Implement P0466R5 __cpp_lib_is_layout_compatible compiler helpers [PR101539]

2021-08-17 Thread Jakub Jelinek via Gcc-patches

On Mon, Aug 16, 2021 at 03:57:21PM -0400, Jason Merrill wrote:
> > +static tree
> > +is_corresponding_member_aggr (location_t loc, tree basetype1, tree 
> > membertype1,
> > + tree arg1, tree basetype2, tree membertype2,
> > + tree arg2)
> > +{
> > +  tree field1 = TYPE_FIELDS (basetype1);
> > +  tree field2 = TYPE_FIELDS (basetype2);
> > +  tree ret = boolean_false_node;
> > +  while (1)
> > +{
> 
> Can we share more of the code between this function and
> layout_compatible_type_p?  I'm thinking of a function something like
> 
> bool next_common_initial_seqence (tree &mem1, tree &mem2)
> 
> that would update mem1/mem2 to the next entities of the common initial
> sequence (or null at the end) and return true, or return false if the next
> fields are not compatible.

Ok, here it is, tested with make check-c++-all RUNTESTFLAGS=dg.exp=cpp2a/*
so far (full bootstrap/regtests are scheduled).

I had to repeat in the anonymous struct is_corresponding_member_aggr case
the [[no_unique_address]] and bit_position checks, because we want to
recurse into the anonymous structs even when they aren't layout compatible
but just have some non-empty common initial sequence.
And the old code for non-empty DECL_FIELD_IS_BASE recursed, while the
new function just continues iterating on TYPE_FIELDS of the base.
That changes behavior of (added to the test):
struct A1 { int a; };
struct B1 { signed int b; };
struct alignas (16) C1 : public A1 {};
struct alignas (16) D1 : public B1 {};
static_assert (std::is_layout_compatible_v<C1, D1>);
which failed the assertion previously (because it temporarily tested if
A1 is layout compatible with D1, which it is not), but that actually is
a good thing, I think C1 and D1 are clearly layout compatible.
Or (not in the testsuite):
struct A1 { int a; };
struct B1 { signed int b; };
struct alignas (8) E1 : public A1 {};
struct F1 : public B1 {};
struct alignas (16) G1 : public E1 {};
struct alignas (16) H1 : public F1 {};
static_assert (std::is_layout_compatible_v<A1, B1>);
static_assert (!std::is_layout_compatible_v<E1, F1>);
static_assert (std::is_layout_compatible_v<G1, H1>);
(previously the last assertion would fail).

2021-08-17  Jakub Jelinek  

PR c++/101539
gcc/c-family/
* c-common.h (enum rid): Add RID_IS_LAYOUT_COMPATIBLE.
* c-common.c (c_common_reswords): Add __is_layout_compatible.
gcc/cp/
* cp-tree.h (enum cp_trait_kind): Add CPTK_IS_LAYOUT_COMPATIBLE.
(enum cp_built_in_function): Add CP_BUILT_IN_IS_CORRESPONDING_MEMBER.
(fold_builtin_is_corresponding_member, next_common_initial_seqence,
layout_compatible_type_p): Declare.
* parser.c (cp_parser_primary_expression): Handle
RID_IS_LAYOUT_COMPATIBLE.
(cp_parser_trait_expr): Likewise.
* cp-objcp-common.c (names_builtin_p): Likewise.
* constraint.cc (diagnose_trait_expr): Handle
CPTK_IS_LAYOUT_COMPATIBLE.
* decl.c (cxx_init_decl_processing): Register
__builtin_is_corresponding_member builtin.
* constexpr.c (cxx_eval_builtin_function_call): Handle
CP_BUILT_IN_IS_CORRESPONDING_MEMBER builtin.
* semantics.c (is_corresponding_member_union,
is_corresponding_member_aggr, fold_builtin_is_corresponding_member):
New functions.
(trait_expr_value): Handle CPTK_IS_LAYOUT_COMPATIBLE.
(finish_trait_expr): Likewise.
* typeck.c (next_common_initial_seqence, layout_compatible_type_p):
New functions.
* cp-gimplify.c (cp_gimplify_expr): Fold
CP_BUILT_IN_IS_CORRESPONDING_MEMBER.
(cp_fold): Likewise.
* tree.c (builtin_valid_in_constant_expr_p): Handle
CP_BUILT_IN_IS_CORRESPONDING_MEMBER.
* cxx-pretty-print.c (pp_cxx_trait_expression): Handle
CPTK_IS_LAYOUT_COMPATIBLE.
* class.c (remove_zero_width_bit_fields): Remove.
(layout_class_type): Don't call it.
gcc/testsuite/
* g++.dg/cpp2a/is-corresponding-member1.C: New test.
* g++.dg/cpp2a/is-corresponding-member2.C: New test.
* g++.dg/cpp2a/is-corresponding-member3.C: New test.
* g++.dg/cpp2a/is-corresponding-member4.C: New test.
* g++.dg/cpp2a/is-corresponding-member5.C: New test.
* g++.dg/cpp2a/is-corresponding-member6.C: New test.
* g++.dg/cpp2a/is-corresponding-member7.C: New test.
* g++.dg/cpp2a/is-corresponding-member8.C: New test.
* g++.dg/cpp2a/is-layout-compatible1.C: New test.
* g++.dg/cpp2a/is-layout-compatible2.C: New test.
* g++.dg/cpp2a/is-layout-compatible3.C: New test.

--- gcc/c-family/c-common.h.jj  2021-08-12 22:40:49.040646930 +0200
+++ gcc/c-family/c-common.h 2021-08-17 10:51:16.976465135 +0200
@@ -173,7 +173,8 @@ enum rid
   RID_IS_ABSTRACT, RID_IS_AGGREGATE,
   RID_IS_BASE_OF,  RID_IS_CLASS,
   RID_IS_EMPTY,RID_IS_ENUM,
-  RID_IS_FINAL,RID_IS_LITERAL_TYPE,
+  RID_IS_FI

Re: [PATCH] Improved handling of MULT_EXPR in bit CCP.

2021-08-17 Thread Richard Biener via Gcc-patches

On Mon, Aug 9, 2021 at 10:13 AM Roger Sayle  wrote:
>
>
> This patch allows GCC to constant fold (i | (i<<16)) | ((i<<24) | (i<<8)),
> where i is an unsigned char, or the equivalent (i*65537) | (i*16777472), to
> i*16843009.  The trick is to teach tree_nonzero_bits which bits may be
> set in the result of a multiplication by a constant given which bits are
> potentially set in the operands.  This allows the optimizations recently
> added to match.pd to catch more cases.
>
> The required mask/value pair from a multiplication may be calculated using
> a classical shift-and-add algorithm, given we already have implementations
> for both addition and shift by constant.  To keep this optimization "cheap",
> this functionality is only used if the constant multiplier has a few bits
> set (unless flag_expensive_optimizations), and we provide a special case
> fast-path implementation for the common case where the (non-constant)
> operand has no bits that are guaranteed to be set.  I have no evidence
> that this functionality causes performance issues, it's just that sparse
> multipliers provide the largest benefit to CCP.
>
> This patch has been tested on x86_64-pc-linux-gnu with "make bootstrap"
> and "make -k check" with no new failures.
>
> Ok for mainline?

OK.

Thanks,
Richard.

>
> 2021-08-09  Roger Sayle  
>
> gcc/ChangeLog
> * tree-ssa-ccp.c (bit_value_mult_const): New helper function to
> calculate the mask-value pair result of a multiplication by an
> unsigned constant.
> (bit_value_binop) [MULT_EXPR]:  Call it from here for
> multiplications
> by non-negative constants.
>
> gcc/testsuite/ChangeLog
> * gcc.dg/fold-ior-5.c: New test case.
>
> Roger
> --
>

[ping][vect-patterns][RFC] Refactor widening patterns to allow internal_fn's

2021-08-17 Thread Joel Hutton via Gcc-patches

Ping. Is there still interest in refactoring vect-patterns to internal_fn's? 

> -----Original Message-----
> From: Joel Hutton
> Sent: 07 June 2021 14:30
> To: gcc-patches@gcc.gnu.org
> Cc: Richard Biener ; Richard Sandiford
> 
> Subject: [vect-patterns][RFC] Refactor widening patterns to allow
> internal_fn's
> 
> Hi all,
> 
> This refactor allows widening patterns (such as widen_plus/widen_minus) to
> be represented as either internal_fns or tree_codes. The widening patterns
> were originally added as tree codes with the expectation that they would be
> refactored later.
> 
> [vect-patterns] Refactor as internal_fn's
> 
> Refactor vect-patterns to allow patterns to be internal_fns starting with
> widening_plus/minus patterns.
> 
> 
> gcc/ChangeLog:
> 
> * gimple-match.h (class code_helper): Move code_helper class to more
> visible header.
> * internal-fn.h (internal_fn_name): Add internal_fn range check.
> * optabs-tree.h (supportable_convert_operation): Change function
> prototypes to use code_helper.
> * tree-vect-patterns.c (vect_recog_widen_op_pattern): Refactor to use
> code_helper.
> * tree-vect-stmts.c (vect_gen_widened_results_half): Refactor to use
> code_helper, build internal_fns.
> (vect_create_vectorized_promotion_stmts): Refactor to use
> code_helper.
> (vectorizable_conversion): Refactor to use code_helper.
> (supportable_widening_operation): Refactor to use code_helper.
> (supportable_narrowing_operation): Refactor to use code_helper.
> * tree-vectorizer.h (supportable_widening_operation): Refactor to use
> code_helper.
> (supportable_narrowing_operation): Refactor to use code_helper.
> * tree.h (class code_helper): Refactor to use code_helper.

Re: Better memory statistics, take 2

2021-08-17 Thread Richard Biener via Gcc-patches

On Tue, Aug 17, 2021 at 11:18 AM Thomas Schwinge
 wrote:
>
> Hi!
>
> On 2004-09-02T18:47:01+0200, Jan Hubicka  wrote:
> > *** ggc-common.c  9 Aug 2004 20:19:29 -   1.88
> > --- ggc-common.c  2 Sep 2004 16:08:50 -
>
> > + /* When set, ggc_collect will do collection.  */
> > + bool ggc_force_collect;
>
> > *** void dump_ggc_loc_statistics (void)
>
> > +   ggc_force_collect = true;
> > +   ggc_collect ();
>
> > *** ggc_collect (void)
>
> > !   if (G.allocated < allocated_last_gc + min_expand)
>
> > !   if (G.allocated < allocated_last_gc + min_expand && !ggc_force_collect)
>
> > *** ggc.h 2 Sep 2004 02:39:15 -   1.68
> > --- ggc.h 2 Sep 2004 16:08:50 -
>
> > + /* When set, ggc_collect will do collection.  */
> > + extern bool ggc_force_collect;
>
> This has later acquired another use in the GCC selftests.
>
> I wonder if we shouldn't simplify the interface per the attached "Turn
> global 'ggc_force_collect' variable into 'force_collect' parameter to
> 'ggc_collect'"?  OK to push to master branch after bootstrap testing?

OK.

Thanks,
Richard.

>
> Grüße
>  Thomas
>
>
> -
> Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
> München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
> Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
> München, HRB 106955

Re: [PATCH] Improved handling of MINUS_EXPR in bit CCP.

2021-08-17 Thread Richard Biener via Gcc-patches

On Thu, Aug 12, 2021 at 11:52 AM Roger Sayle  wrote:
>
>
> This patch improves the bit bounds for MINUS_EXPR during tree-ssa's
> conditional constant propagation (CCP) pass (and as an added bonus
> adds support for POINTER_DIFF_EXPR).
>
> The pessimistic assumptions made by the current algorithm are
> demonstrated by considering 1 - (x&1).  Intuitively this should
> have possible values 0 and 1, and therefore an unknown mask of 1.
> Alas by treating subtraction as a negation followed by addition,
> the second operand first becomes 0 or -1, with an unknown mask
> of all ones, which results in the addition containing no known bits.
>
> Improved bounds are achieved by using the same approach used for
> PLUS_EXPR, determining the result with the minimum number of borrows,
> the result from the maximum number of borrows, and examining the bits
> they have in common.  One additional benefit of this approach
> is that it is applicable to POINTER_DIFF_EXPR, where previously the
> negation of a pointer didn't/doesn't make sense.
>
> A more convincing example, where a transformation missed by .032t.ccp
> isn't caught a few passes later by .038t.evrp, is the expression
> (7 - (x&5)) & 2, which (in the new test case) currently survives the
> tree-level optimizers but with this patch is now simplified to the
> constant value 2.
>
> This patch has been tested on x86_64-pc-linux-gnu with "make bootstrap"
> and "make -k check" with no new failures.
>
> Ok for mainline?

OK.

Thanks,
Richard.

> 2021-08-12  Roger Sayle  
>
> gcc/ChangeLog
> * tree-ssa-ccp.c (bit_value_binop) [MINUS_EXPR]: Use same
> algorithm as PLUS_EXPR to improve subtraction bit bounds.
> [POINTER_DIFF_EXPR]: Treat as synonymous with MINUS_EXPR.
>
> gcc/testsuite/ChangeLog
> * gcc.dg/tree-ssa/ssa-ccp-40.c: New test case.
>
>
> Roger
> --
> Roger Sayle
> NextMove Software
> Cambridge, UK
>

Re: [PATCH] aarch64: Replace some uses of GET_CODE with RTL predicate macros

2021-08-17 Thread Richard Sandiford via Gcc-patches

Sorry for the slow reply.

Alistair Lee via Gcc-patches  writes:
> Hi all, this patch changed some RTL nodes to appropriate macros in Aarch64.c
> backend using a script. Would this be okay for trunk?
>
>
> Also, this is my first contribution, and I do not yet have commit 
> rights, so if everything is okay  could someone commit this for me?
>
> Thank you and I would appreciate any feedback/advice!
>
>
> gcc/ChangeLog:
>
>
> 2021-07-21  Alistair_Lee  alistair@arm.com
>
>
>   * rtl.h (CONST_VECTOR_P): New macro.
>   (CONST_STRING_P): New macro.

Since the patch doesn't add any uses of CONST_STRING_P, I think it
would be better not to add it at this stage.  (CONST_STRING is rarely
used outside generators.)

Looks good otherwise, thanks.  I've pushed it to trunk without the
CONST_STRING_P part.

Richard

>
>   * config/aarch64/aarch64.c (aarch64_get_sve_pred_bits): Use RTL 
> code testing macros.
>   (aarch64_ptrue_all_mode): Likewise.
>   (aarch64_expand_mov_immediate): Likewise.
>   (aarch64_const_vec_all_in_range_p): Likewise.
>   (aarch64_rtx_costs): Likewise.
>   (aarch64_legitimate_constant_p): Likewise.
>   (aarch64_simd_valid_immediate): Likewise.
>   (aarch64_simd_make_constant): Likewise.
>   (aarch64_convert_mult_to_shift): Likewise.
>   (aarch64_expand_sve_vec_perm): Likewise.
>   (aarch64_vec_fpconst_pow_of_2): Likewise.

Re: [PATCH] Couple of debug dump improvements to scheduler (no code-gen changes)

2021-08-17 Thread Maxim Kuvyrkov via Gcc-patches

Hi Jeff,

I've forgotten to commit these patches when they were approved 2 years
ago.  They still apply cleanly to the current mainline and I've retested
them (bootstrap+regtest) on aarch64-linux-gnu and arm-linux-gnueabihf with
no regressions.

I'll commit these shortly.

Regards,

On Fri, 30 Aug 2019 at 01:57, Jeff Law  wrote:

> On 8/29/19 9:44 AM, Maxim Kuvyrkov wrote:
> > Hi,
> >
> > The first patch adds ranking statistics for autoprefetcher heuristic.
> >
> > The second one makes it easier to diff scheduler debug dumps by adding
> more context lines for diff at clock increments.
> >
> > OK to commit?
> OK for both.
> jeff
>

-- 
Maxim Kuvyrkov
www.linaro.org

0003-Improve-diff-ability-of-scheduler-logs.patch
Description: Binary data

0002-Add-missing-entry-for-rank_for_schedule-stats.patch
Description: Binary data

Re: [PR91598] Improve autoprefetcher heuristic in haifa-sched.c

2021-08-17 Thread Maxim Kuvyrkov via Gcc-patches

Hi All,

I've forgotten to commit this patch when it was approved 2 years ago.  It
still applies cleanly to the current mainline and I've retested it
(bootstrap+regtest) on aarch64-linux-gnu and arm-linux-gnueabihf with no
regressions.

I'll commit this shortly.

Regards,

On Tue, 3 Sept 2019 at 19:55, Wilco Dijkstra  wrote:

> Hi Maxim,
>
> >  > Autoprefetching heuristic is enabled only for cores that support it,
> and isn't active by default.
> >
> > It's enabled on most cores, including the default (generic). So we do
> have to be
> > careful that this doesn't regress any other benchmarks or do worse on
> modern
> > cores.
>
> I benchmarked your scheduler change on a few AArch64 machines, and it
> either has
> no effect or a positive effect on SPECFP with no major outliers (and only
> minor
> codesize differences). So I think your proposed patch is OK as is.
>
> Cheers,
> Wilco
>
>

-- 
Maxim Kuvyrkov
www.linaro.org

0001-Improve-autoprefetcher-heuristic-partly-fix-regressi.patch
Description: Binary data

Re: [PATCH] Do not enable DT_INIT_ARRAY/DT_FINI_ARRAY on uclinuxfdpiceabi

2021-08-17 Thread Richard Sandiford via Gcc-patches

Christophe LYON via Gcc-patches  writes:
> ping?
>
>
> On 12/08/2021 17:29, Christophe Lyon via Gcc-patches wrote:
>> Commit r12-1328 enabled DT_INIT_ARRAY/DT_FINI_ARRAY for all Linux
>> targets, but this does not work for arm-none-uclinuxfdpiceabi: it
>> makes all the execution tests fail.
>>
>> This patch restores the original behavior for uclinuxfdpiceabi.
>>
>> 2021-08-12  Christophe Lyon  
>>
>>  gcc/
>>  PR target/100896
>>  * config.gcc (gcc_cv_initfini_array): Leave undefined for
>>  uclinuxfdpiceabi targets.

OK, thanks.

Richard

>> ---
>>   gcc/config.gcc | 10 --
>>   1 file changed, 8 insertions(+), 2 deletions(-)
>>
>> diff --git a/gcc/config.gcc b/gcc/config.gcc
>> index 93e2b3219b9..8c8d30ca934 100644
>> --- a/gcc/config.gcc
>> +++ b/gcc/config.gcc
>> @@ -851,8 +851,14 @@ case ${target} in
>> tmake_file="${tmake_file} t-glibc"
>> target_has_targetcm=yes
>> target_has_targetdm=yes
>> -  # Linux targets always support .init_array.
>> -  gcc_cv_initfini_array=yes
>> +  case $target in
>> +*-*-uclinuxfdpiceabi)
>> +  ;;
>> +*)
>> +  # Linux targets always support .init_array.
>> +  gcc_cv_initfini_array=yes
>> +  ;;
>> +  esac
>> ;;
>>   *-*-netbsd*)
>> tm_p_file="${tm_p_file} netbsd-protos.h"

[committed] Special case -TYPE_MIN_VALUE for flag_wrapv in operator_abs::op1_range.

2021-08-17 Thread Aldy Hernandez via Gcc-patches

From: Andrew MacLeod 

With flag_wrapv, -TYPE_MIN_VALUE = TYPE_MIN_VALUE which is
unrepresentable.  We currently special case this in the ABS folding
routine, but are missing similar treatment in operator_abs::op1_range.

Tested on x86-64 Linux.

Aldy

[p.s. Thanks to Andrew for fixing this.  --author= set appropriately]

PR tree-optimization/101938

gcc/ChangeLog:

* range-op.cc (operator_abs::op1_range): Special case
-TYPE_MIN_VALUE for flag_wrapv.

gcc/testsuite/ChangeLog:

* gcc.dg/pr101938.c: New test.
---
 gcc/range-op.cc |  6 ++
 gcc/testsuite/gcc.dg/pr101938.c | 28 
 2 files changed, 34 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr101938.c

diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index eb66e12677f..56eccf471a2 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -3642,6 +3642,12 @@ operator_abs::op1_range (irange &r, tree type,
 r.union_ (int_range<1> (type,
-positives.upper_bound (i),
-positives.lower_bound (i)));
+  // With flag_wrapv, -TYPE_MIN_VALUE = TYPE_MIN_VALUE which is
+  // unrepresentable.  Add -TYPE_MIN_VALUE in this case.
+  wide_int min_value = wi::min_value (TYPE_PRECISION (type), TYPE_SIGN (type));
+  wide_int lb = lhs.lower_bound ();
+  if (!TYPE_OVERFLOW_UNDEFINED (type) && wi::eq_p (lb, min_value))
+r.union_ (int_range<2> (type, lb, lb));
   return true;
 }
 
diff --git a/gcc/testsuite/gcc.dg/pr101938.c b/gcc/testsuite/gcc.dg/pr101938.c
new file mode 100644
index 000..82777554eaf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr101938.c
@@ -0,0 +1,28 @@
+// { dg-do run }
+// { dg-require-effective-target lp64 }
+// { dg-options "-O2 -fwrapv" }
+
+typedef long long int int64;
+#define INT64CONST(x) (x##LL)
+/* -9223372036854775808ULL */
+#define INT64_MIN (-INT64CONST(0x7FFFFFFFFFFFFFFF) - 1)
+
+static void __attribute__((noipa)) foo(int64 arg1, int64 arg2) {
+  int64 a1 = -arg1;
+  int64 a2 = (arg2 < 0) ? arg2 : -arg2;
+
+  if (a1 > a2) {
+int64 swap = arg1;
+arg1 = arg2;
+arg2 = swap;
+  }
+
+  if (arg1 == INT64_MIN && arg2 == -1) return;
+
+  __builtin_abort();
+}
+
+int main() {
+  foo(-1, INT64_MIN);
+  return 0;
+}
-- 
2.31.1

[PATCH] tree-optimization/101868 - avoid PRE of trapping mems across calls

2021-08-17 Thread Richard Biener via Gcc-patches

This backports a fix for the omission of a check of trapping mems
when hoisting them across calls that might not return.  This was
originally done as part of a fix to handle const functions that throw
properly.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed to the
GCC 11 branch.

2021-08-17  Richard Biener  

PR tree-optimization/101373
PR tree-optimization/101868
* tree-ssa-pre.c (prune_clobbered_mems): Also prune trapping
references when the BB may not return.

* gcc.dg/lto/pr101868_0.c: New testcase.
* gcc.dg/lto/pr101868_1.c: Likewise.
* gcc.dg/lto/pr101868_2.c: Likewise.
* gcc.dg/lto/pr101868_3.c: Likewise.
---
 gcc/testsuite/gcc.dg/lto/pr101868_0.c | 33 +++
 gcc/testsuite/gcc.dg/lto/pr101868_1.c | 23 +++
 gcc/testsuite/gcc.dg/lto/pr101868_2.c | 11 +
 gcc/testsuite/gcc.dg/lto/pr101868_3.c |  8 +++
 gcc/tree-ssa-pre.c|  7 ++
 5 files changed, 82 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/lto/pr101868_0.c
 create mode 100644 gcc/testsuite/gcc.dg/lto/pr101868_1.c
 create mode 100644 gcc/testsuite/gcc.dg/lto/pr101868_2.c
 create mode 100644 gcc/testsuite/gcc.dg/lto/pr101868_3.c

diff --git a/gcc/testsuite/gcc.dg/lto/pr101868_0.c 
b/gcc/testsuite/gcc.dg/lto/pr101868_0.c
new file mode 100644
index 000..c84d19b0267
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr101868_0.c
@@ -0,0 +1,33 @@
+/* { dg-lto-do run } */
+/* { dg-lto-options { "-O2 -fno-strict-aliasing -flto" } } */
+
+typedef unsigned long VALUE;
+
+__attribute__ ((cold))
+void rb_check_type(VALUE, int);
+
+static VALUE
+repro(VALUE dummy, VALUE hash)
+{
+if (hash == 0) {
+rb_check_type(hash, 1);
+}
+else if (*(long *)hash) {
+rb_check_type(hash, 1);
+}
+
+
+return *(long *)hash;
+}
+
+static VALUE (*that)(VALUE dummy, VALUE hash) = repro;
+
+int
+main(int argc, char **argv)
+{
+argc--;
+that(0, argc);
+
+rb_check_type(argc, argc);
+
+}
diff --git a/gcc/testsuite/gcc.dg/lto/pr101868_1.c 
b/gcc/testsuite/gcc.dg/lto/pr101868_1.c
new file mode 100644
index 000..146c14abc76
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr101868_1.c
@@ -0,0 +1,23 @@
+typedef unsigned long VALUE;
+
+
+__attribute__ ((noreturn)) void rexc_raise(VALUE mesg);
+
+VALUE rb_donothing(VALUE klass);
+
+static void
+funexpected_type(VALUE x, int xt, int t)
+{
+rexc_raise(rb_donothing(0));
+}
+
+__attribute__ ((cold))
+void
+rb_check_type(VALUE x, int t)
+{
+int xt;
+
+if (x == 0) {
+funexpected_type(x, xt, t);
+}
+}
diff --git a/gcc/testsuite/gcc.dg/lto/pr101868_2.c 
b/gcc/testsuite/gcc.dg/lto/pr101868_2.c
new file mode 100644
index 000..e6f01b23f45
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr101868_2.c
@@ -0,0 +1,11 @@
+typedef unsigned long VALUE;
+
+static void thing(void) {}
+static void (*ptr)(void) = &thing;
+
+VALUE
+rb_donothing(VALUE klass)
+{
+ptr();
+return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/lto/pr101868_3.c 
b/gcc/testsuite/gcc.dg/lto/pr101868_3.c
new file mode 100644
index 000..61217625be7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr101868_3.c
@@ -0,0 +1,8 @@
+typedef unsigned long VALUE;
+
+__attribute__((noreturn))
+void
+rexc_raise(VALUE mesg)
+{
+__builtin_exit(0);
+}
diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c
index 04ec4fbaeec..2aedc31e1d7 100644
--- a/gcc/tree-ssa-pre.c
+++ b/gcc/tree-ssa-pre.c
@@ -2070,6 +2070,13 @@ prune_clobbered_mems (bitmap_set_t set, basic_block 
block)
  && value_dies_in_block_x (expr, block
to_remove = i;
}
+ /* If the REFERENCE may trap make sure the block does not contain
+a possible exit point.
+???  This is overly conservative if we translate AVAIL_OUT
+as the available expression might be after the exit point.  */
+ if (BB_MAY_NOTRETURN (block)
+ && vn_reference_may_trap (ref))
+   to_remove = i;
}
   else if (expr->kind == NARY)
{
-- 
2.31.1

[PATCH v2] Fix incomplete computation in fill_always_executed_in_1

2021-08-17 Thread Xionghu Luo via Gcc-patches




On 2021/8/17 15:12, Richard Biener wrote:
> On Tue, 17 Aug 2021, Xionghu Luo wrote:
> 
>> Hi,
>>
>> On 2021/8/16 19:46, Richard Biener wrote:
>>> On Mon, 16 Aug 2021, Xiong Hu Luo wrote:
>>>
 It seems to me that ALWAYS_EXECUTED_IN is not computed correctly for
 nested loops.  inn_loop is updated to inner loop, so it need be restored
 when exiting from innermost loop. With this patch, the store instruction
 in outer loop could also be moved out of outer loop by store motion.
 Any comments?  Thanks.
>>>
 gcc/ChangeLog:

   * tree-ssa-loop-im.c (fill_always_executed_in_1): Restore
   inn_loop when exiting from innermost loop.

 gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/ssa-lim-19.c: New test.
 ---
gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-19.c | 24 ++
gcc/tree-ssa-loop-im.c |  6 +-
2 files changed, 29 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-19.c

 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-19.c
 b/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-19.c
 new file mode 100644
 index 000..097a5ee4a4b
 --- /dev/null
 +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-19.c
 @@ -0,0 +1,24 @@
 +/* PR/101293 */
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -fdump-tree-lim2-details" } */
 +
 +struct X { int i; int j; int k;};
 +
 +void foo(struct X *x, int n, int l)
 +{
 +  for (int j = 0; j < l; j++)
 +{
 +  for (int i = 0; i < n; ++i)
 +  {
 +int *p = &x->j;
 +int tem = *p;
 +x->j += tem * i;
 +  }
 +  int *r = &x->k;
 +  int tem2 = *r;
 +  x->k += tem2 * j;
 +}
 +}
 +
 +/* { dg-final { scan-tree-dump-times "Executing store motion" 2 "lim2" } }
 */
 +
 diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
 index b24bc64f2a7..5ca4738b20e 100644
 --- a/gcc/tree-ssa-loop-im.c
 +++ b/gcc/tree-ssa-loop-im.c
 @@ -3211,6 +3211,10 @@ fill_always_executed_in_1 (class loop *loop, sbitmap
 @@ contains_call)
   if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
 last = bb;
+ if (inn_loop != loop
 +&& flow_loop_nested_p (bb->loop_father, inn_loop))
 +  inn_loop = bb->loop_father;
 +
>>>
>>> The comment says
>>>
>>> /* In a loop that is always entered we may proceed anyway.
>>>But record that we entered it and stop once we leave it.
>>> */
>>> inn_loop = bb->loop_father;
>>>
>>> and your change would defeat that early return, no?
>>
>> The issue is the search method exits too early when iterating the outer
>> loop.  For example of a nested loop, loop 1 includes 5,8,3,10,4,9
>> and loop2 includes 3,10.  Currently, it breaks when bb is 3 as bb 3
>> doesn't dominate bb 9 of loop 1.  But actually, both bb 5 and bb 4 are
>> ALWAYS_EXECUTED for loop 1, so if there are store instructions in bb 4
>> they won't be processed by store motion again.
>>
>>
>>  5<
>>  |\   |
>>  8 \  9
>>  |  \ |
>> --->3--->4
>> ||
>> 10---|
>>
>>
>> SET_ALWAYS_EXECUTED_IN is only set to bb 5 on master code now, with this
>> patch, it will continue search when meet bb 3 until bb 4, then last is 
>> updated
>> to bb 4, it will break until exit edge is found at bb 4 by
>> "if (!flow_bb_inside_loop_p (loop, e->dest))".  Then the followed loop code
>> will
>> set bb 4 as ALWAYS_EXECUTED and all its idoms bb 5.
>>
>>
>>   while (1)
>>  {
>>SET_ALWAYS_EXECUTED_IN (last, loop);
>>if (last == loop->header)
>>  break;
>>last = get_immediate_dominator (CDI_DOMINATORS, last);
>>  }
>>
>> After further discussion with Kewen, we found that the inn_loop variable is
>> totally useless and could be removed.
>>
>>
>>>
   if (bitmap_bit_p (contains_call, bb->index))
 break;

 @@ -3238,7 +3242,7 @@ fill_always_executed_in_1 (class loop *loop, sbitmap
 @@ contains_call)

   if (bb->loop_father->header == bb)
{
 -if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
 +if (!dominated_by_p (CDI_DOMINATORS, bb->loop_father->latch,
 bb))
  break;
>>>
>>> That's now a always false condition - a loops latch is always dominated
>>> by its header.  The condition as written tries to verify whether the
>>> loop is always entered - mind we visit all blocks, not only those
>>> always executed.
>>
>> Thanks for the catch!  I am afraid the piece of code should be removed since
>> it stops
>> search of potential ALWAYS EXECUTED bb after inner loop...
> 
> But the code says:
> 
>  /* In a loop that is always entered we may proceed anyway.
> But record that we entered it and stop once we leave it.
>

Re: Better memory statistics, take 2

2021-08-17 Thread Thomas Schwinge

Hi!

On 2004-09-02T18:47:01+0200, Jan Hubicka  wrote:
> *** ggc-common.c  9 Aug 2004 20:19:29 -   1.88
> --- ggc-common.c  2 Sep 2004 16:08:50 -

> + /* When set, ggc_collect will do collection.  */
> + bool ggc_force_collect;

> *** void dump_ggc_loc_statistics (void)

> +   ggc_force_collect = true;
> +   ggc_collect ();

> *** ggc_collect (void)

> !   if (G.allocated < allocated_last_gc + min_expand)

> !   if (G.allocated < allocated_last_gc + min_expand && !ggc_force_collect)

> *** ggc.h 2 Sep 2004 02:39:15 -   1.68
> --- ggc.h 2 Sep 2004 16:08:50 -

> + /* When set, ggc_collect will do collection.  */
> + extern bool ggc_force_collect;

This has later acquired another use in the GCC selftests.

I wonder if we shouldn't simplify the interface per the attached "Turn
global 'ggc_force_collect' variable into 'force_collect' parameter to
'ggc_collect'"?  OK to push to master branch after bootstrap testing?


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From 58e7dc524f65593166102c553a2e6e11e6b20b60 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Tue, 17 Aug 2021 10:47:02 +0200
Subject: [PATCH] Turn global 'ggc_force_collect' variable into 'force_collect'
 parameter to 'ggc_collect'

This simplifies the interface and gets us rid of a global variable.
No change in behavior.

Clean-up for 2004-09-02 CVS commit (Subversion r86974,
Git commit 0772402279c0161fe41784911b52c77e12803c42)
"Better memory statistics, take 2".

	gcc/
	* ggc.h (ggc_collect): Add 'force_collect' parameter.
	* ggc-page.c (ggc_collect): Use that one instead of global
	'ggc_force_collect'.  Adjust all users.
	* doc/gty.texi (Invoking the garbage collector): Update.
	* ggc-internal.h (ggc_force_collect): Remove.
	* ggc-common.c (ggc_force_collect): Likewise.
	* selftest.h (forcibly_ggc_collect): Remove.
	* ggc-tests.c (selftest::forcibly_ggc_collect): Likewise.
	* read-rtl-function.c (test_loading_labels): Adjust.
	* selftest-run-tests.c (run_tests): Likewise.
---
 gcc/doc/gty.texi |  5 -
 gcc/ggc-common.c |  8 +---
 gcc/ggc-internal.h   |  3 ---
 gcc/ggc-page.c   |  4 ++--
 gcc/ggc-tests.c  | 29 +
 gcc/ggc.h|  6 --
 gcc/read-rtl-function.c  |  2 +-
 gcc/selftest-run-tests.c |  2 +-
 gcc/selftest.h   |  5 -
 9 files changed, 22 insertions(+), 42 deletions(-)

diff --git a/gcc/doc/gty.texi b/gcc/doc/gty.texi
index cf070c1f7f7..b667d1d19ba 100644
--- a/gcc/doc/gty.texi
+++ b/gcc/doc/gty.texi
@@ -654,7 +654,10 @@ The GCC garbage collector GGC is only invoked explicitly. In contrast
 with many other garbage collectors, it is not implicitly invoked by
 allocation routines when a lot of memory has been consumed. So the
 only way to have GGC reclaim storage is to call the @code{ggc_collect}
-function explicitly.  This call is an expensive operation, as it may
+function explicitly.
+When the @var{force_collect} parameter is set or otherwise an internal
+heuristic decides whether to actually collect, this call is
+potentially an expensive operation, as it may
 have to scan the entire heap.  Beware that local variables (on the GCC
 call stack) are not followed by such an invocation (as many other
 garbage collectors do): you should reference all your data from static
diff --git a/gcc/ggc-common.c b/gcc/ggc-common.c
index 357bda13f97..f38e4d5020d 100644
--- a/gcc/ggc-common.c
+++ b/gcc/ggc-common.c
@@ -31,9 +31,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "plugin.h"
 #include "options.h"
 
-/* When set, ggc_collect will do collection.  */
-bool ggc_force_collect;
-
 /* When true, protect the contents of the identifier hash table.  */
 bool ggc_protect_identifiers = true;
 
@@ -965,12 +962,9 @@ dump_ggc_loc_statistics ()
   if (! GATHER_STATISTICS)
 return;
 
-  ggc_force_collect = true;
-  ggc_collect ();
+  ggc_collect (true);
 
   ggc_mem_desc.dump (GGC_ORIGIN);
-
-  ggc_force_collect = false;
 }
 
 /* Record ALLOCATED and OVERHEAD bytes to descriptor NAME:LINE (FUNCTION).  */
diff --git a/gcc/ggc-internal.h b/gcc/ggc-internal.h
index 39850cd6230..4dcfb4c008c 100644
--- a/gcc/ggc-internal.h
+++ b/gcc/ggc-internal.h
@@ -88,9 +88,6 @@ extern void ggc_pch_read (FILE *, void *);
 
 /* Allocation and collection.  */
 
-/* When set, ggc_collect will do collection.  */
-extern bool ggc_force_collect;
-
 extern void ggc_record_overhead (size_t, size_t, void * FINAL_MEM_STAT_DECL);
 
 extern void ggc_free_overhead (void *);
diff --git a/gcc/ggc-page.c b/gcc/ggc-page.c
index 1b09f0da94f..a6fbecaa1d8 100644
--- a/gcc/ggc-page.c
+++ b/gcc/ggc-page.c
@@ -2184,7 +2184,7 @@ validate_free_objects (void)
 /* Top level mark-and-sweep routin

[PATCH] [i386] Add x86 tune to enable v2df vector reduction by paddpd.

2021-08-17 Thread liuhongt via Gcc-patches

Hi:
  This patch adds a new x86 tune named X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD
to enable haddpd for v2df vector reduction; the tune is disabled by default.

  Bootstrapped and regtested on x86_64-linux-gnu{-m32,}
  Ok for trunk?

gcc/ChangeLog:

PR target/97147
* config/i386/i386.h (TARGET_V2DF_REDUCTION_PREFER_HADDPD):
New macro.
* config/i386/sse.md (*sse3_haddv2df3_low): Add
TARGET_V2DF_REDUCTION_PREFER_HADDPD.
(*sse3_hsubv2df3_low): Ditto.
* config/i386/x86-tune.def
(X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD): New tune.

gcc/testsuite/ChangeLog:

PR target/97147
* gcc.target/i386/pr54400.c: Adjust testcase.
* gcc.target/i386/pr94147.c: New test.
---
 gcc/config/i386/i386.h  |  2 ++
 gcc/config/i386/sse.md  |  4 ++--
 gcc/config/i386/x86-tune.def|  5 +
 gcc/testsuite/gcc.target/i386/pr54400.c |  2 +-
 gcc/testsuite/gcc.target/i386/pr94147.c | 22 ++
 5 files changed, 32 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr94147.c

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 21fe51bba40..b3e57a83846 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -418,6 +418,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER]
 #define TARGET_EXPAND_ABS \
ix86_tune_features[X86_TUNE_EXPAND_ABS]
+#define TARGET_V2DF_REDUCTION_PREFER_HADDPD \
+   ix86_tune_features[X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD]
 
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices {
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 27e25cc7952..13889687793 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2771,7 +2771,7 @@ (define_insn "*sse3_haddv2df3_low"
  (vec_select:DF
(match_dup 1)
(parallel [(match_operand:SI 3 "const_0_to_1_operand")]]
-  "TARGET_SSE3
+  "TARGET_SSE3 && TARGET_V2DF_REDUCTION_PREFER_HADDPD
&& INTVAL (operands[2]) != INTVAL (operands[3])"
   "@
haddpd\t{%0, %0|%0, %0}
@@ -2790,7 +2790,7 @@ (define_insn "*sse3_hsubv2df3_low"
  (vec_select:DF
(match_dup 1)
(parallel [(const_int 1)]]
-  "TARGET_SSE3"
+  "TARGET_SSE3 && TARGET_V2DF_REDUCTION_PREFER_HADDPD"
   "@
hsubpd\t{%0, %0|%0, %0}
vhsubpd\t{%1, %1, %0|%0, %1, %1}"
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index eb057a67750..8f55da89c92 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -452,6 +452,11 @@ DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, 
"avoid_fma_chains", m_ZNVER)
smaller FMA chain.  */
 DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | 
m_ZNVER3)
 
+/* X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD: Prefer haddpd
+   for v2df vector reduction.  */
+DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD,
+ "v2df_reduction_prefer_haddpd", m_NONE)
+
 /*/
 /* AVX instruction selection tuning (some of SSE flags affects AVX, too) */
 /*/
diff --git a/gcc/testsuite/gcc.target/i386/pr54400.c 
b/gcc/testsuite/gcc.target/i386/pr54400.c
index 5ed5ba06644..3a450376b9e 100644
--- a/gcc/testsuite/gcc.target/i386/pr54400.c
+++ b/gcc/testsuite/gcc.target/i386/pr54400.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -msse3 -mfpmath=sse" } */
+/* { dg-options "-O2 -msse3 -mfpmath=sse 
-mtune-ctrl=v2df_reduction_prefer_haddpd" } */
 
 #include 
 
diff --git a/gcc/testsuite/gcc.target/i386/pr94147.c 
b/gcc/testsuite/gcc.target/i386/pr94147.c
new file mode 100644
index 00000000000..8ff5c34834f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr94147.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse3 -mfpmath=sse" } */
+
+#include 
+
+double f (__m128d p)
+{
+  return p[0] - p[1];
+}
+
+double g1 (__m128d p)
+{
+  return p[0] + p[1];
+}
+
+double g2 (__m128d p)
+{
+  return p[1] + p[0];
+}
+
+/* { dg-final { scan-assembler-not "hsubpd" } } */
+/* { dg-final { scan-assembler-not "haddpd" } } */
-- 
2.18.1

Re: [PATCH 3/3] aarch64: Remove macros for vld4[q]_lane Neon intrinsics

2021-08-17 Thread Richard Sandiford via Gcc-patches

Jonathan Wright  writes:
> Hi,
>
> This patch removes macros for vld4[q]_lane Neon intrinsics. This is a
> preparatory step before adding new modes for structures of Advanced
> SIMD vectors.
>
> Regression tested and bootstrapped on aarch64-none-linux-gnu - no
> issues.
>
> Ok for master?

OK, thanks.

Richard

> Thanks,
> Jonathan
>
> ---
>
> gcc/ChangeLog:
>
> 2021-08-16  Jonathan Wright  
>
> * config/aarch64/arm_neon.h (__LD4_LANE_FUNC): Delete.
> (__LD4Q_LANE_FUNC): Likewise.
> (vld4_lane_u8): Define without macro.
> (vld4_lane_u16): Likewise.
> (vld4_lane_u32): Likewise.
> (vld4_lane_u64): Likewise.
> (vld4_lane_s8): Likewise.
> (vld4_lane_s16): Likewise.
> (vld4_lane_s32): Likewise.
> (vld4_lane_s64): Likewise.
> (vld4_lane_f16): Likewise.
> (vld4_lane_f32): Likewise.
> (vld4_lane_f64): Likewise.
> (vld4_lane_p8): Likewise.
> (vld4_lane_p16): Likewise.
> (vld4_lane_p64): Likewise.
> (vld4q_lane_u8): Likewise.
> (vld4q_lane_u16): Likewise.
> (vld4q_lane_u32): Likewise.
> (vld4q_lane_u64): Likewise.
> (vld4q_lane_s8): Likewise.
> (vld4q_lane_s16): Likewise.
> (vld4q_lane_s32): Likewise.
> (vld4q_lane_s64): Likewise.
> (vld4q_lane_f16): Likewise.
> (vld4q_lane_f32): Likewise.
> (vld4q_lane_f64): Likewise.
> (vld4q_lane_p8): Likewise.
> (vld4q_lane_p16): Likewise.
> (vld4q_lane_p64): Likewise.
> (vld4_lane_bf16): Likewise.
> (vld4q_lane_bf16): Likewise.

Re: [PATCH 2/3] aarch64: Remove macros for vld3[q]_lane Neon intrinsics

2021-08-17 Thread Richard Sandiford via Gcc-patches

Jonathan Wright  writes:
> Hi,
>
> This patch removes macros for vld3[q]_lane Neon intrinsics. This is a
> preparatory step before adding new modes for structures of Advanced
> SIMD vectors.
>
> Regression tested and bootstrapped on aarch64-none-linux-gnu - no
> issues.
>
> Ok for master?

OK, thanks.

Richard

> Thanks,
> Jonathan
>
> ---
>
> gcc/ChangeLog:
>
> 2021-08-16  Jonathan Wright  
>
> * config/aarch64/arm_neon.h (__LD3_LANE_FUNC): Delete.
> (__LD3Q_LANE_FUNC): Delete.
> (vld3_lane_u8): Define without macro.
> (vld3_lane_u16): Likewise.
> (vld3_lane_u32): Likewise.
> (vld3_lane_u64): Likewise.
> (vld3_lane_s8): Likewise.
> (vld3_lane_s16): Likewise.
> (vld3_lane_s32): Likewise.
> (vld3_lane_s64): Likewise.
> (vld3_lane_f16): Likewise.
> (vld3_lane_f32): Likewise.
> (vld3_lane_f64): Likewise.
> (vld3_lane_p8): Likewise.
> (vld3_lane_p16): Likewise.
> (vld3_lane_p64): Likewise.
> (vld3q_lane_u8): Likewise.
> (vld3q_lane_u16): Likewise.
> (vld3q_lane_u32): Likewise.
> (vld3q_lane_u64): Likewise.
> (vld3q_lane_s8): Likewise.
> (vld3q_lane_s16): Likewise.
> (vld3q_lane_s32): Likewise.
> (vld3q_lane_s64): Likewise.
> (vld3q_lane_f16): Likewise.
> (vld3q_lane_f32): Likewise.
> (vld3q_lane_f64): Likewise.
> (vld3q_lane_p8): Likewise.
> (vld3q_lane_p16): Likewise.
> (vld3q_lane_p64): Likewise.
> (vld3_lane_bf16): Likewise.
> (vld3q_lane_bf16): Likewise.
>
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 
> 91c072fe4572ff0012aced11e0f609168e4afc10..29b62988a91909a928e02fd6891803e936a1c6a9
>  100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -20334,100 +20334,525 @@ vld2q_lane_p64 (const poly64_t * __ptr, 
> poly64x2x2_t __b, const int __c)
>  
>  /* vld3_lane */
>  
> -#define __LD3_LANE_FUNC(intype, vectype, largetype, ptrtype, mode,  \
> -  qmode, ptrmode, funcsuffix, signedtype)   \
> -__extension__ extern __inline intype \
> -__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \
> -vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c)  \
> -{   \
> -  __builtin_aarch64_simd_ci __o;\
> -  largetype __temp; \
> -  __temp.val[0] =   \
> -vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0));   \
> -  __temp.val[1] =   \
> -vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0));   \
> -  __temp.val[2] =   \
> -vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0));   \
> -  __o = __builtin_aarch64_set_qregci##qmode (__o,   \
> - (signedtype) __temp.val[0],\
> - 0);\
> -  __o = __builtin_aarch64_set_qregci##qmode (__o,   \
> - (signedtype) __temp.val[1],\
> - 1);\
> -  __o = __builtin_aarch64_set_qregci##qmode (__o,   \
> - (signedtype) __temp.val[2],\
> - 2);\
> -  __o =  __builtin_aarch64_ld3_lane##mode (  
>\
> -   (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);   \
> -  __b.val[0] = (vectype) __builtin_aarch64_get_dregcidi (__o, 0);   \
> -  __b.val[1] = (vectype) __builtin_aarch64_get_dregcidi (__o, 1);   \
> -  __b.val[2] = (vectype) __builtin_aarch64_get_dregcidi (__o, 2);   \
> -  return __b;
>\
> +__extension__ extern __inline uint8x8x3_t
> +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
> +vld3_lane_u8 (const uint8_t * __ptr, uint8x8x3_t __b, const int __c)
> +{
> +  __builtin_aarch64_simd_ci __o;
> +  uint8x16x3_t __temp;
> +  __temp.val[0] = vcombine_u8 (__b.val[0], vcreate_u8 (0));
> +  __temp.val[1] = vcombine_u8 (__b.val[1], vcreate_u8 (0));
> +  __temp.val[2] = vcombine_u8 (__b.val[2], vcreate_u8 (0));
> +  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 
> 0);
> +  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 
> 1);
> +  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 
> 2);
> +  __o =  __builtin_aarch64_ld3_lanev8qi (
> +

Re: [PATCH 1/3] aarch64: Remove macros for vld2[q]_lane Neon intrinsics

2021-08-17 Thread Richard Sandiford via Gcc-patches

Jonathan Wright  writes:
> Hi,
>
> This patch removes macros for vld2[q]_lane Neon intrinsics. This is a
> preparatory step before adding new modes for structures of Advanced
> SIMD vectors.
>
> Regression tested and bootstrapped on aarch64-none-linux-gnu - no
> issues.
>
> Ok for master?

OK, thanks.  (TBH I'm taking it on faith that the expansions are correct.
I find this kind of thing very hard to check manually.)

Richard

>
> Thanks,
> Jonathan
>
> ---
>
> gcc/ChangeLog:
>
> 2021-08-12  Jonathan Wright  
>
> * config/aarch64/arm_neon.h (__LD2_LANE_FUNC): Delete.
> (__LD2Q_LANE_FUNC): Likewise.
> (vld2_lane_u8): Define without macro.
> (vld2_lane_u16): Likewise.
> (vld2_lane_u32): Likewise.
> (vld2_lane_u64): Likewise.
> (vld2_lane_s8): Likewise.
> (vld2_lane_s16): Likewise.
> (vld2_lane_s32): Likewise.
> (vld2_lane_s64): Likewise.
> (vld2_lane_f16): Likewise.
> (vld2_lane_f32): Likewise.
> (vld2_lane_f64): Likewise.
> (vld2_lane_p8): Likewise.
> (vld2_lane_p16): Likewise.
> (vld2_lane_p64): Likewise.
> (vld2q_lane_u8): Likewise.
> (vld2q_lane_u16): Likewise.
> (vld2q_lane_u32): Likewise.
> (vld2q_lane_u64): Likewise.
> (vld2q_lane_s8): Likewise.
> (vld2q_lane_s16): Likewise.
> (vld2q_lane_s32): Likewise.
> (vld2q_lane_s64): Likewise.
> (vld2q_lane_f16): Likewise.
> (vld2q_lane_f32): Likewise.
> (vld2q_lane_f64): Likewise.
> (vld2q_lane_p8): Likewise.
> (vld2q_lane_p16): Likewise.
> (vld2q_lane_p64): Likewise.
> (vld2_lane_bf16): Likewise.
> (vld2q_lane_bf16): Likewise.
>
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 
> 390cf9a774381365017895951b0209b7e64ce78b..91c072fe4572ff0012aced11e0f609168e4afc10
>  100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -19882,92 +19882,455 @@ vld4q_dup_p64 (const poly64_t * __a)
>  
>  /* vld2_lane */
>  
> -#define __LD2_LANE_FUNC(intype, vectype, largetype, ptrtype, mode,  \
> -  qmode, ptrmode, funcsuffix, signedtype)   \
> -__extension__ extern __inline intype \
> -__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \
> -vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c)  \
> -{   \
> -  __builtin_aarch64_simd_oi __o;\
> -  largetype __temp; \
> -  __temp.val[0] =   \
> -vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0));   \
> -  __temp.val[1] =   \
> -vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0));   \
> -  __o = __builtin_aarch64_set_qregoi##qmode (__o,   \
> - (signedtype) __temp.val[0],\
> - 0);\
> -  __o = __builtin_aarch64_set_qregoi##qmode (__o,   \
> - (signedtype) __temp.val[1],\
> - 1);\
> -  __o =  __builtin_aarch64_ld2_lane##mode (  
>\
> -   (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);   \
> -  __b.val[0] = (vectype) __builtin_aarch64_get_dregoidi (__o, 0);   \
> -  __b.val[1] = (vectype) __builtin_aarch64_get_dregoidi (__o, 1);   \
> -  return __b;
>\
> +__extension__ extern __inline uint8x8x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
> +vld2_lane_u8 (const uint8_t * __ptr, uint8x8x2_t __b, const int __c)
> +{
> +  __builtin_aarch64_simd_oi __o;
> +  uint8x16x2_t __temp;
> +  __temp.val[0] = vcombine_u8 (__b.val[0], vcreate_u8 (0));
> +  __temp.val[1] = vcombine_u8 (__b.val[1], vcreate_u8 (0));
> +  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 
> 0);
> +  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 
> 1);
> +  __o =  __builtin_aarch64_ld2_lanev8qi (
> +   (__builtin_aarch64_simd_qi *) __ptr, __o, __c);
> +  __b.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoidi (__o, 0);
> +  __b.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoidi (__o, 1);
> +  return __b;
>  }
>  
> -__LD2_LANE_FUNC (float16x4x2_t, float16x4_t, float16x8x2_t, float16_t, v4hf,
> -  v8hf, hf, f16, float16x8_t)
> -__LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v2sf, 
> v4sf,
> -  sf, f32, float32x4_t)
> -__LD2_LANE_FUNC (float64x1x2_t, float64x1_t, float64x2x

Re: [committed] Introduce selftest::locate_file (v5)

2021-08-17 Thread Richard Biener via Gcc-patches

On Tue, Aug 17, 2021 at 9:01 AM Thomas Schwinge  wrote:
>
> Hi!
>
> On 2016-12-14T21:31:05-0500, David Malcolm  wrote:
> > On Wed, 2016-12-14 at 15:02 +0100, Bernd Schmidt wrote:
> >> On 12/09/2016 08:32 PM, David Malcolm wrote:
> >> > Thanks.  Unfortunately, applying the "locate_file" patch
> >> >   https://gcc.gnu.org/ml/gcc-patches/2016-11/msg01186.html
> >> > would now introduce a regression in a recently-added test case:
> >>
> >> > The problem is that this DejaGnu test case uses -fself-test, and
> >> > doesn't provide any arguments.  With the locate_file patch, we need to
> >> > pass the path to $(srcdir)/testsuite/selftests as an argument to -fself
> >> > -test, and it's not clear to me how to do that sanely in a DejaGnu test
> >> > case
>
> Rather simple, actually -- once you realize how all this works.  ;-)
>
> >> > if I pass in a dummy value (like for pr71591.c), then the
> >> > selftests that use locate_file fail.
>
> > I've committed the following updated version to trunk (as r243681).
> >
> > Changed in v5:
> > * disable DejaGnu test for PR 78213
> >
> > Successfully bootstrapped&regrtested on x86_64-pc-linux-gnu (with 2 PASS
> > results converted to 1 UNSUPPORTED in gcc.sum, re gcc.dg/pr78213.c).
>
> > --- a/gcc/testsuite/gcc.dg/pr78213.c
> > +++ b/gcc/testsuite/gcc.dg/pr78213.c
> > @@ -1,6 +1,13 @@
> >  /* { dg-do compile } */
> >  /* { dg-options "-fself-test" } */
> >
> > +/* When this test was written -fself-test took no argument, but it
> > +   has subsequently gained a mandatory argument, giving the path
> > +   to selftest support files (within the srcdir).
> > +   It's not clear how to provide this path sanely from
> > +   within DejaGnu, so for now, this test is disabled.  */
> > +/* { dg-skip-if "" { *-*-* } } */
> > +
> >  /* Verify that -fself-test does not fail on a non empty source.  */
> >
> >  int i; 
> >  void bar();
> >  void foo()
>
> OK to push the attached "Restore 'gcc.dg/pr78213.c' testing" to master
> branch?

OK.

> See 'git grep --cached 'dg-.*options .*\$' -- */testsuite/' for
> pre-existing '$srcdir' usage in DejaGnu directives.
>
>
> Grüße
>  Thomas
>
>
> -
> Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
> München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
> Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
> München, HRB 106955

Re: Expensive selftests (was: 'hash_map<tree, hash_map<tree, tree>>')

2021-08-17 Thread Richard Biener via Gcc-patches

On Tue, Aug 17, 2021 at 8:40 AM Thomas Schwinge  wrote:
>
> Hi!
>
> On 2021-08-16T14:10:00-0600, Martin Sebor  wrote:
> > On 8/16/21 6:44 AM, Thomas Schwinge wrote:
> >> [...], to document the current behavior, I propose to
> >> "Add more self-tests for 'hash_map' with Value type with non-trivial
> >> constructor/destructor", see attached.  OK to push to master branch?
> >> (Also cherry-pick into release branches, eventually?)
>
> (Attached again, for easy reference.)
>
> > Adding more tests sounds like an excellent idea.  I'm not sure about
> > the idea of adding loopy selftests that iterate as many times as in
> > the patch (looks like 1234 times two?)
>
> Correct, and I agree it's a sensible concern, generally.
>
> The current 1234 times two iterations is really arbitrary (should
> document that in the test case), just so that we trigger a few hash table
> expansions.

You could lower N_init (the default init is just 13!),
even with just 128 inserted elements you'll trigger
expansions to 31, 61 and 127 elements.

> For 'selftest-c', we've got originally:
>
> -fself-test: 74775 pass(es) in 0.309299 seconds
> -fself-test: 74775 pass(es) in 0.366041 seconds
> -fself-test: 74775 pass(es) in 0.356663 seconds
> -fself-test: 74775 pass(es) in 0.355009 seconds
> -fself-test: 74775 pass(es) in 0.367575 seconds
> -fself-test: 74775 pass(es) in 0.320406 seconds
>
> ..., and with my changes we've got:
>
> -fself-test: 94519 pass(es) in 0.327755 seconds
> -fself-test: 94519 pass(es) in 0.369522 seconds
> -fself-test: 94519 pass(es) in 0.355531 seconds
> -fself-test: 94519 pass(es) in 0.362179 seconds
> -fself-test: 94519 pass(es) in 0.363176 seconds
> -fself-test: 94519 pass(es) in 0.318930 seconds
>
> So it really seems to be all in the noise?

Yes.  I think the test is OK but it's also reasonable to lower
the '1234' count and add a comment noting that the count should
trigger hashtable expansions "a few times".

Richard.

> Yet:
>
> > Selftests run each time GCC
> > builds (i.e., even during day to day development).  It seems to me
> > that it might be better to run such selftests only as part of
> > the bootstrap process.
>
> I'd rather have thought about a '--param self-test-expensive' (or
> similar), and then invoke the selftests via a new
> 'gcc/testsuite/selftests/expensive.exp' (or similar).
>
> Or, adapt 'gcc/testsuite/gcc.dg/plugin/expensive_selftests_plugin.c',
> that is, invoke them via the GCC plugin mechanism, which also seems to be
> easy enough?
>
> I don't have a strong opinion about where/when these tests get run, so
> will happily take any suggestions.
>
>
> Grüße
>  Thomas
>
>
> -
> Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
> München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
> Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
> München, HRB 106955

Re: [PATCH] more warning code refactoring

2021-08-17 Thread Richard Biener via Gcc-patches

On Tue, Aug 17, 2021 at 3:52 AM Martin Sebor via Gcc-patches
 wrote:
>
> The attached patch continues with the move of warning code from
> builtins.c and calls.c into a more suitable home.  As before, it
> is mostly free of functional changes.  The one exception is that,
> as a pleasant side-effect, moving the attribute access checking
> from initialize_argument_information() in calls.c to the new
> warning pass also happens to fix PR 101854.  This is thanks to
> the latter iterating over function arguments explicitly provided
> in the program and not having to worry about skipping over
> the additional pointer argument synthesized for calls to functions
> that return a large struct by value that the former function sneaks
> into the argument list.
>
> Tested on x86_64-linux.

OK.

Thanks,
Richard.

> Martin
>
> Previous patches in this series:
> https://gcc.gnu.org/pipermail/gcc-patches/2021-August/576821.html
> https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575377.html

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-17 Thread Richard Biener via Gcc-patches

On Mon, 16 Aug 2021, Qing Zhao wrote:

> 
> 
> > On Aug 16, 2021, at 2:40 AM, Richard Biener  wrote:
> > 
> > On Thu, 12 Aug 2021, Qing Zhao wrote:
> > 
> >> Hi, Richard,
> >> 
> >> For RTL expansion of call to .DEFERRED_INIT, I changed my code per your 
> >> suggestions like following:
> >> 
> >> ==
> >> #define INIT_PATTERN_VALUE  0xFE
> >> static void
> >> expand_DEFERRED_INIT (internal_fn, gcall *stmt)
> >> {
> >>  tree lhs = gimple_call_lhs (stmt);
> >>  tree var_size = gimple_call_arg (stmt, 0);
> >>  enum auto_init_type init_type
> >>= (enum auto_init_type) TREE_INT_CST_LOW (gimple_call_arg (stmt, 1));
> >>  bool is_vla = (bool) TREE_INT_CST_LOW (gimple_call_arg (stmt, 2));
> >> 
> >>  tree var_type = TREE_TYPE (lhs);
> >>  gcc_assert (init_type > AUTO_INIT_UNINITIALIZED);
> >> 
> >>  if (is_vla || (!can_native_interpret_type_p (var_type)))
> >>{
> >>/* If this is a VLA or the type of the variable cannot be natively
> >>   interpreted, expand to a memset to initialize it.  */
> >>  if (TREE_CODE (lhs) == SSA_NAME)
> >>lhs = SSA_NAME_VAR (lhs);
> >>  tree var_addr = NULL_TREE;
> >>  if (is_vla)
> >>var_addr = TREE_OPERAND (lhs, 0);
> >>  else
> >>{
> >> TREE_ADDRESSABLE (lhs) = 1;
> >> var_addr = build_fold_addr_expr (lhs);
> >>}
> >>  tree value = (init_type == AUTO_INIT_PATTERN) ?
> >>build_int_cst (unsigned_char_type_node,
> >>   INIT_PATTERN_VALUE) :
> >>build_zero_cst (unsigned_char_type_node);
> >>  tree m_call = build_call_expr (builtin_decl_implicit 
> >> (BUILT_IN_MEMSET),
> >> 3, var_addr, value, var_size);
> >>  /* Expand this memset call.  */
> >>  expand_builtin_memset (m_call, NULL_RTX, TYPE_MODE (var_type));
> >>}
> >>  else
> >>{
> >>/* If this is not a VLA and the type of the variable can be natively 
> >>   interpreted, expand to assignment to generate better code.  */
> >>  tree pattern = NULL_TREE;
> >>  unsigned HOST_WIDE_INT total_bytes
> >>= tree_to_uhwi (TYPE_SIZE_UNIT (var_type));
> >> 
> >>  if (init_type == AUTO_INIT_PATTERN)
> >>{
> >>  unsigned char *buf = (unsigned char *) xmalloc (total_bytes);
> >>  memset (buf, INIT_PATTERN_VALUE, total_bytes);
> >>  pattern = native_interpret_expr (var_type, buf, total_bytes);
> >>  gcc_assert (pattern);
> >>}
> >> 
> >>  tree init = (init_type == AUTO_INIT_PATTERN) ?
> >>   pattern :
> >>   build_zero_cst (var_type);
> >>  expand_assignment (lhs, init, false);
> >>}
> >> }
> >> ===
> >> 
> >> Now, I used “can_native_interpret_type_p (var_type)” instead of 
> >> “use_register_for_decl (lhs)” to decide 
> >> whether to use “memset” or use “assign” to expand this function.
> >> 
> >> However, this exposed a bug that is very hard to address:
> >> 
> >> ***For the testing case: test suite/gcc.dg/uninit-I.c:
> >> 
> >> /* { dg-do compile } */
> >> /* { dg-options "-O2 -Wuninitialized" } */
> >> 
> >> int sys_msgctl (void)
> >> {
> >>  struct { int mode; } setbuf;
> >>  return setbuf.mode;  /* { dg-warning "'setbuf\.mode' is used" } */
> >> ==
> >> 
> >> **the above auto var “setbuf” has “struct” type, for which 
> >> “can_native_interpret_type_p(var_type)” is false; therefore, 
> >> expanding this .DEFERRED_INIT call went down the “memset” expansion route. 
> >> 
> >> However, this structure type fits into a register, and therefore its 
> >> address cannot be taken anymore at this stage, even though I tried:
> >> 
> >> TREE_ADDRESSABLE (lhs) = 1;
> >> var_addr = build_fold_addr_expr (lhs);
> >> 
> >> To create an address variable for it, the expansion still failed at 
> >> expr.c: line 8412:
> >> during RTL pass: expand
> >> /home/opc/Work/GCC/latest-gcc/gcc/testsuite/gcc.dg/auto-init-uninit-I.c:6:24:
> >>  internal compiler error: in expand_expr_addr_expr_1, at expr.c:8412
> >> 0xd04104 expand_expr_addr_expr_1
> >>../../latest-gcc/gcc/expr.c:8412
> >> 0xd04a95 expand_expr_addr_expr
> >>../../latest-gcc/gcc/expr.c:8525
> >> 0xd13592 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, 
> >> expand_modifier, rtx_def**, bool)
> >>../../latest-gcc/gcc/expr.c:11741
> >> 0xd05142 expand_expr_real(tree_node*, rtx_def*, machine_mode, 
> >> expand_modifier, rtx_def**, bool)
> >>../../latest-gcc/gcc/expr.c:8713
> >> 0xaed1d3 expand_expr
> >>../../latest-gcc/gcc/expr.h:301
> >> 0xaf0d89 get_memory_rtx
> >>../../latest-gcc/gcc/builtins.c:1370
> >> 0xafb4fb expand_builtin_memset_args
> >>../../latest-gcc/gcc/builtins.c:4102
> >> 0xafacde expand_builtin_memset(tree_node*, rtx_def*, machine_mode)
> >>../../latest-gcc/gcc/builtins.c:3886
> >> 0xe97fb3 expand_DEFERRED_INIT
> >> 
> >> **That’s the major reason why I chose

[PATCH] Use __builtin_trap() for abort() if inhibit_libc

2021-08-17 Thread Sebastian Huber

abort() is used in gcc_assert() and gcc_unreachable(), which are used by target
libraries such as libgcov.a.  This patch changes the abort() definition under
certain conditions.  If inhibit_libc is defined and abort is not already
defined, then abort() is defined to __builtin_trap().

The inhibit_libc define is usually defined if GCC is built for targets running
in embedded systems which may optionally use a C standard library.  If
inhibit_libc is defined, then there may be still a full featured abort()
available.  abort() is a heavy weight function which depends on signals and
file streams.  For statically linked applications, this means that a dependency
on gcc_assert() pulls in the support for signals and file streams.  This could
prevent using gcov to test low end targets for example.  Using __builtin_trap()
avoids these dependencies if the target implements a "trap" instruction.  The
application or operating system could use a trap handler to react to failed GCC
runtime checks which caused a trap.

gcc/

* tsystem.h (abort): Define abort() if inhibit_libc is defined and it
is not already defined.
---
 gcc/tsystem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/tsystem.h b/gcc/tsystem.h
index e1e6a96a4f48..5c72c69ff3ed 100644
--- a/gcc/tsystem.h
+++ b/gcc/tsystem.h
@@ -59,7 +59,7 @@ extern int atexit (void (*)(void));
 #endif
 
 #ifndef abort
-extern void abort (void) __attribute__ ((__noreturn__));
+#define abort() __builtin_trap ()
 #endif
 
 #ifndef strlen
-- 
2.26.2

[PATCH] Optimize seed_seq construction

2021-08-17 Thread Antony Polukhin via Gcc-patches

When std::seed_seq is constructed from random access iterators we can
detect the internal vector size in O(1). Reserving memory for elements
in such cases may avoid multiple memory allocations.

libstdc++-v3/ChangeLog:

* include/bits/random.tcc: Optimize seed_seq construction.

-- 
Best regards,
Antony Polukhin
diff --git a/libstdc++-v3/include/bits/random.tcc 
b/libstdc++-v3/include/bits/random.tcc
index bf43970..816bfc1 100644
--- a/libstdc++-v3/include/bits/random.tcc
+++ b/libstdc++-v3/include/bits/random.tcc
@@ -3234,14 +3234,31 @@ namespace __detail
   template
 seed_seq::seed_seq(std::initializer_list<_IntType> __il)
 {
+  _M_v.reserve(__il.size());
   for (auto __iter = __il.begin(); __iter != __il.end(); ++__iter)
_M_v.push_back(__detail::__mod::__value>(*__iter));
 }
 
+  template
+void __reserve_if_distance_cheap(_Vector& __vec, _InputIterator __begin,
+  _InputIterator __end, random_access_iterator_tag)
+{
+  __vec.reserve(__end - __begin);
+}
+
+  template
+void __reserve_if_distance_cheap(_Vector&, _InputIterator,
+  _InputIterator, _Tag)
+{
+  // computing the distance between __begin and __end is not O(1)
+}
+
   template
 seed_seq::seed_seq(_InputIterator __begin, _InputIterator __end)
 {
+  std::__reserve_if_distance_cheap(_M_v, __begin, __end,
+  typename iterator_traits<_InputIterator>::iterator_category());
   for (_InputIterator __iter = __begin; __iter != __end; ++__iter)
_M_v.push_back(__detail::__mod::__value>(*__iter));

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-17 Thread Richard Biener via Gcc-patches

On Mon, 16 Aug 2021, Qing Zhao wrote:

> My current code for expand_DEFERRED_INIT is like the following, could you 
> check and see whether there is any issue for it:
> 
> #define INIT_PATTERN_VALUE  0xFE
> static void
> expand_DEFERRED_INIT (internal_fn, gcall *stmt)
> {
>   tree lhs = gimple_call_lhs (stmt);
>   tree var_size = gimple_call_arg (stmt, 0);
>   enum auto_init_type init_type
> = (enum auto_init_type) TREE_INT_CST_LOW (gimple_call_arg (stmt, 1));
>   bool is_vla = (bool) TREE_INT_CST_LOW (gimple_call_arg (stmt, 2));
> 
>   tree var_type = TREE_TYPE (lhs);
>   gcc_assert (init_type > AUTO_INIT_UNINITIALIZED);
> 
>   if (is_vla || (!use_register_for_decl (lhs)))
> {
>   if (TREE_CODE (lhs) == SSA_NAME)
> lhs = SSA_NAME_VAR (lhs);

this should not be necessary (in fact you shouldn't see a SSA_NAME
here, if you do then using SSA_NAME_VAR is wrong)

> /* If this is a VLA or the variable is not in register,
>expand to a memset to initialize it.  */
>   tree var_addr = NULL_TREE;
>   if (is_vla)
> var_addr = TREE_OPERAND (lhs, 0);
>   else
> {
>   TREE_ADDRESSABLE (lhs) = 1;
>   var_addr = build_fold_addr_expr (lhs);
> }

use, independent of is_vla

 mark_addressable (lhs);
 var_addr = build_fold_addr_expr (lhs);

> 
>   tree value = (init_type == AUTO_INIT_PATTERN) ?
> build_int_cst (unsigned_char_type_node,
>INIT_PATTERN_VALUE) :
> build_zero_cst (unsigned_char_type_node);

since memset has an integer argument for the value use
integer_zero_node for the zero case and build_int_cst (integer_type_node, 
...) for the pattern case

>   tree m_call = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMSET),
>  3, var_addr, value, var_size);
>   /* Expand this memset call.  */
>   expand_builtin_memset (m_call, NULL_RTX, TYPE_MODE (var_type));
> }
>   else
> {
> /* If this variable is in a register, use expand_assignment might
>generate better code.  */
>   tree pattern = NULL_TREE;
>   unsigned HOST_WIDE_INT total_bytes
> = tree_to_uhwi (TYPE_SIZE_UNIT (var_type));
> 
>   if (init_type == AUTO_INIT_PATTERN)
> {
>   if (can_native_interpret_type_p (var_type))
> {
>   unsigned char *buf = (unsigned char *) xmalloc (total_bytes);
>   memset (buf, INIT_PATTERN_VALUE, total_bytes);
>   pattern = native_interpret_expr (var_type, buf, total_bytes);
>   gcc_assert (pattern);
> }
>   else
> {
>   tree index_type = build_index_type (size_int (total_bytes - 1));
>   tree array_type = build_array_type (unsigned_char_type_node,
>   index_type);
>   tree element = build_int_cst (unsigned_char_type_node,
> INIT_PATTERN_VALUE);
> >   vec<constructor_elt, va_gc> *elts = NULL;
>   for (unsigned int i = 0; i < total_bytes; i++)
> CONSTRUCTOR_APPEND_ELT (elts, NULL_TREE, element);
>   pattern = build_constructor (array_type, elts);
>   pattern = build1 (VIEW_CONVERT_EXPR, var_type, pattern);
> }
> }
> 
>   tree init = (init_type == AUTO_INIT_PATTERN) ?
>pattern :
>build_zero_cst (var_type);

maybe conditionally initialize init instead of pattern and init?
Thus replace pattern by init and do

else
  init = build_zero_cst (var_type);


the above should work, as said the RTL expansion part can possibly
be improved but we can do this as followup as well.

>   expand_assignment (lhs, init, false);
> }
> }
> 
> Thanks.
> 
> Qing
> 
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Re: [PATCH] libcpp: __VA_OPT__ p1042r1 placemarker changes [PR101488]

2021-08-17 Thread Jakub Jelinek via Gcc-patches

On Mon, Aug 16, 2021 at 06:07:57PM -0400, Jason Merrill wrote:
> > It is unclear if it would be enough
> > to remove just one or if all padding tokens should be removed.
> > Anyway, e.g. the previous removal of all padding tokens at the end of
> > __VA_OPT__ is undesirable, as it e.g. eats also the padding tokens needed
> > for the H4 example from the paper.
> 
> Hmm, I don't see why.  Looking at the H4 example, it seems that the
> expansion of __VA_OPT__ should be
> 
>  a <placemarker>
> 
> so when we paste to b, b is pasted to the placemarker, leaving a as a
> separate token.

#define H4(X, ...) __VA_OPT__(a X ## X) ## b
H4(, 1)  // replaced by a b

We actually get with vanilla trunk
  a <padding> <padding>
where the former <padding> comes from:
2216  /* Padding on the left of an argument (unless RHS of ##).  */
2217  if ((!pfile->state.in_directive || 
pfile->state.directive_wants_padding)
2218  && src != macro->exp.tokens && !(src[-1].flags & PASTE_LEFT)
2219  && !last_token_is (buff, vaopt_start))
2220{
2221  const cpp_token *t = padding_token (pfile, src);
2222  unsigned index = expanded_token_index (pfile, macro, src, i);
2223  /* Allocate a virtual location for the padding token and
2224 append the token and its location to BUFF and
2225 VIRT_LOCS.   */
2226  tokens_buff_add_token (buff, virt_locs, t,
2227 t->src_loc, t->src_loc,
2228 map, index);
2229}
and the latter one is added at
2303  /* Avoid paste on RHS (even case count == 0).  */
2304  if (!pfile->state.in_directive && !(src->flags & PASTE_LEFT)
2305  && !last_token_is (buff, vaopt_start))
2306{
2307  const cpp_token *t = &pfile->avoid_paste;
2308  tokens_buff_add_token (buff, virt_locs,
2309 t, t->src_loc, t->src_loc,
2310 NULL, 0);
2311}
and trunk eats both <padding>s in:
  /* Remove any tail padding from inside the __VA_OPT__.  */
  paste_flag = tokens_buff_last_token_ptr (buff);
  while (paste_flag && paste_flag != start
 && (*paste_flag)->type == CPP_PADDING)
{
  tokens_buff_remove_last_token (buff);
  paste_flag = tokens_buff_last_token_ptr (buff);
}
and thus H4(, 1) is replaced by ab instead of the right a b.

We want to remove the latter <padding> but not the former one, and
the patch adds the vaopt_padding_tokens counter for it to control
how many placemarkers are removed on vaopt_state::END.
As can be seen in #c1 and #c2 of the PR, I've tried various approaches,
but none of them, except the posted one, worked out for all the cases.

Jakub

Re: [Patch][GCC][middle-end] - Generate FRINTZ for (double)(int) under -ffast-math on aarch64

2021-08-17 Thread Richard Biener via Gcc-patches

On Mon, Aug 16, 2021 at 8:48 PM Andrew Pinski via Gcc-patches
 wrote:
>
> On Mon, Aug 16, 2021 at 9:15 AM Jirui Wu via Gcc-patches
>  wrote:
> >
> > Hi all,
> >
> > This patch generates FRINTZ instruction to optimize type casts.
> >
> > The changes in this patch cover:
> > * Optimization of a FIX_TRUNC_EXPR cast inside a FLOAT_EXPR using IFN_TRUNC.
> > * Change of corresponding test cases.
> >
> > Regtested on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master? If OK can it be committed for me, I have no commit rights.
>
> Is there a reason why you are doing the transformation manually inside
> forwprop rather than handling it inside match.pd?
> Also can't this only be done for -ffast-math case?

You definitely have to look at the intermediate type - that could be a uint8_t
or even a boolean type.  So unless the intermediate type can represent
all float values optimizing to trunc() is invalid.  Also if you emit
IFN_TRUNC you have to make sure there's target support - we don't
emit calls to a library trunc() from an internal function call (and we wouldn't
want to optimize it that way).

Richard.

>
> Thanks,
> Andrew Pinski
>
> >
> > Thanks,
> > Jirui
> >
> > gcc/ChangeLog:
> >
> > * tree-ssa-forwprop.c (pass_forwprop::execute): Optimize with 
> > frintz.
> >
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/aarch64/fix_trunc1.c: Update to new expectation.

1 2 >

1 - 100 of 106 matches

Mail list logo