Re: [PATCH] c++: Partially implement P1042R1: __VA_OPT__ wording clarifications [PR92319]

2020-02-13 Thread Jason Merrill

On 1/31/20 7:14 PM, Jakub Jelinek wrote:

Hi!

I've noticed we claim in cxx-status.html that we implement P1042R1,
but it seems we don't implement any of the changes from there.
The following patch implements just the change that __VA_OPT__ determines
whether to expand to nothing or the enclosed tokens no longer based on
whether there were any tokens passed to __VA_ARGS__, but whether __VA_ARGS__
expands to any tokens (from testing apparently it has to be non-CPP_PADDING
tokens).
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?


OK.


I'm afraid I'm completely lost about the padding preservation/removal
changes that are also in the paper, so haven't touched that part.

2020-01-31  Jakub Jelinek  

Partially implement P1042R1: __VA_OPT__ wording clarifications
PR preprocessor/92319
* macro.c (expand_arg): Move declarations before vaopt_state
definition.
(class vaopt_state): Move enum update_type definition earlier.  Remove
m_allowed member, add m_arg and m_update members.
(vaopt_state::vaopt_state): Change last argument from bool any_args
to macro_arg *arg, initialize m_arg and m_update instead of m_allowed.
(vaopt_state::update): When bumping m_state from 1 to 2 and m_update
is ERROR, determine if __VA_ARGS__ expansion has any non-CPP_PADDING
tokens and set m_update to INCLUDE if it has any, DROP otherwise.
Return m_update instead of m_allowed ? INCLUDE : DROP in m_state >= 2.
(replace_args, create_iso_definition): Adjust last argument to
vaopt_state ctor.

* c-c++-common/cpp/va-opt-4.c: New test.

--- libcpp/macro.c.jj   2020-01-29 09:35:06.022244483 +0100
+++ libcpp/macro.c  2020-01-31 12:28:02.444695860 +0100
@@ -93,6 +93,8 @@ struct macro_arg_saved_data {
  static const char *vaopt_paste_error =
N_("'##' cannot appear at either end of __VA_OPT__");
  
+static void expand_arg (cpp_reader *, macro_arg *);

+
  /* A class for tracking __VA_OPT__ state while iterating over a
 sequence of tokens.  This is used during both macro definition and
 expansion.  */
@@ -100,28 +102,29 @@ class vaopt_state {
  
   public:
  
+  enum update_type

+  {
+ERROR,
+DROP,
+INCLUDE,
+BEGIN,
+END
+  };
+
/* Initialize the state tracker.  ANY_ARGS is true if variable
   arguments were provided to the macro invocation.  */
-  vaopt_state (cpp_reader *pfile, bool is_variadic, bool any_args)
+  vaopt_state (cpp_reader *pfile, bool is_variadic, macro_arg *arg)
  : m_pfile (pfile),
-m_allowed (any_args),
+m_arg (arg),
  m_variadic (is_variadic),
  m_last_was_paste (false),
  m_state (0),
  m_paste_location (0),
-m_location (0)
+m_location (0),
+m_update (ERROR)
{
}
  
-  enum update_type

-  {
-ERROR,
-DROP,
-INCLUDE,
-BEGIN,
-END
-  };
-
/* Given a token, update the state of this tracker and return a
   boolean indicating whether the token should be be included in the
   expansion.  */
@@ -154,6 +157,23 @@ class vaopt_state {
return ERROR;
  }
++m_state;
+   if (m_update == ERROR)
+ {
+   if (m_arg == NULL)
+ m_update = INCLUDE;
+   else
+ {
+   m_update = DROP;
+   if (!m_arg->expanded)
+ expand_arg (m_pfile, m_arg);
+   for (unsigned idx = 0; idx < m_arg->expanded_count; ++idx)
+ if (m_arg->expanded[idx]->type != CPP_PADDING)
+   {
+ m_update = INCLUDE;
+ break;
+   }
+ }
+ }
return DROP;
}
  else if (m_state >= 2)
@@ -197,7 +217,7 @@ class vaopt_state {
return END;
  }
  }
-   return m_allowed ? INCLUDE : DROP;
+   return m_update;
}
  
  /* Nothing to do with __VA_OPT__.  */

@@ -219,8 +239,9 @@ class vaopt_state {
/* The cpp_reader.  */
cpp_reader *m_pfile;
  
-  /* True if there were varargs.  */

-  bool m_allowed;
+  /* The __VA_ARGS__ argument.  */
+  macro_arg *m_arg;
+
/* True if the macro is variadic.  */
bool m_variadic;
/* If true, the previous token was ##.  This is used to detect when
@@ -239,6 +260,10 @@ class vaopt_state {
  
/* Location of the __VA_OPT__ token.  */

location_t m_location;
+
+  /* If __VA_ARGS__ substitutes to no preprocessing tokens,
+ INCLUDE, otherwise DROP.  ERROR when unknown yet.  */
+  update_type m_update;
  };
  
  /* Macro expansion.  */

@@ -256,7 +281,6 @@ static _cpp_buff *collect_args (cpp_read
_cpp_buff **, unsigned *);
  static cpp_context *next_context (cpp_reader *);
  static const cpp_token *padding_token (cpp_reader *, const cpp_token *);
-static void expand_arg (cpp_reader *, macro_arg *);
  static const cpp_token *new_string_token 

Re: [PATCH]Several intrinsic macros lack a closing parenthesis[PR93274]

2020-02-13 Thread Uros Bizjak
On Fri, Feb 14, 2020 at 7:03 AM Hongtao Liu  wrote:
>
> On Thu, Feb 13, 2020 at 5:31 PM Hongtao Liu  wrote:
> >
> > On Thu, Feb 13, 2020 at 5:12 PM Uros Bizjak  wrote:
> > >
> > > On Thu, Feb 13, 2020 at 9:53 AM Jakub Jelinek  wrote:
> > > >
> > > > On Thu, Feb 13, 2020 at 09:39:05AM +0100, Uros Bizjak wrote:
> > > > > > Changelog
> > > > > > gcc/
> > > > > >* config/i386/avx512vbmi2intrin.h
> > > > > >(_mm512_[,mask_,maskz_]shrdi_epi16,
> > > > > >_mm512_[,mask_,maskz_]shrdi_epi32,
> > > > > >_m512_[,mask_,maskz_]shrdi_epi64,
> > > > > >_mm512_[,mask_,maskz_]shldi_epi16,
> > > > > >_mm512_[,mask_,maskz_]shldi_epi32,
> > > > > >_m512_[,mask_,maskz_]shldi_epi64): Fix typo of lacking a
> > > > > >closing parenthesis.
> > > > > >* config/i386/avx512vbmi2vlintrin.h
> > > > > >(_mm256_[,mask_,maskz_]shrdi_epi16,
> > > > > >_mm256_[,mask_,maskz_]shrdi_epi32,
> > > > > >_m256_[,mask_,maskz_]shrdi_epi64,
> > > > > >_mm_[,mask_,maskz_]shrdi_epi16,
> > > > > >_mm_[,mask_,maskz_]shrdi_epi32,
> > > > > >_mm_[,mask_,maskz_]shrdi_epi64,
> > > > > >_mm256_[,mask_,maskz_]shldi_epi16,
> > > > > >_mm256_[,mask_,maskz_]shldi_epi32,
> > > > > >_m256_[,mask_,maskz_]shldi_epi64,
> > > > > >_mm_[,mask_,maskz_]shldi_epi16,
> > > > > >_mm_[,mask_,maskz_]shldi_epi32,
> > > > > >_mm_[,mask_,maskz_]shldi_epi64): Ditto.
> > > > > >
> > > > > > gcc/testsuite/
> > > > > >* gcc.target/i386/avx512vbmi2-vpshld-1.c: New test.
> > > > > >* gcc.target/i386/avx512vbmi2-vpshld-O0-1.c: Ditto.
> > > > > >* gcc.target/i386/avx512vbmi2-vpshrd-1.c: Ditto.
> > > > > >* gcc.target/i386/avx512vbmi2-vpshrd-O0-1.c: Ditto.
> > > > > >* gcc.target/i386/avx512vl-vpshld-O0-1.c: Ditto.
> > > > > >* gcc.target/i386/avx512vl-vpshrd-O0-1.c: Ditto.
> > > > >
> > > > > This is obvious patch, so OK for mainline and backports.
> > > >
> > > > The header changes sure, but for the testsuite, the standard way
> > > > would be to have it covered in the standard tests we have for this.
> > > > I think that is gcc.target/i386/sse-{13,14,22a,23}.c, so it would be 
> > > > worth
> > > > trying to figure out why it hasn't caught that.
> > >
> > > Indeed. It looks that these macros are not listed in sse-14.c, which
> > > would catch the problem. So, there is no need for new -O0 tests,
> > > please add missing functions to sse-14.c and sse-22.c testcases. I was
> > > also surprised that no testsuite coverage for vbmi2 functions was
> > > added at submission.
> > >
> > Yes, i saw that, thanks.
> > > Uros.
> > >
> > > > And, I don't think we allow any wildcards etc. (and 
> > > > [,whatever,whateverelse]
> > > > isn't even one, neither regexp nor shell wildcard) in the names of 
> > > > functions
> > > > changed, they can appear in the description text, but for the names of
> > > > macros one needs to list them all expanded, people do grep for those.
> > > >
> > > > Jakub
> > > >
> >
> >
> >
> > --
> > BR,
> > Hongtao
>
> Update patch:
> Update Changelog, delete O0 testcase, and add testcase in sse-14.c, sse-22.c

OK.

Thanks,
Uros.

> --
> BR,
> Hongtao


Re: [PATCH, rs6000]: mark clobber for registers changed by untpyed_call

2020-02-13 Thread Jiufu Guo
Segher Boessenkool  writes:

> On Sat, Feb 08, 2020 at 10:17:42AM -0600, Segher Boessenkool wrote:
>> And we do not know which of the register will be used for the return, in
>> untyped_call (only untyped-return knows).  But we can add clobbers of all
>> registers that *might* be used for the return, we do know that here, see
>> operands[2] of untyped_call.
>
> Clobbers in parallel to the call, I mean, not as separate insns later.
>
Thanks Segher, as discussed, we may refine the patch by adding a
'barrier' to avoid instructions mis-scheduled.  This improvement patch
maybe fine for practice.

Below is the updated patch: bootstrap and regtest pass on powerpc64le.
Is this ok to submit to trunk?
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index f3c8eb0..2fbe68f 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -10867,6 +10867,10 @@
 
   emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx));
 
+  for (int i = 0; i < XVECLEN (operands[2], 0); i++)
+emit_clobber (SET_SRC (XVECEXP (operands[2], 0, i)));
+  emit_insn (gen_blockage ());
+
   for (i = 0; i < XVECLEN (operands[2], 0); i++)
 {
   rtx set = XVECEXP (operands[2], 0, i);
diff --git a/gcc/testsuite/gcc.dg/torture/stackalign/builtin-return-2.c 
b/gcc/testsuite/gcc.dg/torture/stackalign/builtin-return-2.c
new file mode 100644
index 000..7719109
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/stackalign/builtin-return-2.c
@@ -0,0 +1,40 @@
+/* PR target/93047 */
+/* Originator: Andrew Church  */
+/* { dg-do run } */
+/* { dg-additional-options "-O3 -frename-registers" } */
+/* { dg-require-effective-target untyped_assembly } */
+
+#ifdef __MMIX__
+/* No parameters on stack for bar.  */
+#define STACK_ARGUMENTS_SIZE 0
+#else
+#define STACK_ARGUMENTS_SIZE 64
+#endif
+
+extern void abort(void);
+
+int foo(int n)
+{
+  return n+1;
+}
+
+int bar(int n)
+{
+  __builtin_return(__builtin_apply((void (*)(void))foo, __builtin_apply_args(),
+  STACK_ARGUMENTS_SIZE));
+}
+
+int main(void)
+{
+  /* Allocate 64 bytes on the stack to make sure that __builtin_apply
+ can read at least 64 bytes above the return address.  */
+  char dummy[64];
+
+  __asm__ ("" : : "" (dummy));
+
+  if (bar(1) != 2)
+abort();
+
+  return 0;
+}
+
>
> Segher


Re: [PATCH]Several intrinsic macros lack a closing parenthesis[PR93274]

2020-02-13 Thread Hongtao Liu
On Thu, Feb 13, 2020 at 5:31 PM Hongtao Liu  wrote:
>
> On Thu, Feb 13, 2020 at 5:12 PM Uros Bizjak  wrote:
> >
> > On Thu, Feb 13, 2020 at 9:53 AM Jakub Jelinek  wrote:
> > >
> > > On Thu, Feb 13, 2020 at 09:39:05AM +0100, Uros Bizjak wrote:
> > > > > Changelog
> > > > > gcc/
> > > > >* config/i386/avx512vbmi2intrin.h
> > > > >(_mm512_[,mask_,maskz_]shrdi_epi16,
> > > > >_mm512_[,mask_,maskz_]shrdi_epi32,
> > > > >_m512_[,mask_,maskz_]shrdi_epi64,
> > > > >_mm512_[,mask_,maskz_]shldi_epi16,
> > > > >_mm512_[,mask_,maskz_]shldi_epi32,
> > > > >_m512_[,mask_,maskz_]shldi_epi64): Fix typo of lacking a
> > > > >closing parenthesis.
> > > > >* config/i386/avx512vbmi2vlintrin.h
> > > > >(_mm256_[,mask_,maskz_]shrdi_epi16,
> > > > >_mm256_[,mask_,maskz_]shrdi_epi32,
> > > > >_m256_[,mask_,maskz_]shrdi_epi64,
> > > > >_mm_[,mask_,maskz_]shrdi_epi16,
> > > > >_mm_[,mask_,maskz_]shrdi_epi32,
> > > > >_mm_[,mask_,maskz_]shrdi_epi64,
> > > > >_mm256_[,mask_,maskz_]shldi_epi16,
> > > > >_mm256_[,mask_,maskz_]shldi_epi32,
> > > > >_m256_[,mask_,maskz_]shldi_epi64,
> > > > >_mm_[,mask_,maskz_]shldi_epi16,
> > > > >_mm_[,mask_,maskz_]shldi_epi32,
> > > > >_mm_[,mask_,maskz_]shldi_epi64): Ditto.
> > > > >
> > > > > gcc/testsuite/
> > > > >* gcc.target/i386/avx512vbmi2-vpshld-1.c: New test.
> > > > >* gcc.target/i386/avx512vbmi2-vpshld-O0-1.c: Ditto.
> > > > >* gcc.target/i386/avx512vbmi2-vpshrd-1.c: Ditto.
> > > > >* gcc.target/i386/avx512vbmi2-vpshrd-O0-1.c: Ditto.
> > > > >* gcc.target/i386/avx512vl-vpshld-O0-1.c: Ditto.
> > > > >* gcc.target/i386/avx512vl-vpshrd-O0-1.c: Ditto.
> > > >
> > > > This is obvious patch, so OK for mainline and backports.
> > >
> > > The header changes sure, but for the testsuite, the standard way
> > > would be to have it covered in the standard tests we have for this.
> > > I think that is gcc.target/i386/sse-{13,14,22a,23}.c, so it would be worth
> > > trying to figure out why it hasn't caught that.
> >
> > Indeed. It looks that these macros are not listed in sse-14.c, which
> > would catch the problem. So, there is no need for new -O0 tests,
> > please add missing functions to sse-14.c and sse-22.c testcases. I was
> > also surprised that no testsuite coverage for vbmi2 functions was
> > added at submission.
> >
> Yes, i saw that, thanks.
> > Uros.
> >
> > > And, I don't think we allow any wildcards etc. (and 
> > > [,whatever,whateverelse]
> > > isn't even one, neither regexp nor shell wildcard) in the names of 
> > > functions
> > > changed, they can appear in the description text, but for the names of
> > > macros one needs to list them all expanded, people do grep for those.
> > >
> > > Jakub
> > >
>
>
>
> --
> BR,
> Hongtao

Update patch:
Update Changelog, delete O0 testcase, and add testcase in sse-14.c, sse-22.c

-- 
BR,
Hongtao


0001-Intrinsic-macro-of-vpshr-and-vpshl-lack-a-closing-pa.patch
Description: Binary data


Re: [PATCH 2/3] libstdc++: Implement C++20 constrained algorithms

2020-02-13 Thread François Dumont

Thanks for those additional information.

I still think that the same way we rely on STL algos for 
push_heap/pop_heap/... we should do it for copy/move/... from 
stl_algobase.h for RAI.


In fact range algos that are already trying to call C functions should 
just try to call the STL counterparts which will then forward to C 
functions.


On 2/13/20 8:00 PM, Jonathan Wakely wrote:

On 13/02/20 19:07 +0100, François Dumont wrote:

On 2/4/20 3:07 AM, Patrick Palka wrote:

This patch implements the C++20 ranges overloads for the algorithms in
[algorithms].  Most of the algorithms were reimplemented, with each 
of their

implementations very closely following the existing implementation in
bits/stl_algo.h and bits/stl_algobase.h.  The reason for 
reimplementing most of
the algorithms instead of forwarding to their STL-style overload is 
because
forwarding cannot be conformantly and efficiently performed for 
algorithms that

operate on non-random-access iterators.

Why ? Do you have a clear counter-example ?

Maybe at the time you wrote this code those algos were not constexpr 
qualified, but they are now.


It has nothing to do with constexpr.

If you call a ranges algo with an iterator and a sentinel that is a
different type to the iterator, you can't call the old STL algo unless
you can efficiently get an end iterator that refers to the same
position as the sentinel. For random access iterators that is:

auto last2 = first + (last - first);

But for non-random access iterators finding the distance between first
and last is not possible in O(1), and incrementing first by that
distance is also not possible in O(1).


  But algorithms that operate on random
access iterators can safely and efficiently be forwarded to the 
STL-style
implementation, and this patch does so for push_heap, pop_heap, 
make_heap,
sort_heap, sort, stable_sort, nth_element, inplace_merge and 
stable_partition.
IMHO we should try as much as possible to forward to algos in 
stl_algobase.h.


That would not be conforming in many cases.

The old code assumes iterators can be copied. It assumes they have an
iterator_category. It assumes they have operator->(). Most
importantly, it assumes the begin and end iterators have the same
type.

The old algorithms do tag dispatching, which adds function call
overhead at run-time and overload resolution overhead at compile-time.

The new constrained algos can be implemented much more cleanly using
if-constexpr and the new iterator concepts.

There are very good reasons to reimplement the new ranges algos.

Those are highly customized and will be even more in the future when 
some patches I have on my side will be integrated (I hope).


If you do so you won't have to care much about _GLIBCXX_DEBUG iterators.



What's missing from this patch is debug-iterator and container 
specializations
that are present for some of the STL-style algorithms that need to 
be ported
over to the ranges algos.  I marked them missing at TODO comments.  
There are

also some other minor outstanding TODOs.

The code that could use the most thorough review is 
ranges::__copy_or_move,

ranges::__copy_or_move_backward, ranges::__equal and
ranges::__lexicographical_compare.  In the tests, I tried to test 
the interface
of each new overload, as well as the correctness of the new 
implementation.










[PATCH] testsuite/strlenopt-81.c: Add target limitation.

2020-02-13 Thread Kito Cheng
 - strlenopt-81.c has same limitation as strlenopt-80.c, this
   optimization only work when memcpy expand into load/store.

ChangeLog

gcc/testsuite

Kito Cheng  

* gcc.dg/strlenopt-81.c: Add target limitation.
---
 gcc/testsuite/gcc.dg/strlenopt-81.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/strlenopt-81.c 
b/gcc/testsuite/gcc.dg/strlenopt-81.c
index 95ac29aa71f..4a0dfc4f17d 100644
--- a/gcc/testsuite/gcc.dg/strlenopt-81.c
+++ b/gcc/testsuite/gcc.dg/strlenopt-81.c
@@ -1,5 +1,9 @@
 /* PR tree-optimization/ - fold strlen relational expressions
-   { dg-do run }
+   { dg-do run { target aarch64*-*-* i?86-*-* powerpc*-*-* x86_64-*-* } }
+   The optimization is only implemented for MEM_REF stores and other
+   targets than those below may not transform the memcpy call into
+   such a store.
+
{ dg-options "-O2 -Wall -Wno-unused-local-typedefs -fdump-tree-optimized" } 
*/
 
 typedef __SIZE_TYPE__ size_t;
-- 
2.25.0



[PATCH] aarch64: Allow -mcpu=generic -march=armv8.5-a

2020-02-13 Thread apinski
From: Andrew Pinski 

Right if someone supplies a -mcpu= option and then overrides
that option with -march=*, we get a warning when they conflict.
What we need is a generic cpu for each arch level but that is not
that useful because the only difference would be the arch level.
The best option is to allow -mcpu=generic -march=armv8.5-a not to
warn and that is now a generic armv8.5-a arch.

OK?  Bootstrapped and tested on aarch64-linux-gnu with no regressions.

Thanks,
Andrew Pinski

ChangeLog:
* config/aarch64/aarch64.c (aarch64_override_options): Don't
warn when the selected cpu was generic.
---
 gcc/config/aarch64/aarch64.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 4a34dce..9173afe 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14075,10 +14075,12 @@ aarch64_override_options (void)
explicit_tune_core = selected_tune->ident;
 }
   /* If both -mcpu and -march are specified check that they are architecturally
- compatible, warn if they're not and prefer the -march ISA flags.  */
+ compatible, warn if they're not and prefer the -march ISA flags.
+ Only warn if not using the generic cpu.  */
   else if (selected_arch)
 {
-  if (selected_arch->arch != selected_cpu->arch)
+  if (selected_cpu->ident != generic
+ && selected_arch->arch != selected_cpu->arch)
{
  warning (0, "switch %<-mcpu=%s%> conflicts with %<-march=%s%> switch",
   all_architectures[selected_cpu->arch].name,
-- 
1.8.3.1



Re: [PATCH] tree-optimization/93661 properly guard tree_to_poly_int64

2020-02-13 Thread Hans-Peter Nilsson
On Tue, 11 Feb 2020, Richard Biener wrote:
> Bootstrapped / tested on x86_64-unknown-linux-gnu, pushed.

> diff --git a/gcc/testsuite/gcc.dg/pr93661.c b/gcc/testsuite/gcc.dg/pr93661.c
> new file mode 100644
> index 000..e311ba545c4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr93661.c
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */

Missing int32plus qualifier considering the assumed-to-fit
constants?

> +
> +int f ()
> +{
> +  unsigned x = 0x;
> +  __builtin_memset (1+(char *) , 0, -1); /* { dg-warning "maximum object 
> size" } */
> +  return (x != 0xf000);

brgds, H-P
PS. no, doesn't fail for targets I care for, just random.


[PATCH] rs6000: fixinc: Skip machine_name fix for powerpc*-*-linux*

2020-02-13 Thread Segher Boessenkool
From: Matheus Castanho 

Some system headers can be broken by the machine_name fix performed
by GCC during the fixincludes step. According to the comment in
fixincludes/fixinc.h:130 :

   On some platforms, machine_name doesn't work properly and
   breaks some of the header files.  Since everything works
   properly without it, just wipe the macro list to
   disable the fix.

So we can just skip it to avoid trouble.

fixincludes/
* fixinc.in: Skip machine_name fix on powerpc*-*-linux*.
---
 fixincludes/fixinc.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fixincludes/fixinc.in b/fixincludes/fixinc.in
index cd0b458..de5a37f 100755
--- a/fixincludes/fixinc.in
+++ b/fixincludes/fixinc.in
@@ -136,7 +136,7 @@ fi
 #  disable the fix.
 
 case "${target_canonical}" in
-*-*-vxworks*)
+*-*-vxworks* | powerpc*-*-linux*)
test -f ${MACRO_LIST} &&  echo > ${MACRO_LIST}
 ;;
 esac
-- 
1.8.3.1



Re: [PATCH] configure: Re-disable building cross-gdbserver

2020-02-13 Thread Maciej W. Rozycki
On Wed, 12 Feb 2020, Pedro Alves wrote:

> >  That's actually quite similar to what I considered first, before I 
> > changed my mind.  Whatever.
> 
> Doing it in gdbserver/ has the advantage that it stays under gdbserver's
> control, so it doesn't need syncing code with the gcc tree.  I know of at
> least one off-tree port that uses gdbserver in a host != target scenario,
> so I imagine that this condition will evolve over time.

 Sure, that makes sense to me.

> >  case "${host}" in
> > +  ${target})
> > +   gdbserver_host=${host}
> > +   ;;
> > +  *)
> > +   gdbserver_host=NONE
> > +   ;;
> if/else reads more to-the-point to me, so I tweaked it that
> way, and merged it in (to binutils-gdb), like below.

 Great, thanks for handling this!

> I'm sorry for not noticing your earlier patch.

 No worries, I'm glad we've got this sorted.

  Maciej


Re: [PATCH v2] c++: Fix ICE with template codes in check_narrowing [PR91465]

2020-02-13 Thread Jason Merrill

On 2/11/20 5:06 PM, Marek Polacek wrote:

On Sun, Feb 09, 2020 at 01:51:13PM +0100, Jason Merrill wrote:

On 2/6/20 7:30 PM, Marek Polacek wrote:

In ed4f2c001a883b2456fc607a33f1c59f9c4ee65d I changed the call to
fold_non_dependent_expr in check_narrowing to maybe_constant_value.
That was the wrong thing to do as these tests show: check_narrowing
bails out for dependent expressions but we can still have template
codes like CAST_EXPR that don't have anything dependent in it so are
considered non-dependent.  But cxx_eval_* don't grok template codes,
so we need to call fold_non_dependent_expr instead which knows what
to do with template codes.  (I fully accept a "told you so".)

I'm passing tf_none to it, otherwise we'd emit a bogus error for
constexpr-ex4.C: there INIT is "A::operator int()" and while
instantiating this CALL_EXPR (in a template) we call finish_call_expr
and that sees a BASELINK and so emits a new dummy object for 'this',
and then we complain about the wrong number of arguments, because now
we basically have two 'this's.  Which is exactly the problem I saw
recently in c++/92948.


Yeah, the problem continues to be that build_converted_constant_expr is
breaking the boundary between template and non-template codes:
convert_like_real produces trees that aren't suitable for later
substitution, so substituting them breaks.  Perhaps if we're looking at a
non-dependent constant expression in a template,
build_converted_constant_expr should instantiate_non_dependent_expr, pass
the result to convert_like, and then if successful throw away the result in
favor of an IMPLICIT_CONV_EXPR.


That seems to work (if I adjust two spots to handle an I_C_E).  So something
like this?  I don't like that we create an I_C_E in convert_nontype_argument
and in build_converted_constant_expr too, but both are important.


Hmm, the one in convert_nontype_argument shouldn't be needed.

I see the pattern in e.g. build_explicit_specifier is

  expr = instantiate_non_dependent_expr_sfinae (expr, complain);
  /* Don't let convert_like_real create more template codes.  */
  processing_template_decl_sentinel s;
  expr = build_converted_constant_bool_expr (expr, complain);
  expr = cxx_constant_value (expr);

That has always seemed unpleasantly complicated.


+  /* convert_like produces trees that aren't suitable for
+substitution, so use an IMPLICIT_CONV_EXPR.  */
+  if (processing_template_decl
+ && is_nondependent_constant_expression (expr))
+   {
+ tree e = instantiate_non_dependent_expr (expr);
+ e = convert_like (conv, e, complain);
+ if (e != error_mark_node)
+   return build1 (IMPLICIT_CONV_EXPR, type, expr);
+   }


Shouldn't the above be after setting the check_narrowing flags?  We 
could simplify the pattern above by handling all of it here: 
instantiate, set the sentinel, convert, maybe_constant_value, return 
either the constant result or an IMPLICIT_CONV_EXPR.



conv->check_narrowing = true;
conv->check_narrowing_const_only = true;
expr = convert_like (conv, expr, complain);
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 31a556a0a1f..5eb91007e9e 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -10281,9 +10281,8 @@ compute_array_index_type_loc (location_t name_loc, tree 
name, tree size,
  /* Pedantically a constant expression is required here and so
 __builtin_is_constant_evaluated () should fold to true if it
 is successfully folded into a constant.  */
- size = maybe_constant_value (size, NULL_TREE,
-  /*manifestly_const_eval=*/true);
-
+ size = fold_non_dependent_expr (size, complain,
+ /*manifestly_const_eval=*/true);


With the change I suggest above we would drop both this call and the 
earlier instantiate_ call.  In general it's wrong to call 
*_non_dependent_expr twice on the same expression.



  if (!TREE_CONSTANT (size))
size = origsize;
}
diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index c2d3a98b1c5..5d207da2f5b 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -7099,6 +7099,13 @@ convert_nontype_argument (tree type, tree expr, 
tsubst_flags_t complain)
/* Make sure we return NULL_TREE only if we have really issued
   an error, as described above.  */
return (complain & tf_error) ? NULL_TREE : error_mark_node;
+ /* Don't pass any IMPLICIT_CONV_EXPRs to maybe_constant_value
+because that can't handle it.  */
+ else if (TREE_CODE (expr) == IMPLICIT_CONV_EXPR)
+   {
+ IMPLICIT_CONV_EXPR_NONTYPE_ARG (expr) = true;
+ return expr;
+   }
  expr = maybe_constant_value (expr, NULL_TREE,
   /*manifestly_const_eval=*/true);


And this whole if block should also be able to reduce to

  expr = build_converted_constant_expr (type, 

Re: [PATCH] c++: Fix hashing and testing for equality of ATOMIC_CONST_EXPRs

2020-02-13 Thread Patrick Palka
On Fri, 14 Feb 2020, Jason Merrill wrote:

> On 2/12/20 5:15 PM, Patrick Palka wrote:
> > Two equal atomic constraint expressions do not necessarily share the same
> > tree,
> > so we can't assume that two ATOMIC_CONST_EXPRs are equal if and only if they
> > point to the same tree.
> 
> This is incorrect; comparison of atomic constraints is based on them coming
> from the same actual tokens, which is why we use pointer comparison.
> 
> In your concepts-partial-spec7.C, test2 is properly rejected.  If you want to
> be able to write the same constraint in multiple places, you need to use a
> concept.
> 
> 13.5.1.2:
> 
> Two atomic constraints are identical if they are formed from the same
> expression and the targets of the parameter mappings are equivalent according
> to the rules for expressions described in 13.7.6.1.

I see, that makes complete sense now.  I was stumbling over what the
spec meant by "the same expression."  Thanks for clearing up my
misunderstanding.



Re: [PING^7][PATCH 0/4] Fix library testsuite compilation for build sysroot

2020-02-13 Thread Maciej W. Rozycki
On Sun, 26 Jan 2020, Jeff Law wrote:

> >  Ping for the build system parts of:
> > 
> > 
> > 
> OK.  Sorry for the terrible slowness on this kit.

 No worries, completely understood with all the attention required by the 
switch to Git.

 Thank you for your review.  However Julian and Chung-Lin reported issues 
with the libgomp part previously committed and I have concluded all the 
changes have to be completely rewritten.  I have posted v2 now.

  Maciej


[PATCH v2 4/4] libgomp/test: Remove a build sysroot fix regression

2020-02-13 Thread Maciej W. Rozycki
Fix a problem with commit c8e759b4215b ("libgomp/test: Fix compilation 
for build sysroot") that caused a regression in some standalone test 
environments where testsuite/libgomp-test-support.exp is used, but the 
compiler is expected to be determined by `[find_gcc]', and use the 
`--tool_exec' option to `runtest' passed via $(AM_RUNTESTFLAGS) with 
$(CC) as the argument rather than the GCC_UNDER_TEST TCL variable in 
testsuite/libgomp-test-support.exp.

libgomp/
* testsuite/libgomp-test-support.exp.in (GCC_UNDER_TEST): Remove 
variable.
* testsuite/Makefile.am (AM_RUNTESTFLAGS): New variable.
* testsuite/Makefile.in: Regenerate.
---
Applies on top of v1.
---
 libgomp/testsuite/Makefile.am |1 +
 libgomp/testsuite/Makefile.in |1 +
 libgomp/testsuite/libgomp-test-support.exp.in |2 --
 3 files changed, 2 insertions(+), 2 deletions(-)

gcc-test-libgomp-runtestflags-tool-exec.diff
Index: gcc/libgomp/testsuite/Makefile.am
===
--- gcc.orig/libgomp/testsuite/Makefile.am
+++ gcc/libgomp/testsuite/Makefile.am
@@ -11,6 +11,7 @@ EXPECT = $(shell if test -f $(top_buildd
 _RUNTEST = $(shell if test -f $(top_srcdir)/../dejagnu/runtest; then \
 echo $(top_srcdir)/../dejagnu/runtest; else echo runtest; fi)
 RUNTESTDEFAULTFLAGS = --tool $$tool --srcdir $$srcdir
+AM_RUNTESTFLAGS = --tool_exec "$(CC)"
 
 # Instead of directly in ../testsuite/libgomp-test-support.exp.in, the
 # following variables have to be "routed through" this Makefile, for expansion
Index: gcc/libgomp/testsuite/Makefile.in
===
--- gcc.orig/libgomp/testsuite/Makefile.in
+++ gcc/libgomp/testsuite/Makefile.in
@@ -310,6 +310,7 @@ _RUNTEST = $(shell if test -f $(top_srcd
 echo $(top_srcdir)/../dejagnu/runtest; else echo runtest; fi)
 
 RUNTESTDEFAULTFLAGS = --tool $$tool --srcdir $$srcdir
+AM_RUNTESTFLAGS = --tool_exec "$(CC)"
 all: all-am
 
 .SUFFIXES:
Index: gcc/libgomp/testsuite/libgomp-test-support.exp.in
===
--- gcc.orig/libgomp/testsuite/libgomp-test-support.exp.in
+++ gcc/libgomp/testsuite/libgomp-test-support.exp.in
@@ -1,5 +1,3 @@
-set GCC_UNDER_TEST {@CC@}
-
 set cuda_driver_include "@CUDA_DRIVER_INCLUDE@"
 set cuda_driver_lib "@CUDA_DRIVER_LIB@"
 set hsa_runtime_lib "@HSA_RUNTIME_LIB@"


[PATCH v2 1/4] libatomic/test: Fix compilation for build sysroot

2020-02-13 Thread Maciej W. Rozycki
Fix a problem with the libatomic testsuite using a method to determine 
the compiler to use resulting in the tool being different from one the 
library has been built with, and causing a catastrophic failure from the 
lack of a suitable `--sysroot=' option where the `--with-build-sysroot=' 
configuration option has been used to build the compiler resulting in 
the inability to link executables.

Address this problem by passing the `--tool_exec' option to `runtest' 
via $(AM_RUNTESTFLAGS) with $(CC) as the argument, which will have all 
the required options set for the target compiler to build executables in 
the environment configured, removing failures like:

.../bin/riscv64-linux-gnu-ld: cannot find crt1.o: No such file or directory
.../bin/riscv64-linux-gnu-ld: cannot find -lm
collect2: error: ld returned 1 exit status
compiler exited with status 1
FAIL: libatomic.c/atomic-compare-exchange-1.c (test for excess errors)
Excess errors:
.../bin/riscv64-linux-gnu-ld: cannot find crt1.o: No such file or directory
.../bin/riscv64-linux-gnu-ld: cannot find -lm

UNRESOLVED: libatomic.c/atomic-compare-exchange-1.c compilation failed to 
produce executable

and bringing overall test results for the `riscv64-linux-gnu' target 
(here with the `x86_64-linux-gnu' host and RISC-V QEMU in the Linux user 
emulation mode as the target board) from:

=== libatomic Summary ===

# of unexpected failures27
# of unresolved testcases   27

to:

=== libatomic Summary ===

# of expected passes54

libatomic/
* testsuite/Makefile.am (AM_RUNTESTFLAGS): New variable.
* testsuite/Makefile.in: Regenerate.
---
Changes from v1:

- Remove testsuite/libatomic-test-support.exp.in and the associated 
  changes.

- Pass $(CC) via `--tool_exec' in $(AM_RUNTESTFLAGS).
---
 libatomic/testsuite/Makefile.am |1 +
 libatomic/testsuite/Makefile.in |1 +
 2 files changed, 2 insertions(+)

patches/gcc-test-libatomic-runtestflags-tool-exec.diff
Index: gcc/libatomic/testsuite/Makefile.am
===
--- gcc.orig/libatomic/testsuite/Makefile.am
+++ gcc/libatomic/testsuite/Makefile.am
@@ -11,3 +11,4 @@ EXPECT = $(shell if test -f $(top_buildd
 _RUNTEST = $(shell if test -f $(top_srcdir)/../dejagnu/runtest; then \
 echo $(top_srcdir)/../dejagnu/runtest; else echo runtest; fi)
 RUNTEST = $(_RUNTEST) $(AM_RUNTESTFLAGS)
+AM_RUNTESTFLAGS = --tool_exec "$(CC)"
Index: gcc/libatomic/testsuite/Makefile.in
===
--- gcc.orig/libatomic/testsuite/Makefile.in
+++ gcc/libatomic/testsuite/Makefile.in
@@ -278,6 +278,7 @@ _RUNTEST = $(shell if test -f $(top_srcd
 echo $(top_srcdir)/../dejagnu/runtest; else echo runtest; fi)
 
 RUNTEST = $(_RUNTEST) $(AM_RUNTESTFLAGS)
+AM_RUNTESTFLAGS = --tool_exec "$(CC)"
 all: all-am
 
 .SUFFIXES:


[PATCH v2 0/4] Fix library testsuite compilation for build sysroot

2020-02-13 Thread Maciej W. Rozycki
Hi,

 This is v2 of patch series, originally posted here:






meant to address a problem with the testsuite compiler being set up across 
libatomic, libffi, libgo, libgomp with no correlation whatsoever to the 
target compiler being used in GCC compilation.  Consequently there in no 
arrangement made to set up the compilation sysroot according to the build 
sysroot specified for GCC compilation, causing a catastrophic failure 
across the testsuites affected from the inability to link executables.

 Julian reported an issue triggered in his standalone test environment by 
the original version of the libgomp part of this patch series, which is 
one of the two patches from the series that have been already applied.

 The cause has been the newly-added definition of GCC_UNDER_TEST embedded 
in the generated libgomp/testsuite/libgomp-test-support.exp configuration 
file, which is used in Julian's setup which also relies on the invocation 
of `[find_gcc]' to locate the correct compiler to use.

 This invocation is unsuitable for in-tree testing as we want to use 
exactly the same compiler invocation, as recorded in $(CC), as used to 
build the library.  So I have worked with Chung-Lin on a replacement 
version, which would work with such standalone testing while still 
satisfying the original requirement to use $(CC).

 In the end I have decided to use the documented `--tool_exec' option to 
`runtest' to contain the change within the testsuite's Makefile and its 
`check' goal, which is inherent to the build tree and as such not supposed 
to be used in standalone testing, like with `contrib/test_installed'.

 I am quite sure the problem is specific to libgomp only, as it's the only 
of the four libraries handled that had a preexisting *-test-support.exp 
test configuration file, and the remaining three only got one with the 
changes from the series.  However to keep things consistent across the 
tree I propose to use the `--tool_exec' option for all four libraries.

 This in particular means reverting the whole of the libgomp change as 
well as a part of the libgo change, both already applied, in addition to 
making further adjustments.  For libatomic and libffi this updated 
proposal merely replaces the original one.

 Verified with a cross-compiler configured for the `riscv-linux-gnu' 
target and the `x86_64-linux-gnu' host and using RISC-V/Linux QEMU in the 
user emulation mode as the target board.  Also no change in results with 
`x86_64-linux-gnu' native regression testing.

 See individual change descriptions for details.

 I'm assuming Ian will take care of the 3/4 libgo change; OK to apply the 
remaining ones to the GCC repo?

  Maciej


[PATCH v2 3/4] libgo/test: Complement compilation fix for build sysroot

2020-02-13 Thread Maciej W. Rozycki
Complement commit b72813a68c94 ("libgo: fix DejaGNU testsuite compiler 
when using build sysroot") and use the `--tool_exec' option to `runtest' 
via $(AM_RUNTESTFLAGS) with $(GOC) as the argument rather than the 
GOC_UNDER_TEST TCL variable to set the compiler to use with the test 
suite, for consistency with the other top-level GCC target libraries.  
Update testsuite/lib/libgo.exp to actually handle the option by using 
the TOOL_EXECUTABLE TCL variable.

libgo/
* configure.ac: Remove testsuite/libgo-test-support.exp from 
output files.
* configure: Regenerate.
* testsuite/libgo-test-support.exp.in: Remove file.
* testsuite/Makefile.am (EXTRA_DEJAGNU_SITE_CONFIG): Remove
variable.
(AM_RUNTESTFLAGS): Add `--tool_exec' option.
* testsuite/Makefile.in: Regenerate.
* testsuite/lib/libgo.exp: Handle TOOL_EXECUTABLE.
---
Applies on top of v1.
---
 libgo/configure   |3 +--
 libgo/configure.ac|2 +-
 libgo/testsuite/Makefile.am   |4 +---
 libgo/testsuite/Makefile.in   |7 ++-
 libgo/testsuite/lib/libgo.exp |   12 
 libgo/testsuite/libgo-test-support.exp.in |   17 -
 6 files changed, 13 insertions(+), 32 deletions(-)

gcc-test-libgo-runtestflags-tool-exec.diff
Index: gcc/libgo/configure
===
--- gcc.orig/libgo/configure
+++ gcc/libgo/configure
@@ -15880,7 +15880,7 @@ else
   multilib_arg=
 fi
 
-ac_config_files="$ac_config_files Makefile testsuite/Makefile 
testsuite/libgo-test-support.exp"
+ac_config_files="$ac_config_files Makefile testsuite/Makefile"
 
 
 ac_config_commands="$ac_config_commands default"
@@ -17061,7 +17061,6 @@ do
 "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;;
 "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
 "testsuite/Makefile") CONFIG_FILES="$CONFIG_FILES testsuite/Makefile" ;;
-"testsuite/libgo-test-support.exp") CONFIG_FILES="$CONFIG_FILES 
testsuite/libgo-test-support.exp" ;;
 "default") CONFIG_COMMANDS="$CONFIG_COMMANDS default" ;;
 
   *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
Index: gcc/libgo/configure.ac
===
--- gcc.orig/libgo/configure.ac
+++ gcc/libgo/configure.ac
@@ -889,7 +889,7 @@ else
   multilib_arg=
 fi
 
-AC_CONFIG_FILES(Makefile testsuite/Makefile testsuite/libgo-test-support.exp)
+AC_CONFIG_FILES(Makefile testsuite/Makefile)
 
 AC_CONFIG_COMMANDS([default],
 [if test -n "$CONFIG_FILES"; then
Index: gcc/libgo/testsuite/Makefile.am
===
--- gcc.orig/libgo/testsuite/Makefile.am
+++ gcc/libgo/testsuite/Makefile.am
@@ -11,13 +11,11 @@ RUNTEST = `if [ -f $(top_srcdir)/../deja
   echo $(top_srcdir)/../dejagnu/runtest ; \
else echo runtest; fi`
 
-EXTRA_DEJAGNU_SITE_CONFIG = libgo-test-support.exp
-
 # When running the tests we set GCC_EXEC_PREFIX to the install tree so that
 # files that have already been installed there will be found.  The -B option
 # overrides it, so use of GCC_EXEC_PREFIX will not result in using GCC files
 # from the install tree.
 
-AM_RUNTESTFLAGS = "TEST_GCC_EXEC_PREFIX=$(libdir)/gcc"
+AM_RUNTESTFLAGS = --tool_exec "$(GOC)" "TEST_GCC_EXEC_PREFIX=$(libdir)/gcc"
 
 CLEANFILES = *.log *.sum
Index: gcc/libgo/testsuite/Makefile.in
===
--- gcc.orig/libgo/testsuite/Makefile.in
+++ gcc/libgo/testsuite/Makefile.in
@@ -107,7 +107,7 @@ am__configure_deps = $(am__aclocal_m4_de
 DIST_COMMON = $(srcdir)/Makefile.am
 mkinstalldirs = $(SHELL) $(top_srcdir)/../mkinstalldirs
 CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES = libgo-test-support.exp
+CONFIG_CLEAN_FILES =
 CONFIG_CLEAN_VPATH_FILES =
 AM_V_P = $(am__v_P_@AM_V@)
 am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -300,13 +300,12 @@ RUNTEST = `if [ -f $(top_srcdir)/../deja
   echo $(top_srcdir)/../dejagnu/runtest ; \
else echo runtest; fi`
 
-EXTRA_DEJAGNU_SITE_CONFIG = libgo-test-support.exp
 
 # When running the tests we set GCC_EXEC_PREFIX to the install tree so that
 # files that have already been installed there will be found.  The -B option
 # overrides it, so use of GCC_EXEC_PREFIX will not result in using GCC files
 # from the install tree.
-AM_RUNTESTFLAGS = "TEST_GCC_EXEC_PREFIX=$(libdir)/gcc"
+AM_RUNTESTFLAGS = --tool_exec "$(GOC)" "TEST_GCC_EXEC_PREFIX=$(libdir)/gcc"
 CLEANFILES = *.log *.sum
 all: all-am
 
@@ -340,8 +339,6 @@ $(top_srcdir)/configure: @MAINTAINER_MOD
 $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
 $(am__aclocal_m4_deps):
-libgo-test-support.exp: $(top_builddir)/config.status 
$(srcdir)/libgo-test-support.exp.in
-   cd 

[PATCH v2 2/4] libffi/test: Fix compilation for build sysroot

2020-02-13 Thread Maciej W. Rozycki
Fix a problem with the libffi testsuite using a method to determine the 
compiler to use resulting in the tool being different from one the 
library has been built with, and causing a catastrophic failure from the 
inability to actually choose any compiler at all in a cross-compilation
configuration.

Address this problem by passing the `--tool_exec' option to `runtest' 
via $(AM_RUNTESTFLAGS) with $(CC) as the argument, which will have all 
the required options set for the target compiler to build executables in 
the environment configured, removing failures like:

FAIL: libffi.call/closure_fn0.c -W -Wall -Wno-psabi -O0 (test for excess errors)
Excess errors:
default_target_compile: No compiler to compile with
UNRESOLVED: libffi.call/closure_fn0.c -W -Wall -Wno-psabi -O0 compilation 
failed to produce executable

and bringing overall test results for the `riscv64-linux-gnu' target 
(here with the `x86_64-linux-gnu' host and RISC-V QEMU in the Linux user 
emulation mode as the target board) from:

=== libffi Summary ===

# of unexpected failures708
# of unresolved testcases   708
# of unsupported tests  30

to:

=== libffi Summary ===

# of expected passes1934
# of unsupported tests  28

For consistency with other top-level target libraries also respect the 
GCC_UNDER_TEST TCL variable, which can be set e.g. with an argument to 
`runtest' in a standalone run.  Finally remove an unused TOOL_OPTIONS 
TCL variable instance.

libffi/
* testsuite/Makefile.am (AM_RUNTESTFLAGS): Add `--tool_exec' 
option.
* testsuite/Makefile.in: Regenerate.
* testsuite/lib/libffi.exp (libffi-init): Handle GCC_UNDER_TEST 
and TOOL_EXECUTABLE.
(libffi_target_compile): Use GCC_UNDER_TEST.
---
Changes from v1:

- Remove testsuite/libffi-test-support.exp.in and the associated changes.

- Pass $(CC) via `--tool_exec' in $(AM_RUNTESTFLAGS).
---
 libffi/testsuite/Makefile.am|2 +-
 libffi/testsuite/Makefile.in|2 +-
 libffi/testsuite/lib/libffi.exp |   16 ++--
 3 files changed, 16 insertions(+), 4 deletions(-)

gcc-test-libffi-runtestflags-tool-exec.diff
Index: gcc/libffi/testsuite/Makefile.am
===
--- gcc.orig/libffi/testsuite/Makefile.am
+++ gcc/libffi/testsuite/Makefile.am
@@ -11,7 +11,7 @@ RUNTEST = `if [ -f $(top_srcdir)/../deja
   echo $(top_srcdir)/../dejagnu/runtest ; \
else echo runtest; fi`
 
-AM_RUNTESTFLAGS =
+AM_RUNTESTFLAGS = --tool_exec "$(CC)"
 
 CLEANFILES = *.exe core* *.log *.sum
 
Index: gcc/libffi/testsuite/Makefile.in
===
--- gcc.orig/libffi/testsuite/Makefile.in
+++ gcc/libffi/testsuite/Makefile.in
@@ -134,7 +134,7 @@ ALLOCA = @ALLOCA@
 AMTAR = @AMTAR@
 AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
 AM_LTLDFLAGS = @AM_LTLDFLAGS@
-AM_RUNTESTFLAGS = 
+AM_RUNTESTFLAGS = --tool_exec "$(CC)"
 AR = @AR@
 AUTOCONF = @AUTOCONF@
 AUTOHEADER = @AUTOHEADER@
Index: gcc/libffi/testsuite/lib/libffi.exp
===
--- gcc.orig/libffi/testsuite/lib/libffi.exp
+++ gcc/libffi/testsuite/lib/libffi.exp
@@ -99,7 +99,8 @@ proc libffi-init { args } {
 global blddirffi
 global objdir
 global blddircxx
-global TOOL_OPTIONS
+global TOOL_EXECUTABLE
+global GCC_UNDER_TEST
 global tool
 global libffi_include
 global libffi_link_flags
@@ -123,7 +124,15 @@ proc libffi-init { args } {
 set ld_library_path "."
 append ld_library_path ":${gccdir}"
 
-set compiler "${gccdir}/xgcc"
+if ![info exists GCC_UNDER_TEST] then {
+   if [info exists TOOL_EXECUTABLE] {
+   set GCC_UNDER_TEST $TOOL_EXECUTABLE
+   } else {
+   set GCC_UNDER_TEST "[find_gcc]"
+   }
+}
+
+set compiler [lindex $GCC_UNDER_TEST 0]
 if { [is_remote host] == 0 && [which $compiler] != 0 } {
foreach i "[exec $compiler --print-multi-lib]" {
set mldir ""
@@ -175,11 +184,14 @@ proc libffi_target_compile { source dest
 global srcdir
 global blddirffi
 global TOOL_OPTIONS
+global GCC_UNDER_TEST
 global libffi_link_flags
 global libffi_include
 global target_triplet
 
 
+lappend options "compiler=$GCC_UNDER_TEST"
+
 if { [target_info needs_status_wrapper]!="" && [info exists gluefile] } {
lappend options "libs=${gluefile}"
lappend options "ldflags=$wrap_flags"


Re: [PATCH] c++: Fix value-init crash in template [PR93676]

2020-02-13 Thread Jason Merrill

On 2/11/20 8:54 PM, Marek Polacek wrote:

Since  we
attempt to value-initialize in build_vec_init even when there's no
initializer but the type has a constexpr default constructor.  But
build_value_init doesn't work in templates, so I think let's avoid
this scenario; we'll go to the normal build_aggr_init path then.

Bootstrapped/regtested on x86_64-linux, ok for trunk and branches?

PR c++/93676 - value-init crash in template.
* init.c (build_vec_init): Don't perform value-init in a template.


Hmm, we really shouldn't even be calling build_vec_init in a template, 
that builds up a lot of garbage that we'll throw away at the end of 
build_new.


Jason



Re: Update gcc.target/powerpc/pr92132-fp test for power7 and older

2020-02-13 Thread Segher Boessenkool
On Thu, Feb 13, 2020 at 04:41:09PM -0600, will schmidt wrote:
>   Attempting to clean up some more testcase failures.
> This test in particular exercises the -ftree-vectorize
> option, and by inspection I see this fails out with assorted
> "conversion not supported by target" messages on power7 and
> earlier systems.
> 
> Thus, this would limits the scan-dump check of "LOOP VECTORIZED" to
> those targets supporting power8 vector support.
> The testcase itself otherwise runs fine, so limiting the test
> itself seems unnecessary.

> -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 14 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 14 "vect" { target 
> p8vector_hw } } } */

That actually checks if the hardware is p8 or later, while what you care
about is what options are compiled with.  Say, if running on a p8 but
compiling for a p7 this will fail?


Segher


Re: [PATCH] c++: Fix poor diagnostic for array initializer [PR93710]

2020-02-13 Thread Jason Merrill

On 2/12/20 9:05 PM, Marek Polacek wrote:

A small improvement for an error in build_user_type_conversion_1:
instead of

array-init1.C:11:1: error: conversion from ‘long int’ to ‘A’ is ambiguous
11 | };
   | ^

we will print

array-init1.C:8:3: error: conversion from ‘long int’ to ‘A’ is ambiguous
 8 |   0L,
   |   ^~

Bootstrapped/regtested on x86_64-linux, ok for trunk?


OK.


2020-02-12  Marek Polacek  

PR c++/93710 - poor diagnostic for array initializer.
* call.c (build_user_type_conversion_1): Use cp_expr_loc_or_input_loc
for an error call.

* g++.dg/diagnostic/array-init1.C: New test.
---
  gcc/cp/call.c |  5 +++--
  gcc/testsuite/g++.dg/diagnostic/array-init1.C | 11 +++
  2 files changed, 14 insertions(+), 2 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/diagnostic/array-init1.C

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 51621b7dd87..f47f96bf1c2 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -4172,8 +4172,9 @@ build_user_type_conversion_1 (tree totype, tree expr, int 
flags,
if (complain & tf_error)
{
  auto_diagnostic_group d;
- error ("conversion from %qH to %qI is ambiguous",
-fromtype, totype);
+ error_at (cp_expr_loc_or_input_loc (expr),
+   "conversion from %qH to %qI is ambiguous",
+   fromtype, totype);
  print_z_candidates (location_of (expr), candidates);
}
  
diff --git a/gcc/testsuite/g++.dg/diagnostic/array-init1.C b/gcc/testsuite/g++.dg/diagnostic/array-init1.C

new file mode 100644
index 000..78580ad6b83
--- /dev/null
+++ b/gcc/testsuite/g++.dg/diagnostic/array-init1.C
@@ -0,0 +1,11 @@
+// PR c++/93710 - poor diagnostic for array initializer.
+
+struct A { A (int); A (char*); int i; };
+
+int x;
+
+A a1[] = {
+  0L, // { dg-error "3:conversion from .long int. to .A. is ambiguous" }
+  , // { dg-error "3:invalid conversion from .int\\*. to .int." }
+  __builtin_offsetof (A, i) // { dg-error "23:conversion from .long unsigned int. 
to .A. is ambiguous" }
+};

base-commit: 5bfc8303ffe2d86e938d45f13cd99a39469dac4f





Re: [PATCH] c++: Fix hashing and testing for equality of ATOMIC_CONST_EXPRs

2020-02-13 Thread Jason Merrill

On 2/12/20 5:15 PM, Patrick Palka wrote:

Two equal atomic constraint expressions do not necessarily share the same tree,
so we can't assume that two ATOMIC_CONST_EXPRs are equal if and only if they
point to the same tree.


This is incorrect; comparison of atomic constraints is based on them 
coming from the same actual tokens, which is why we use pointer comparison.


In your concepts-partial-spec7.C, test2 is properly rejected.  If you 
want to be able to write the same constraint in multiple places, you 
need to use a concept.


13.5.1.2:

Two atomic constraints are identical if they are formed from the same 
expression and the targets of the parameter mappings are equivalent 
according to the rules for expressions described in 13.7.6.1.


Jason



Re: Fix existing fold-vec-extract-longlong.p8.c testcase

2020-02-13 Thread Segher Boessenkool
Hi!

On Thu, Feb 13, 2020 at 04:41:03PM -0600, will schmidt wrote:
>  The code generated by this test changed shortly after
> this test was committed, and we didn't get back to
> updating the scan-assembler statements to match.
> Until now.

Progress!  :-)

Just some nits:

> [testsuite]
> * gcc.target/powerpc/fold-vec-extract-longlong.p8.c: Correct expected 
> insns.

That line is too long.

> +// P8 (LE) variables: addi,xxpermdi,mr,stxvd2x|sxxvd4x,rldicl,sldi,ldx,blr

s/sxx/stx/

> -/* { dg-final { scan-assembler-times {\mmfvsrd\M} 6 { target lp64 } } } */
> -/* { dg-final { scan-assembler-times {\mmtvsrd\M} 3 { target lp64 } } } */
> +/* { dg-final { scan-assembler-times {\mmfvsrd\M} 3 { target lp64 } } } */

Why drop the mtvsrd check?

> -/* { dg-final { scan-assembler-times {\mxxpermdi\M} 6 { target { be && lp64 
> } } } } */

Just like this one.  Do they match 0 times now?  Please keep the entry
then, but for 0 times?

Okay for trunk with that taken care of whichever way.  Thanks!


Segher


Re: [PATCH] c++: Emit DFP typeinfos even when DFP is disabled [PR92906]

2020-02-13 Thread Jason Merrill

On 2/12/20 12:31 PM, Jakub Jelinek wrote:

Hi!

Before Joseph's changes when compiling
libstdc++-v3/libsupc++/fundamental_type_info.cc
we were emitting
_ZTIPDd, _ZTIPDe, _ZTIPDf, _ZTIPKDd, _ZTIPKDe, _ZTIPKDf, _ZTIDd, _ZTIDe, _ZTIDf
symbols even when DFP wasn't usable, but now we don't and thus those 9
symbols @@CXXABI_1.3.4 are gone from libstdc++.  While nothing could
probably use it (except perhaps dlsym etc.), various tools don't really like
symbols disappearing from symbol versioned shared libraries with stable ABI.
Adding those in assembly would be possible, but would be a portability
nightmare (the PR has something Red Hat uses in libstdc++_nonshared.a, but that
can handle only a handful of linux ELF targets we care about).
So, instead this patch hacks up the FE, so that it emits those, but in a way
that won't make the DFP types available again on targets that don't support
them.

Bootstrapped/regtested on x86_64-linux and i686-linux (where it doesn't
change anything, because DFP is supported), aarch64-linux (where it fixed
FAIL: libstdc++-abi/abi_check
) and armv7hl-linux (where abi_check didn't fail because we don't have
baseline file, but comparing abilist of unpatched and patched libstdc++.so.6
shows:
+_ZTIDd@@CXXABI_1.3.4 OBJECT WEAK DEFAULT 8
+_ZTIDe@@CXXABI_1.3.4 OBJECT WEAK DEFAULT 8
+_ZTIDf@@CXXABI_1.3.4 OBJECT WEAK DEFAULT 8
+_ZTIPDd@@CXXABI_1.3.4 OBJECT WEAK DEFAULT 16
+_ZTIPDe@@CXXABI_1.3.4 OBJECT WEAK DEFAULT 16
+_ZTIPDf@@CXXABI_1.3.4 OBJECT WEAK DEFAULT 16
+_ZTIPKDd@@CXXABI_1.3.4 OBJECT WEAK DEFAULT 16
+_ZTIPKDe@@CXXABI_1.3.4 OBJECT WEAK DEFAULT 16
+_ZTIPKDf@@CXXABI_1.3.4 OBJECT WEAK DEFAULT 16
).  Ok for trunk?


OK.


2020-02-12  Jakub Jelinek  

PR libstdc++/92906
* cp-tree.h (enum cp_tree_index): Add CPTI_FALLBACK_DFLOAT32_TYPE,
CPTI_FALLBACK_DFLOAT64_TYPE and CPTI_FALLBACK_DFLOAT128_TYPE.
(fallback_dfloat32_type, fallback_dfloat64_type,
fallback_dfloat128_type): Define.
* mangle.c (write_builtin_type): Handle fallback_dfloat*_type like
dfloat*_type_node.
* rtti.c (emit_support_tinfos): Emit DFP typeinfos even when dfp
is disabled for compatibility.

--- gcc/cp/cp-tree.h.jj 2020-02-10 15:02:05.031846484 +0100
+++ gcc/cp/cp-tree.h2020-02-11 23:00:25.561410042 +0100
@@ -206,6 +206,10 @@ enum cp_tree_index
  
  CPTI_SOURCE_LOCATION_IMPL,
  
+CPTI_FALLBACK_DFLOAT32_TYPE,

+CPTI_FALLBACK_DFLOAT64_TYPE,
+CPTI_FALLBACK_DFLOAT128_TYPE,
+
  CPTI_MAX
  };
  
@@ -366,6 +370,12 @@ extern GTY(()) tree cp_global_trees[CPTI
  
  #define access_default_node		null_node
  
+/* Variant of dfloat{32,64,128}_type_node only used for fundamental

+   rtti purposes if DFP is disabled.  */
+#define fallback_dfloat32_type 
cp_global_trees[CPTI_FALLBACK_DFLOAT32_TYPE]
+#define fallback_dfloat64_type 
cp_global_trees[CPTI_FALLBACK_DFLOAT64_TYPE]
+#define fallback_dfloat128_type
cp_global_trees[CPTI_FALLBACK_DFLOAT128_TYPE]
+
  
  #include "name-lookup.h"
  
--- gcc/cp/mangle.c.jj	2020-01-21 09:13:43.339634944 +0100

+++ gcc/cp/mangle.c 2020-02-11 22:59:28.466265009 +0100
@@ -2569,11 +2569,11 @@ write_builtin_type (tree type)
write_char ('d');
else if (type == long_double_type_node)
write_char ('e');
-  else if (type == dfloat32_type_node)
+  else if (type == dfloat32_type_node || type == fallback_dfloat32_type)
write_string ("Df");
-  else if (type == dfloat64_type_node)
+  else if (type == dfloat64_type_node || type == fallback_dfloat64_type)
write_string ("Dd");
-  else if (type == dfloat128_type_node)
+  else if (type == dfloat128_type_node || type == fallback_dfloat128_type)
write_string ("De");
else
gcc_unreachable ();
--- gcc/cp/rtti.c.jj2020-01-21 09:13:43.359634642 +0100
+++ gcc/cp/rtti.c   2020-02-11 22:59:28.467264994 +0100
@@ -1588,6 +1588,20 @@ emit_support_tinfos (void)
}
for (tree t = registered_builtin_types; t; t = TREE_CHAIN (t))
  emit_support_tinfo_1 (TREE_VALUE (t));
+  /* For compatibility, emit DFP typeinfos even when DFP isn't enabled,
+ because we've emitted that in the past.  */
+  if (!targetm.decimal_float_supported_p ())
+{
+  gcc_assert (dfloat32_type_node == NULL_TREE
+ && dfloat64_type_node == NULL_TREE
+ && dfloat128_type_node == NULL_TREE);
+  fallback_dfloat32_type = make_node (REAL_TYPE);
+  fallback_dfloat64_type = make_node (REAL_TYPE);
+  fallback_dfloat128_type = make_node (REAL_TYPE);
+  emit_support_tinfo_1 (fallback_dfloat32_type);
+  emit_support_tinfo_1 (fallback_dfloat64_type);
+  emit_support_tinfo_1 (fallback_dfloat128_type);
+}
input_location = saved_loc;
  }
  


Jakub





Re: [PATCH] avoid user-constructible types in reshape_init_array (PR 90938)

2020-02-13 Thread Jason Merrill

On 2/12/20 9:21 PM, Martin Sebor wrote:

On 2/11/20 5:28 PM, Jason Merrill wrote:

On 2/11/20 9:00 PM, Martin Sebor wrote:

r270155, committed in GCC 9, introduced a transformation that strips
redundant trailing zero initializers from array initializer lists in
order to support string literals as template arguments.

The transformation neglected to consider the case of array elements
of trivial class types with user-defined conversion ctors and either
defaulted or deleted default ctors.  (It didn't occur to me that
those qualify as trivial types despite the user-defined ctors.)  As
a result, some valid initialization expressions are rejected when
the explicit zero-initializers are dropped in favor of the (deleted)
default ctor,


Hmm, a type with only a deleted default constructor is not trivial, 
that should have been OK already.


For Marek's test case:
   struct A { A () == delete; A (int) = delete; };

trivial_type_p() returns true (as does __is_trivial (A) in both GCC
and Clang).

[class.prop] says that

   A trivial class is a class that is trivially copyable and has one
   or more default constructors (10.3.4.1), all of which are either
   trivial or deleted and at least one of which is not deleted.

That sounds like A above is not trivial because it doesn't have
at least one default ctor that's not deleted, but both GCC and
Clang say it is.  What am I missing?  Is there some other default
constructor hiding in there that I don't know about?


and others are eliminated in favor of the defaulted
ctor instead of invoking a user-defined conversion ctor, leading to
wrong code.


This seems like a bug in type_initializer_zero_p; it shouldn't treat 0 
as a zero initializer for any class.


That does fix it, and it seems like the right solution to me as well.
Thanks for the suggestion.  I'm a little unsure about the condition
I put in place though.

Attached is an updated patch rested on x86_64-linux.



-  if (sized_array_p && trivial_type_p (elt_type))
+  if (sized_array_p
+  && trivial_type_p (elt_type)
+  && !TYPE_NEEDS_CONSTRUCTING (elt_type))


Do we still need this change?  If so, please add a comment about the 
trivial_type_p bug.



   if (TREE_CODE (init) != CONSTRUCTOR

I might change this to

 if (!CP_AGGREGATE_TYPE_P (type))
   return initializer_zerop (init);
 else if (TREE_CODE (init) != CONSTRUCTOR)
   return false;

and then remove the


  if (TYPE_NON_AGGREGATE_CLASS (type))
return false;


later in the function.

More generally, this function could recognize when the initializer is 
equivalent to {}-initialization and return true in that case, but that 
sounds probably too tricky for stage 4.


Jason



Re: [PATCH] document that alias and target must have the same type

2020-02-13 Thread Sandra Loosemore

On 2/5/20 1:13 PM, Martin Sebor wrote:


diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index ec99c38a607..3634ce1c423 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -2557,8 +2557,11 @@ __attribute__ ((access (write_only, 1, 2), access 
(read_write, 3))) int fgets (c
 
 @item alias ("@var{target}")

 @cindex @code{alias} function attribute
-The @code{alias} attribute causes the declaration to be emitted as an
-alias for another symbol, which must be specified.  For instance,
+The @code{alias} attribute causes the declaration to be emitted as an alias
+for another symbol, which must have been previously declared with the same
+type, and for variables same size and alignment.  Declaring an alias with
+a different type than the target is undefined and may be diagnosed.  For
+instance, an alias for another symbol, which must be specified.  For instance,
 
 @smallexample

 void __f () @{ /* @r{Do something.} */; @}


This is kind of garbled.  Do you have an extraneous sentence beginning 
"For instance" in there?  It doesn't flow from the text you added before 
that.



@@ -3919,31 +3922,41 @@ results in warning on line 5.
 
 @item weak

 @cindex @code{weak} function attribute
-The @code{weak} attribute causes the declaration to be emitted as a weak
-symbol rather than a global.  This is primarily useful in defining
-library functions that can be overridden in user code, though it can
-also be used with non-function declarations.  Weak symbols are supported
-for ELF targets, and also for a.out targets when using the GNU assembler
-and linker.
+The @code{weak} attribute causes a declaration of an external symbol
+to be emitted as a weak symbol rather than a global.  This is primarily
+useful in defining library functions that can be overridden in user code,
+though it can also be used with non-function declarations.  The overriding
+symbol must have the same type, and for variables size, and alignment as
+the weak symbol.  Weak symbols are supported for ELF targets, and also for
+a.out targets when using the GNU assembler and linker.


The "for variables" clause is awkward and has a comma in the wrong 
place.  I'd split this into a separate sentence, like


The overriding symbol must have the same type as the weak symbol.  If it 
is a variable it must also have the same size and alignment.



 @item weakref
 @itemx weakref ("@var{target}")
 @cindex @code{weakref} function attribute
 The @code{weakref} attribute marks a declaration as a weak reference.
 Without arguments, it should be accompanied by an @code{alias} attribute
-naming the target symbol.  Optionally, the @var{target} may be given as
-an argument to @code{weakref} itself.  In either case, @code{weakref}
-implicitly marks the declaration as @code{weak}.  Without a
-@var{target}, given as an argument to @code{weakref} or to @code{alias},
-@code{weakref} is equivalent to @code{weak}.
+naming the target symbol.  Alernatively, @var{target} may be given as


s/Alernatively/Alternatively/


+an argument to @code{weakref} itself, naming the target definition of
+the alias.  The the @var{target} must have the same type, and for


s/The the/The/


+variables also the same size and alignment as the declaration.  In either


Same comments above re splitting this into two sentences to avoid 
awkward wording.


The rest of this patch looks OK.

-Sandra


Backports to 9.3

2020-02-13 Thread Jakub Jelinek
Hi!

I've backported following 15 commits from trunk to 9.3 branch,
bootstrapped/regtested on x86_64-linux and i686-linux, committed.

r10-6186-g32667e04c7153d97d09d81c1af073d400f0c719a
r10-6273-gbff948aa337807260344c83ac9079d6386410094
r10-6314-gaa1b56967d85bfc80d71341395f862ec2b30ca36
r10-6315-g8d7c0bf876fa784101f9ad9e3bba82cc065357da
r10-6358-g56b92750f83724177d2c6eae30c208e935a56a37
r10-6444-gb843bcb89519293404bb00d2ed09aae529b54d7f
r10-6460-g5a8ad97b6e4823d4ded00a3ce8d80e4bf93368d4
r10-6470-gcf785618ecc90e3f063b99572de48cb91aa5ab5d
r10-6471-gcb3f06480a17f98579704b9927632627a3814c5c
r10-6522-g79ab8c4321b2dc940bb706a7432a530e26f0df1a
r10-6565-gf57aa9503ff170ff6c8549718bd736f6c8168bab
r10-6593-g62fc0a6ce28c502fc6a7b7c09157840bf98f945f
r10-6612-gdc6d0f89d4be3ed7fde73417606a78c73d954cdf
r10-6617-gae2b8ede40a81a83f50d1e705972bc46fafd4ce5
r10-6625-gbacdd5e978dad84e9c547b0d5c7fed14b8d75157

Jakub
From 3b2fbe3e723b20ea9089e5f45c55b79feb37085b Mon Sep 17 00:00:00 2001
From: Jakub Jelinek 
Date: Thu, 23 Jan 2020 20:08:22 +0100
Subject: [PATCH 01/15] postreload: Fix up postreload combine [PR93402]

The following testcase is miscompiled, because the postreload pass changes:
-(insn 14 13 23 2 (parallel [
-(set (reg:DI 1 dx [94])
-(plus:DI (reg:DI 1 dx [95])
-(reg:DI 5 di [92])))
-(clobber (reg:CC 17 flags))
-]) "pr93402.c":8:30 186 {*adddi_1}
- (expr_list:REG_EQUAL (plus:DI (reg:DI 5 di [92])
-(const_int  [0x19debd01c7]))
-(nil)))
-(insn 23 14 25 2 (set (reg:SI 0 ax)
+(insn 23 13 25 2 (set (reg:SI 0 ax)
 (const_int 0 [0])) "pr93402.c":10:1 67 {*movsi_internal}
  (nil))
 (insn 25 23 26 2 (use (reg:SI 0 ax)) "pr93402.c":10:1 -1
  (nil))
-(insn 26 25 35 2 (use (reg:DI 1 dx)) "pr93402.c":10:1 -1
+(insn 26 25 35 2 (use (plus:DI (reg:DI 1 dx [95])
+(reg:DI 5 di [92]))) "pr93402.c":10:1 -1
  (nil))
A USE insn is not a normal insn and verify_changes called from
apply_change_group is happy about any changes into it.
The following patch avoids this optimization if we were to change
the USE operand (this routine only changes a reg into (plus reg reg2)).

2020-01-23  Jakub Jelinek  

PR rtl-optimization/93402
* postreload.c (reload_combine_recognize_pattern): Don't try to adjust
USE insns.

* gcc.c-torture/execute/pr93402.c: New test.
---
 gcc/ChangeLog |  9 
 gcc/postreload.c  |  4 
 gcc/testsuite/ChangeLog   |  8 +++
 gcc/testsuite/gcc.c-torture/execute/pr93402.c | 21 +++
 4 files changed, 42 insertions(+)
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr93402.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 1916dab20d1..2029c67bf02 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2020-02-13  Jakub Jelinek  
+
+   Backported from mainline
+   2020-01-23  Jakub Jelinek  
+
+   PR rtl-optimization/93402
+   * postreload.c (reload_combine_recognize_pattern): Don't try to adjust
+   USE insns.
+
 2020-02-11  Tamar Christina  
 
Backport from mainline
diff --git a/gcc/postreload.c b/gcc/postreload.c
index 728aa9b0ed5..b76c7b0b758 100644
--- a/gcc/postreload.c
+++ b/gcc/postreload.c
@@ -1081,6 +1081,10 @@ reload_combine_recognize_pattern (rtx_insn *insn)
   struct reg_use *use = reg_state[regno].reg_use + i;
   if (GET_MODE (*use->usep) != mode)
return false;
+  /* Don't try to adjust (use (REGX)).  */
+  if (GET_CODE (PATTERN (use->insn)) == USE
+ &&  (PATTERN (use->insn), 0) == use->usep)
+   return false;
 }
 
   /* Look for (set (REGX) (CONST_INT))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 1dcf894a92a..bec5eba5033 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@
+2020-02-13  Jakub Jelinek  
+
+   Backported from mainline
+   2020-01-23  Jakub Jelinek  
+
+   PR rtl-optimization/93402
+   * gcc.c-torture/execute/pr93402.c: New test.
+
 2020-02-11  Tamar Christina  
 
Backport from mainline
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr93402.c 
b/gcc/testsuite/gcc.c-torture/execute/pr93402.c
new file mode 100644
index 000..6487797d0aa
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr93402.c
@@ -0,0 +1,21 @@
+/* PR rtl-optimization/93402 */
+
+struct S { unsigned int a; unsigned long long b; };
+
+__attribute__((noipa)) struct S
+foo (unsigned long long x)
+{
+  struct S ret;
+  ret.a = 0;
+  ret.b = x * 111ULL + ULL;
+  return ret;
+}
+
+int
+main ()
+{
+  struct S a = foo (1);
+  if (a.a != 0 || a.b != 1222ULL)
+__builtin_abort ();
+  return 0;
+}
-- 
2.20.1

From 764e831291a2e510978ca7be0bffb55589a5a0b6 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek 
Date: Tue, 28 Jan 2020 08:46:23 +0100
Subject: [PATCH 02/15] 

Re: [PATCH] document that alias and target must have the same type

2020-02-13 Thread Sandra Loosemore

On 2/4/20 6:05 PM, Martin Sebor wrote:

GCC diagnoses declarations of function aliases whose type doesn't
match that of the target (ditto for attribute weakref).  It doesn't
yet diagnose such incompatbilities for variable aliases but that's
just an oversight that I will try to remember to correct in GCC 11.
The attached patch updates the manual to make it clear that
aliases must have the same type as their targets, or the behavior
is undefined (and may be diagnosed).



This is OK with the "The the" typo in this hunk fixed:


@@ -3932,7 +3935,8 @@ and linker.
 The @code{weakref} attribute marks a declaration as a weak reference.
 Without arguments, it should be accompanied by an @code{alias} attribute
 naming the target symbol.  Optionally, the @var{target} may be given as
-an argument to @code{weakref} itself.  In either case, @code{weakref}
+an argument to @code{weakref} itself.  The the @var{target} must have
+the same type as the declaration.  In either case, @code{weakref}
 implicitly marks the declaration as @code{weak}.  Without a
 @var{target}, given as an argument to @code{weakref} or to @code{alias},
 @code{weakref} is equivalent to @code{weak}.


-Sandra


Update gcc.target/powerpc/pr92132-fp test for power7 and older

2020-02-13 Thread will schmidt
Hi,
  Attempting to clean up some more testcase failures.
This test in particular exercises the -ftree-vectorize
option, and by inspection I see this fails out with assorted
"conversion not supported by target" messages on power7 and
earlier systems.

Thus, this would limits the scan-dump check of "LOOP VECTORIZED" to
those targets supporting power8 vector support.
The testcase itself otherwise runs fine, so limiting the test
itself seems unnecessary.

OK for master?

Thanks,
-Will

[testsuite]
* gcc.target/powerpc/pr92132-fp-1.c: Specify target for scan-dump check.

diff --git a/gcc/testsuite/gcc.target/powerpc/pr92132-fp-1.c 
b/gcc/testsuite/gcc.target/powerpc/pr92132-fp-1.c
index 1023e8c..2014896 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr92132-fp-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr92132-fp-1.c
@@ -292,6 +292,6 @@ main (void)
 abort ();
 
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 14 "vect" } } */
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 14 "vect" { target 
p8vector_hw } } } */



Fix existing fold-vec-extract-longlong.p8.c testcase

2020-02-13 Thread will schmidt
Hi,
 The code generated by this test changed shortly after
this test was committed, and we didn't get back to
updating the scan-assembler statements to match.
Until now.

Tested across assorted power* linux targets.

OK for master?

Thanks
-Will

[testsuite]
* gcc.target/powerpc/fold-vec-extract-longlong.p8.c: Correct expected insns.

diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-longlong.p8.c 
b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-longlong.p8.c
index e8aabd0..f8f399b 100644
--- a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-longlong.p8.c
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-longlong.p8.c
@@ -3,29 +3,25 @@
 
 /* { dg-do compile { target { powerpc*-*-linux* } } } */
 /* { dg-require-effective-target powerpc_p8vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power8 -O2" } */
 
-// targeting P8, both LE and BE. six tests.
+// Targeting P8LE and P8BE, six tests total.
 // P8 (LE) constants: mfvsrd
-// P8 (LE) variables: xori, rldic, mtvsrd, xxpermdi, vslo, mfvsrd
-// P8 (BE) constants: xxpermdi, mfvsrd
-// P8 (BE) Variables:   rldic, mtvsrd, xxpermdi, vslo, mfvsrd
+// P8 (LE) variables: addi,xxpermdi,mr,stxvd2x|sxxvd4x,rldicl,sldi,ldx,blr
+// P8 (BE) constants: mfvsrd
+// P8 (BE) Variables: addi,xxpermdi,rldicl,mr,stxvd2x|stxvd4x,sldi,ldx,blr
 
-/* results. */
-/* { dg-final { scan-assembler-times {\mxori\M} 3 { target le } } } */
-/* { dg-final { scan-assembler-times {\mrldic\M|\mrlwinm\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mrldicl\M|\mrldic\M|\mrlwinm\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxvw4x\M} 3 { target lp64 
} } } */
 /* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxvw4x\M} 4 { target 
ilp32 } } } */
 /* { dg-final { scan-assembler-times {\madd\M} 3 { target ilp32 } } } */
 /* { dg-final { scan-assembler-times {\mlwz\M} 11 { target ilp32 } } } */
 /* { dg-final { scan-assembler-times {\maddi\M} 6 { target ilp32 } } } */
-/* { dg-final { scan-assembler-times {\mmfvsrd\M} 6 { target lp64 } } } */
-/* { dg-final { scan-assembler-times {\mmtvsrd\M} 3 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mmfvsrd\M} 3 { target lp64 } } } */
 /* { dg-final { scan-assembler-times {\mxxpermdi\M} 3 { target le } } } */
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 6 { target { be && lp64 } 
} } } */
 /* { dg-final { scan-assembler-times {\mxxpermdi\M} 2 { target { be && ilp32 } 
} } } */
-/* { dg-final { scan-assembler-times {\mvslo\M} 3 { target lp64 } } } */
 
 #include 
 
 unsigned long long
 testbl_var (vector bool long long vbl2, signed int si)



[committed] c++: Fix useless using-declaration.

2020-02-13 Thread Jason Merrill
Here reintroducing the same declarations into the global namespace via
using-declaration is useless but OK.  And a function and a function template
with the same parameters do not conflict.

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/cp/ChangeLog
2020-02-13  Jason Merrill  

PR c++/93713
* name-lookup.c (matching_fn_p): A function does not match a
template.
---
 gcc/cp/name-lookup.c  | 6 --
 gcc/testsuite/g++.dg/lookup/using62.C | 3 +++
 2 files changed, 7 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/lookup/using62.C

diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c
index 2a9bae53162..e5638d2df91 100644
--- a/gcc/cp/name-lookup.c
+++ b/gcc/cp/name-lookup.c
@@ -2321,12 +2321,14 @@ update_local_overload (cxx_binding *binding, tree 
newval)
 static bool
 matching_fn_p (tree one, tree two)
 {
+  if (TREE_CODE (one) != TREE_CODE (two))
+return false;
+
   if (!compparms (TYPE_ARG_TYPES (TREE_TYPE (one)),
  TYPE_ARG_TYPES (TREE_TYPE (two
 return false;
 
-  if (TREE_CODE (one) == TEMPLATE_DECL
-  && TREE_CODE (two) == TEMPLATE_DECL)
+  if (TREE_CODE (one) == TEMPLATE_DECL)
 {
   /* Compare template parms.  */
   if (!comp_template_parms (DECL_TEMPLATE_PARMS (one),
diff --git a/gcc/testsuite/g++.dg/lookup/using62.C 
b/gcc/testsuite/g++.dg/lookup/using62.C
new file mode 100644
index 000..e7a952ace08
--- /dev/null
+++ b/gcc/testsuite/g++.dg/lookup/using62.C
@@ -0,0 +1,3 @@
+template  T bar ();
+void bar () {}
+using :: bar;

base-commit: 613c932f5e5c0cc2e4b88e21d9870fa7b1a6ce5d
-- 
2.18.1



[comitted] c++: Fix static local vars in extern "C".

2020-02-13 Thread Jason Merrill
Since my patch for PR 91476 moved visibility determination sooner, a local
static in a vague linkage function now gets TREE_PUBLIC set before
retrofit_lang_decl calls set_decl_linkage, which was making decl_linkage
think that it has external linkage.  It still has no linkage according to
the standard.

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/cp/ChangeLog
2020-02-13  Jason Merrill  

PR c++/93643
PR c++/91476
* tree.c (decl_linkage): Always lk_none for locals.
---
 gcc/cp/tree.c |  9 +++
 .../g++.dg/lookup/extern-c-static1.C  | 27 +++
 2 files changed, 31 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/lookup/extern-c-static1.C

diff --git a/gcc/cp/tree.c b/gcc/cp/tree.c
index 736ef6fe667..72b3a720ee8 100644
--- a/gcc/cp/tree.c
+++ b/gcc/cp/tree.c
@@ -5266,6 +5266,10 @@ decl_linkage (tree decl)
   if (TREE_CODE (decl) == FIELD_DECL)
 return lk_none;
 
+  /* Things in local scope do not have linkage.  */
+  if (decl_function_context (decl))
+return lk_none;
+
   /* Things that are TREE_PUBLIC have external linkage.  */
   if (TREE_PUBLIC (decl))
 return lk_external;
@@ -5285,11 +5289,6 @@ decl_linkage (tree decl)
   if (TREE_CODE (decl) == CONST_DECL)
 return decl_linkage (TYPE_NAME (DECL_CONTEXT (decl)));
 
-  /* Things in local scope do not have linkage, if they don't have
- TREE_PUBLIC set.  */
-  if (decl_function_context (decl))
-return lk_none;
-
   /* Members of the anonymous namespace also have TREE_PUBLIC unset, but
  are considered to have external linkage for language purposes, as do
  template instantiations on targets without weak symbols.  DECLs really
diff --git a/gcc/testsuite/g++.dg/lookup/extern-c-static1.C 
b/gcc/testsuite/g++.dg/lookup/extern-c-static1.C
new file mode 100644
index 000..0ba8d67ba15
--- /dev/null
+++ b/gcc/testsuite/g++.dg/lookup/extern-c-static1.C
@@ -0,0 +1,27 @@
+// PR c++/93643
+
+void* callback(const char* name);
+
+extern "C" {
+
+  inline void f1()
+  {
+static void (*f)();
+f = (void(*)()) callback("f1");
+f();
+  }
+
+  inline void f2()
+  {
+static void (*f)();
+f = (void(*)()) callback("f2");
+f();
+  }
+
+} // extern "C"
+
+int main()
+{
+  f1();
+  f2();
+}

base-commit: 613c932f5e5c0cc2e4b88e21d9870fa7b1a6ce5d
-- 
2.18.1



Re: [PATCH 2/2] libstdc++: Implement ranges [specialized.algorithms]

2020-02-13 Thread Patrick Palka
On Thu, 13 Feb 2020, Jonathan Wakely wrote:

> On 12/02/20 15:41 -0500, Patrick Palka wrote:
> > This implements all the ranges members defined in [specialized.algorithms]:
> > 
> >  ranges::uninitialized_default_construct
> >  ranges::uninitialized_value_construct
> >  ranges::uninitialized_copy
> >  ranges::uninitialized_copy_n
> >  ranges::uninitialized_move
> >  ranges::uninitialized_move_n
> >  ranges::uninitialized_fill
> >  ranges::uninitialized_fill_n
> >  ranges::construct_at
> >  ranges::destroy_at
> >  ranges::destroy
> > 
> > It also implements (hopefully correctly) the "obvious" optimizations for
> > these
> > algos, namely that if the output range has a trivial value type and if the
> > appropriate operation won't throw then we can dispatch to the standard
> > ranges
> > version of the algorithm which will then potentially enable further
> > optimizations.
> > 
> > libstdc++-v3/ChangeLog:
> > 
> > * include/Makefile.am: Add .
> > * include/Makefile.in: Regenerate.
> > * include/bits/ranges_uninitialized.h: New header.
> > * include/std/memory: Include it.
> > * testsuite/20_util/specialized_algorithms/destroy/constrained.cc: New
> > test.
> > * .../uninitialized_copy/constrained.cc: New test.
> > * .../uninitialized_default_construct/constrained.cc: New test.
> > * .../uninitialized_fill/constrained.cc: New test.
> > * .../uninitialized_move/constrained.cc: New test.
> > * .../uninitialized_value_construct/constrained.cc: New test.
> 
> 
> 
> > +  template<__detail::__nothrow_input_iterator _Iter,
> > +  __detail::__nothrow_sentinel<_Iter> _Sent>
> > +requires destructible>
> > +constexpr _Iter
> > +destroy(_Iter __first, _Sent __last) noexcept
> > +{
> > +  if constexpr (is_trivially_destructible_v>)
> > +   return ranges::next(__first, __last);
> > +  else
> > +   {
> > + for (; __first != __last; ++__first)
> > +   ranges::destroy_at(addressof(*__first));
> 
> This should be std::__addressof
> 
> > + return __first;
> > +   }
> > +}
> > +
> > +  template<__detail::__nothrow_input_range _Range>
> > +requires destructible>
> > +constexpr safe_iterator_t<_Range>
> > +destroy(_Range&& __r) noexcept
> > +{ return ranges::destroy(ranges::begin(__r), ranges::end(__r)); }
> > +
> > +  template<__detail::__nothrow_input_iterator _Iter>
> > +requires destructible>
> > +constexpr _Iter
> > +destroy_n(_Iter __first, iter_difference_t<_Iter> __n) noexcept
> > +{
> > +  if constexpr (is_trivially_destructible_v>)
> > +   return ranges::next(__first, __n);
> > +  else
> > +   {
> > + for (; __n > 0; ++__first, (void)--__n)
> > +   ranges::destroy_at(addressof(*__first));
> 
> Same here.
> 
> OK for master with those two adjustments.

Incorporated those two changes and committed both patches.  Thanks for
the review!



Re: Do not call null register_common in emutls

2020-02-13 Thread Jeff Law
On Thu, 2020-02-06 at 04:07 -0300, Alexandre Oliva wrote:
> Thread-local variables with DECL_COMMON trigger an internal compiler
> error on targets that use emulated TLS without register_common, when
> we attempt to expand a call to the NULL register_common, with
> testcases as simple as gcc.dg/tls/emutls-2.c.
> 
> The documentation states that, on such targets, common variables would
> fall back to explicitly initialized.  This patch rearranges the code
> that deals with initialization of common and non-common variables,
> complementing code that is already in place to detect
> register_common-less targets.
> 
> 
> Regstrapped on x86_64-linux-gnu, tested on an affected target.
> Ok to install?
> 
> 
> for  gcc/ChangeLog
> 
>   * tree-emutls.c (new_emutls_decl, emutls_common_1): Complete
>   handling of register_common-less targets.
> 
> for  gcc/testsuite/ChangeLog
> 
>   * gcc.dg/tls/emutls-3.c: New, combining emutls-2.c and
>   thr-init-2.c into an execution test with explicitly common
>   variables.
Not strictly a regression AFAICT, but it seems OK for the trunk to me.

jeff
> 



[PATCH] c++: Fix ICE with ill-formed array list-initialization [PR93712]

2020-02-13 Thread Marek Polacek
My P0388R4 patch changed build_array_conv to create an identity
conversion at the start of the conversion chain.  That was a sound
change but now we crash in convert_like_real

 7457 case ck_identity:
 7458   if (BRACE_ENCLOSED_INITIALIZER_P (expr))
 7459 {
 7460   int nelts = CONSTRUCTOR_NELTS (expr);
 7461   if (nelts == 0)
 7462 expr = build_value_init (totype, complain);
 7463   else if (nelts == 1)
 7464 expr = CONSTRUCTOR_ELT (expr, 0)->value;
 7465   else
 7466 gcc_unreachable ();  // HERE
 7467 }

in a test like this

  int f (int const (&)[2])
  {
return f({1, " "});
  }

I considered fixing this when performing overload resolution (clang says
"no matching function for call to 'f'"), but then it occured to me that
we crash in different contexts too, so I'm just turning the assert into
an early return.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2020-02-13  Marek Polacek  

PR c++/93712 - ICE with ill-formed array list-initialization.
* call.c (convert_like_real): Turn an assert into a return.

* g++.dg/cpp0x/initlist-array11.C: New test.
---
 gcc/cp/call.c |  2 +-
 gcc/testsuite/g++.dg/cpp0x/initlist-array11.C | 10 ++
 2 files changed, 11 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/initlist-array11.C

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 51621b7dd87..eba0ed8041d 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -7463,7 +7463,7 @@ convert_like_real (conversion *convs, tree expr, tree fn, 
int argnum,
  else if (nelts == 1)
expr = CONSTRUCTOR_ELT (expr, 0)->value;
  else
-   gcc_unreachable ();
+   return error_mark_node;
}
   expr = mark_use (expr, /*rvalue_p=*/!convs->rvaluedness_matches_p,
   /*read_p=*/true, UNKNOWN_LOCATION,
diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist-array11.C 
b/gcc/testsuite/g++.dg/cpp0x/initlist-array11.C
new file mode 100644
index 000..7e76b588471
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/initlist-array11.C
@@ -0,0 +1,10 @@
+// PR c++/93712 - ICE with ill-formed array list-initialization.
+// { dg-do compile { target c++11 } }
+
+int f (const int (&)[2]);
+
+int g ()
+{
+  const int ()[2] = {1, "foo"}; // { dg-error "invalid conversion" }
+  return f({1, "foo"}); // { dg-error "invalid conversion" }
+}

base-commit: 1d69147af203d4dcd2270429f90c93f1a37ddfff
-- 
Marek Polacek • Red Hat, Inc. • 300 A St, Boston, MA



Re: Patch ping

2020-02-13 Thread Jeff Law
On Thu, 2020-02-13 at 10:42 -0700, Martin Sebor wrote:
> On 2/13/20 2:54 AM, Jakub Jelinek wrote:
> > On Wed, Feb 12, 2020 at 02:39:05PM -0700, Jeff Law wrote:
> > > On Mon, 2020-02-10 at 10:24 +0100, Jakub Jelinek wrote:
> > > > Hi!
> > > > 
> > > > I'd like to ping a couple of patches:
> > > > 
> > > > PR target/91913 - arm movsi + cmpsi -> movsi_compare0 peephole2 ICE fix
> > > > https://gcc.gnu.org/ml/gcc-patches/2020-02/msg00010.html
> > > Letting the ARM guys deal with this.
> > 
> > Yes, that is resolved now (Richard E. committed his patch and I've
> > committed the testcase).
> > 
> > > > PR preprocessor/92319 - partially implement P1042R1: __VA_OPT__ wording 
> > > > clarifications
> > > > https://gcc.gnu.org/ml/gcc-patches/2020-01/msg02104.html
> > > Jason for this one.
> > 
> > Of course; I just chose to send a ping for all my pending patches and
> > add to To: all relevant maintainers.
> > 
> > > > PR target/93069 - avx512* rejects-valid fix (rejected by assembler)
> > > > http://gcc.gnu.org/ml/gcc-patches/2019-12/msg01606.html
> > > This is in my queue :-)
> > 
> > Ok.
> > 
> > > > PR tree-optimization/92868 - compute_objsize/gimple_call_alloc_size
> > > >   /maybe_warn_overflow fixes
> > > > http://gcc.gnu.org/ml/gcc-patches/2019-12/msg01164.html
> > > Martin's patch should have addressed all the issues and should include
> > > your tests (tweaked, but supposed to be equivalent).
> > 
> > No, this is something different, this isn't what has been covered by the
> > testcases, but something found by code inspection, mainly inconsistencies
> > in the APIs, e.g. the ranges represented as sizetype most of the time,
> > but with one exception where it could be some other type (wider or
> > narrower), or sometimes the range being incorrect (if there is possible
> > overflow and we punt, we didn't change the ranges effectively to VARYING,
> > but just capped the maximum), or INTEGER_CSTs compared by pointer equality
> > rather than operand_equal_p.
> 
> As I said repeatedly in my comments on the patch, I'm not in favor
> of these changes.  I don't think they hurt anything but they also
> don't fix anything that I can see.  There's is no bug the change
> fixes (PR 92868 is closed as resolved) or a test case included in
> the patch that verifies the improvement.  The changes are also not
> in the direction I'd like to see this code evolve.
Jakub, let's defer any cleanups unless there's a reported bug.  We can
come back to this stuff for gcc-11.

jeff
> 
> Martin
> 



Re: [PATCH] c: Fix ICE with cast to VLA [93576]

2020-02-13 Thread Jeff Law
On Thu, 2020-02-13 at 18:57 +0100, Jakub Jelinek wrote:
> Hi!
> 
> The following testcase ICEs, because the PR84305 changes try to evaluate
> the size earlier.  If size has side-effects, that is desirable, and the
> side-effects will actually be wrapped in a SAVE_EXPR.  The problem on this
> testcase is that there are no side-effects, and c_fully_fold doesn't fold
> those COMPOUND_EXPRs to constant, and while before gimplification we unshare
> trees found in the expressions, the unsharing doesn't involve TYPE_SIZE etc.
> of used types.  Gimplification is destructive though, so when we gimplify
> the two nested COMPOUND_EXPRs and then try to gimplify it the second time
> for the TYPE_SIZEs, we ICE.
> Now, we could use unshare_expr in what we push to *expr, SAVE_EXPRs and
> their operands in there aren't unshared, but I really don't see a point of
> evaluating expressions that don't have side-effects before, so instead
> this just pushes there expressions that do have side-effects.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and
> release branches?
> 
> 2020-02-13  Jakub Jelinek  
> 
>   PR c/93576
>   * c-decl.c (grokdeclarator): If this_size_varies, only push size into
>   *expr if it has side effects.
> 
>   * gcc.dg/pr93576.c: New test.
OK
jeff
> 



Re: [PATCH 2/3] libstdc++: Implement C++20 constrained algorithms

2020-02-13 Thread Jonathan Wakely

On 13/02/20 19:07 +0100, François Dumont wrote:

On 2/4/20 3:07 AM, Patrick Palka wrote:

This patch implements the C++20 ranges overloads for the algorithms in
[algorithms].  Most of the algorithms were reimplemented, with each of their
implementations very closely following the existing implementation in
bits/stl_algo.h and bits/stl_algobase.h.  The reason for reimplementing most of
the algorithms instead of forwarding to their STL-style overload is because
forwarding cannot be conformantly and efficiently performed for algorithms that
operate on non-random-access iterators.

Why ? Do you have a clear counter-example ?

Maybe at the time you wrote this code those algos were not constexpr 
qualified, but they are now.


It has nothing to do with constexpr.

If you call a ranges algo with an iterator and a sentinel that is a
different type to the iterator, you can't call the old STL algo unless
you can efficiently get an end iterator that refers to the same
position as the sentinel. For random access iterators that is:

auto last2 = first + (last - first);

But for non-random access iterators finding the distance between first
and last is not possible in O(1), and incrementing first by that
distance is also not possible in O(1).


  But algorithms that operate on random
access iterators can safely and efficiently be forwarded to the STL-style
implementation, and this patch does so for push_heap, pop_heap, make_heap,
sort_heap, sort, stable_sort, nth_element, inplace_merge and stable_partition.
IMHO we should try as much as possible to forward to algos in 
stl_algobase.h.


That would not be conforming in many cases.

The old code assumes iterators can be copied. It assumes they have an
iterator_category. It assumes they have operator->(). Most
importantly, it assumes the begin and end iterators have the same
type.

The old algorithms do tag dispatching, which adds function call
overhead at run-time and overload resolution overhead at compile-time.

The new constrained algos can be implemented much more cleanly using
if-constexpr and the new iterator concepts.

There are very good reasons to reimplement the new ranges algos.

Those are highly customized and will be even more in the future when 
some patches I have on my side will be integrated (I hope).


If you do so you won't have to care much about _GLIBCXX_DEBUG iterators.



What's missing from this patch is debug-iterator and container specializations
that are present for some of the STL-style algorithms that need to be ported
over to the ranges algos.  I marked them missing at TODO comments.  There are
also some other minor outstanding TODOs.

The code that could use the most thorough review is ranges::__copy_or_move,
ranges::__copy_or_move_backward, ranges::__equal and
ranges::__lexicographical_compare.  In the tests, I tried to test the interface
of each new overload, as well as the correctness of the new implementation.







[committed, obvious] Add -fdelete-null-pointer-checks to more new C++ testcases.

2020-02-13 Thread Sandra Loosemore
A few weeks ago I committed this patch to fix some problems I saw in 
nios2-elf testing:


https://gcc.gnu.org/ml/gcc-patches/2020-01/msg01385.html

Since then there have been some further new C++ constexpr testcases 
added that have the same problem with assuming 
-fdelete-null-pointer-checks.  I've committed the attached patch as an 
obvious followup to the last one.


-Sandra
commit bb97ad35ead015075ee4747136c9fc75faa27411
Author: Sandra Loosemore 
Date:   Thu Feb 13 10:47:55 2020 -0800

Add -fdelete-null-pointer-checks to more new C++ testcases.

2020-02-13  Sandra Loosemore  

	gcc/testsuite/
	* g++.dg/cpp0x/constexpr-static13.C:
	Add -fdelete-null-pointer-checks.
	* g++.dg/cpp2a/constexpr-new11.C: Likewise.
	* g++.dg/cpp2a/constexpr-new12.C: Likewise.

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c296fc3..560187c 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2020-02-13  Sandra Loosemore  
+
+	* g++.dg/cpp0x/constexpr-static13.C:
+	Add -fdelete-null-pointer-checks.
+	* g++.dg/cpp2a/constexpr-new11.C: Likewise.
+	* g++.dg/cpp2a/constexpr-new12.C: Likewise.
+
 2020-02-13  H.J. Lu  
 
 	PR target/93656
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-static13.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-static13.C
index 644f9f7..2677a22 100644
--- a/gcc/testsuite/g++.dg/cpp0x/constexpr-static13.C
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-static13.C
@@ -1,5 +1,6 @@
 // PR c++/92003
 // { dg-do compile { target c++11 } }
+// { dg-additional-options "-fdelete-null-pointer-checks" }
 // { dg-prune-output "narrowing conversion" }
 
 constexpr char const* get_c_str() { return "abc"; }
diff --git a/gcc/testsuite/g++.dg/cpp2a/constexpr-new11.C b/gcc/testsuite/g++.dg/cpp2a/constexpr-new11.C
index 26658d0..2bf359a 100644
--- a/gcc/testsuite/g++.dg/cpp2a/constexpr-new11.C
+++ b/gcc/testsuite/g++.dg/cpp2a/constexpr-new11.C
@@ -1,5 +1,6 @@
 // PR c++/93633
 // { dg-do compile { target c++2a } }
+// { dg-additional-options "-fdelete-null-pointer-checks" }
 
 struct A {
   constexpr A () : a (0) {}
diff --git a/gcc/testsuite/g++.dg/cpp2a/constexpr-new12.C b/gcc/testsuite/g++.dg/cpp2a/constexpr-new12.C
index 2dedcd2..04f7597 100644
--- a/gcc/testsuite/g++.dg/cpp2a/constexpr-new12.C
+++ b/gcc/testsuite/g++.dg/cpp2a/constexpr-new12.C
@@ -1,5 +1,6 @@
 // PR c++/93633
 // { dg-do compile { target c++2a } }
+// { dg-additional-options "-fdelete-null-pointer-checks" }
 
 struct A {
   constexpr A () : a (0) {}


Re: [PATCH Coroutines]Insert the default return_void call at correct position

2020-02-13 Thread Iain Sandoe

Hello Bin, Nathan,

Iain Sandoe  wrote:



We are seeking to clarify the standard wording around this (and the cases  
where
unhandled_exception() returns - hopefully during the WG21 meeting this  
week,


FWIW, I think your interpretation makes sense here.


It’s not clear if the committee will have time to process wording changes  
needed for this during the current session, however, it is agreed that  
injecting the return_void at the closing brace of the user’s original  
function (your solution) is the correct approach.


So, from my point of view this patch DTRT and should be applied,

thanks
Iain



Thanks,
bin

On Mon, Feb 3, 2020 at 1:55 PM bin.cheng   
wrote:

Hi,

Exception in coroutine is not correctly handled because the default
return_void call is now inserted before the finish suspend point,
rather than at the end of the original coroutine body.  This patch
fixes the issue by generating following code:
co_await promise.initial_suspend();
try {
 // The original coroutine body

 promise.return_void(); // The default return_void call.
} catch (...) {
 promise.unhandled_exception();
}
final_suspend:
// ...

Bootstrap and test on x86_64.  Is it OK?

Thanks,
bin

gcc/cp
2020-02-03  Bin Cheng  

 * coroutines.cc (build_actor_fn): Factor out code inserting the
 default return_void call to...
 (morph_fn_to_coro): ...here, also hoist local var declarations.

gcc/testsuite
2020-02-03  Bin Cheng  

 * g++.dg/coroutines/torture/co-ret-15-default-return_void.C: New.





Re: [PATCH 2/3] libstdc++: Implement C++20 constrained algorithms

2020-02-13 Thread François Dumont

On 2/4/20 3:07 AM, Patrick Palka wrote:

This patch implements the C++20 ranges overloads for the algorithms in
[algorithms].  Most of the algorithms were reimplemented, with each of their
implementations very closely following the existing implementation in
bits/stl_algo.h and bits/stl_algobase.h.  The reason for reimplementing most of
the algorithms instead of forwarding to their STL-style overload is because
forwarding cannot be conformantly and efficiently performed for algorithms that
operate on non-random-access iterators.

Why ? Do you have a clear counter-example ?

Maybe at the time you wrote this code those algos were not constexpr 
qualified, but they are now.



   But algorithms that operate on random
access iterators can safely and efficiently be forwarded to the STL-style
implementation, and this patch does so for push_heap, pop_heap, make_heap,
sort_heap, sort, stable_sort, nth_element, inplace_merge and stable_partition.
IMHO we should try as much as possible to forward to algos in 
stl_algobase.h.


Those are highly customized and will be even more in the future when 
some patches I have on my side will be integrated (I hope).


If you do so you won't have to care much about _GLIBCXX_DEBUG iterators.



What's missing from this patch is debug-iterator and container specializations
that are present for some of the STL-style algorithms that need to be ported
over to the ranges algos.  I marked them missing at TODO comments.  There are
also some other minor outstanding TODOs.

The code that could use the most thorough review is ranges::__copy_or_move,
ranges::__copy_or_move_backward, ranges::__equal and
ranges::__lexicographical_compare.  In the tests, I tried to test the interface
of each new overload, as well as the correctness of the new implementation.





Re: [PATCH] Generalize -fuse-ld= to support absolute path or arbitrary ld.linker

2020-02-13 Thread Fangrui Song via gcc-patches

On 2020-02-09, Fangrui Song wrote:

PR driver/93645
* common.opt (-fuse-ld=): Delete -fuse-ld=[bfd|gold|lld]. Add -fuse-ld=.
* opts.c (common_handle_option): Handle OPT_fuse_ld_.
* gcc.c (driver_handle_option): Likewise.
* collect2.c (main): Likewise.
---
gcc/ChangeLog   |  8 ++
gcc/collect2.c  | 67 -
gcc/common.opt  | 14 ++
gcc/doc/invoke.texi | 15 +++---
gcc/gcc.c   | 14 --
gcc/opts.c  |  4 +--
6 files changed, 57 insertions(+), 65 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index feb2d066d0b..6bcec12d841 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2020-02-09  Fangrui Song  
+
+   PR driver/93645
+   * common.opt (-fuse-ld=): Delete -fuse-ld=[bfd|gold|lld]. Add -fuse-ld=.
+   * opts.c (common_handle_option): Handle OPT_fuse_ld_.
+   * gcc.c (driver_handle_option): Likewise.
+   * collect2.c (main): Likewise.
+
2020-02-09  Uroš Bizjak  

* recog.c: Move pass_split_before_sched2 code in front of
diff --git a/gcc/collect2.c b/gcc/collect2.c
index 502d629141c..a3ef525a93b 100644
--- a/gcc/collect2.c
+++ b/gcc/collect2.c
@@ -859,18 +859,12 @@ main (int argc, char **argv)
{
  USE_DEFAULT_LD,
  USE_PLUGIN_LD,
-  USE_GOLD_LD,
-  USE_BFD_LD,
-  USE_LLD_LD,
-  USE_LD_MAX
+  USE_LD
} selected_linker = USE_DEFAULT_LD;
-  static const char *const ld_suffixes[USE_LD_MAX] =
+  static const char *const ld_suffixes[USE_LD] =
{
  "ld",
-  PLUGIN_LD_SUFFIX,
-  "ld.gold",
-  "ld.bfd",
-  "ld.lld"
+  PLUGIN_LD_SUFFIX
};
  static const char *const real_ld_suffix = "real-ld";
  static const char *const collect_ld_suffix = "collect-ld";
@@ -882,7 +876,7 @@ main (int argc, char **argv)
  static const char *const strip_suffix = "strip";
  static const char *const gstrip_suffix = "gstrip";

-  const char *full_ld_suffixes[USE_LD_MAX];
+  const char *full_ld_suffixes[USE_LD];
#ifdef CROSS_DIRECTORY_STRUCTURE
  /* If we look for a program in the compiler directories, we just use
 the short name, since these directories are already system-specific.
@@ -924,6 +918,7 @@ main (int argc, char **argv)
  const char **ld1;
  bool use_plugin = false;
  bool use_collect_ld = false;
+  const char *use_ld = NULL;

  /* The kinds of symbols we will have to consider when scanning the
 outcome of a first pass link.  This is ALL to start with, then might
@@ -948,7 +943,7 @@ main (int argc, char **argv)
#endif
  int i;

-  for (i = 0; i < USE_LD_MAX; i++)
+  for (i = 0; i < USE_LD; i++)
full_ld_suffixes[i]
#ifdef CROSS_DIRECTORY_STRUCTURE
  = concat (target_machine, "-", ld_suffixes[i], NULL);
@@ -1041,12 +1036,11 @@ main (int argc, char **argv)
if (selected_linker == USE_DEFAULT_LD)
  selected_linker = USE_PLUGIN_LD;
  }
-   else if (strcmp (argv[i], "-fuse-ld=bfd") == 0)
- selected_linker = USE_BFD_LD;
-   else if (strcmp (argv[i], "-fuse-ld=gold") == 0)
- selected_linker = USE_GOLD_LD;
-   else if (strcmp (argv[i], "-fuse-ld=lld") == 0)
- selected_linker = USE_LLD_LD;
+   else if (!strncmp (argv[i], "-fuse-ld=", 9))
+ {
+   use_ld = argv[i] + 9;
+   selected_linker = USE_LD;
+ }
else if (strncmp (argv[i], "-o", 2) == 0)
  {
/* Parse the output filename if it's given so that we can make
@@ -1152,8 +1146,7 @@ main (int argc, char **argv)
  /* Maybe we know the right file to use (if not cross).  */
  ld_file_name = 0;
#ifdef DEFAULT_LINKER
-  if (selected_linker == USE_BFD_LD || selected_linker == USE_GOLD_LD ||
-  selected_linker == USE_LLD_LD)
+  if (!ld_file_name && selected_linker == USE_LD)
{
  char *linker_name;
# ifdef HOST_EXECUTABLE_SUFFIX
@@ -1168,15 +1161,13 @@ main (int argc, char **argv)
  if (! strcmp (_linker[len], HOST_EXECUTABLE_SUFFIX))
{
  default_linker[len] = '\0';
- linker_name = concat (default_linker,
-   _suffixes[selected_linker][2],
+ linker_name = concat (default_linker, ".", use_ld,
HOST_EXECUTABLE_SUFFIX, NULL);
}
}
  if (linker_name == NULL)
# endif
-  linker_name = concat (DEFAULT_LINKER,
-   _suffixes[selected_linker][2],
+  linker_name = concat (DEFAULT_LINKER, ".", use_ld,
NULL);
  if (access (linker_name, X_OK) == 0)
ld_file_name = linker_name;
@@ -1197,14 +1188,28 @@ main (int argc, char **argv)
  ld_file_name = find_a_file (, collect_ld_suffix, X_OK);
  use_collect_ld = ld_file_name != 0;
}
-  /* Search the compiler directories for `ld'.  We have protection against
- recursive calls in find_a_file.  */
-  if (ld_file_name == 0)
-ld_file_name = find_a_file 

[PATCH] c: Fix ICE with cast to VLA [93576]

2020-02-13 Thread Jakub Jelinek
Hi!

The following testcase ICEs, because the PR84305 changes try to evaluate
the size earlier.  If size has side-effects, that is desirable, and the
side-effects will actually be wrapped in a SAVE_EXPR.  The problem on this
testcase is that there are no side-effects, and c_fully_fold doesn't fold
those COMPOUND_EXPRs to constant, and while before gimplification we unshare
trees found in the expressions, the unsharing doesn't involve TYPE_SIZE etc.
of used types.  Gimplification is destructive though, so when we gimplify
the two nested COMPOUND_EXPRs and then try to gimplify it the second time
for the TYPE_SIZEs, we ICE.
Now, we could use unshare_expr in what we push to *expr, SAVE_EXPRs and
their operands in there aren't unshared, but I really don't see a point of
evaluating expressions that don't have side-effects before, so instead
this just pushes there expressions that do have side-effects.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and
release branches?

2020-02-13  Jakub Jelinek  

PR c/93576
* c-decl.c (grokdeclarator): If this_size_varies, only push size into
*expr if it has side effects.

* gcc.dg/pr93576.c: New test.

--- gcc/c/c-decl.c.jj   2020-01-15 12:34:15.079253187 +0100
+++ gcc/c/c-decl.c  2020-02-13 13:49:06.427304070 +0100
@@ -6512,11 +6512,14 @@ grokdeclarator (const struct c_declarato
  }
if (this_size_varies)
  {
-   if (*expr)
- *expr = build2 (COMPOUND_EXPR, TREE_TYPE (size),
- *expr, size);
-   else
- *expr = size;
+   if (TREE_SIDE_EFFECTS (size))
+ {
+   if (*expr)
+ *expr = build2 (COMPOUND_EXPR, TREE_TYPE (size),
+ *expr, size);
+   else
+ *expr = size;
+ }
*expr_const_operands &= size_maybe_const;
  }
  }
--- gcc/testsuite/gcc.dg/pr93576.c.jj   2020-02-13 13:53:06.089708612 +0100
+++ gcc/testsuite/gcc.dg/pr93576.c  2020-02-13 13:52:40.041099237 +0100
@@ -0,0 +1,10 @@
+/* PR c/93576 */
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+void
+foo (void)
+{
+  int b[] = { 0 };
+  (char (*)[(1, 7, 2)]) 0;
+}

Jakub



Re: [PATCH] [arm] Implement Armv8.1-M low overhead loops

2020-02-13 Thread Andrea Corallo
Hi Roman,

Roman Zhuykov  writes:

> This patch is stage1 material, right?

Yes

>>
>>> +(define_insn "*doloop_end"
>>> +  [(parallel [(set (pc)
>>> +   (if_then_else
>>> +   (ne (reg:SI LR_REGNUM) (const_int 1))
>>> + (label_ref (match_operand 0 "" ""))
>>> + (pc)))
>>> +  (set (reg:SI LR_REGNUM)
>>> +   (plus:SI (reg:SI LR_REGNUM) (const_int -1)))])]
>>> +  "TARGET_32BIT && TARGET_HAVE_LOB && !flag_modulo_sched"
>>> +  "le\tlr, %l0")
> I'm not an expert in .md files, but having that "!flag_modulo_sched"
> condition seems wrong to me.  What was the issue on SMS side to add
> that?

With this patch the first insn of the low loop overhead 'doloop_begin'
is expanded by 'doloop_modify' in loop-doloop.c.  The same does not
happen with SMS.  My understanding is that to have it working in that
case too the machine dependent reorg pass should add it later.  Am I
correct on this?

Thanks
  Andrea


Re: Patch ping

2020-02-13 Thread Martin Sebor

On 2/13/20 2:54 AM, Jakub Jelinek wrote:

On Wed, Feb 12, 2020 at 02:39:05PM -0700, Jeff Law wrote:

On Mon, 2020-02-10 at 10:24 +0100, Jakub Jelinek wrote:

Hi!

I'd like to ping a couple of patches:

PR target/91913 - arm movsi + cmpsi -> movsi_compare0 peephole2 ICE fix
https://gcc.gnu.org/ml/gcc-patches/2020-02/msg00010.html

Letting the ARM guys deal with this.


Yes, that is resolved now (Richard E. committed his patch and I've
committed the testcase).


PR preprocessor/92319 - partially implement P1042R1: __VA_OPT__ wording 
clarifications
https://gcc.gnu.org/ml/gcc-patches/2020-01/msg02104.html

Jason for this one.


Of course; I just chose to send a ping for all my pending patches and
add to To: all relevant maintainers.


PR target/93069 - avx512* rejects-valid fix (rejected by assembler)
http://gcc.gnu.org/ml/gcc-patches/2019-12/msg01606.html

This is in my queue :-)


Ok.


PR tree-optimization/92868 - compute_objsize/gimple_call_alloc_size
  /maybe_warn_overflow fixes
http://gcc.gnu.org/ml/gcc-patches/2019-12/msg01164.html

Martin's patch should have addressed all the issues and should include
your tests (tweaked, but supposed to be equivalent).


No, this is something different, this isn't what has been covered by the
testcases, but something found by code inspection, mainly inconsistencies
in the APIs, e.g. the ranges represented as sizetype most of the time,
but with one exception where it could be some other type (wider or
narrower), or sometimes the range being incorrect (if there is possible
overflow and we punt, we didn't change the ranges effectively to VARYING,
but just capped the maximum), or INTEGER_CSTs compared by pointer equality
rather than operand_equal_p.


As I said repeatedly in my comments on the patch, I'm not in favor
of these changes.  I don't think they hurt anything but they also
don't fix anything that I can see.  There's is no bug the change
fixes (PR 92868 is closed as resolved) or a test case included in
the patch that verifies the improvement.  The changes are also not
in the direction I'd like to see this code evolve.

Martin


Re: [PATCH v2] [MIPS] Prevent allocation of a GPR for a floating mode pseudo

2020-02-13 Thread Jeff Law
On Mon, 2020-02-10 at 14:33 +0100, Mihailo Stojanovic wrote:
> Similar to the mirror case of allocating an FPR for an integer mode
> pseudo, prevent GPRs from being allocated for a floating mode pseudo.
> 
> gcc/ChangeLog:
> 
> * gcc/config/mips/mips.c (mips_ira_change_pseudo_allocno_class):
> Limit the allocation of floating mode pseudos to FP_REGS.
Per Andrew's concern, would it make sense to make this conditional on
hardware floating point?

jeff
> 



Re: [fixincludes] skip fixinc on vxworks7*, amend mkheaders

2020-02-13 Thread Jeff Law
On Thu, 2020-02-13 at 01:30 -0300, Alexandre Oliva wrote:
> vxworks7 headers haven't required fixes, and we've long avoided
> running fixinc on them.
> 
> The problem with that is that, with a dummy fixinc, mkheaders wipes
> out include-fixed but then multi_dir subdirs are not created again, so
> we end up with a limits.h named after each multi_dir, when there are
> non-default multilibs.  Oops.
> 
> This patch arranges for a dummy fixinc to be created for *-*-vxworks7*
> targets, and fixes mkheaders so as to create multi_dir subdirs in
> include-fixed after wiping them out, and to copy limits.h so that it
> won't take the name that should be of a subdir (unless the multi_dir
> is limits.h, but that's hopefully never the case ;-)
> 
> This was tested on x86_64-linux-gnu (no changes to include-fixed there,
> as expected), and with various of AdaCore's vx6 and vx7 targets,
> including ones with and without multilibs.  Ok to install?
> 
> 
> for  fixincludes/ChangeLog
> 
>   * mkheaders.in: Re-create subdirs, copy limits.h into subdir.
>   * mkfixinc.sh: Create dummy fixinc for *-*-vxworks7*.
OK
jeff
> 



Re: [PATCH] libiberty.h: punt duplicate strverscmp prototype

2020-02-13 Thread Jeff Law
On Thu, 2020-02-13 at 00:27 -0500, Mike Frysinger wrote:
> SVN r216772 accidentally copied & pasted this prototype when adding
> other ones nearby.
> 
> 2020-02-13  Mike Frysinger  
> 
>   * libiberty.h (strverscmp): Delete duplicate prototype.
OK.  Please push if you've got privs.  If not, let me know and I'll
take care of it.

jeff
> 



Re: [PATCH] config: import pkg.m4 from pkg-config

2020-02-13 Thread Jeff Law
On Thu, 2020-02-13 at 00:30 -0500, Mike Frysinger wrote:
> We use this in the sim tree currently.  Rather than require people to
> have pkg-config installed, include it in the config/ dir.
> 
> 2012-12-23  Mike Frysinger  
> 
>   * pkg.m4: New file from pkg-config-0.29.2.
Presumably you're sending this to gcc because the GDB and other
packages consider GCC upstream for config/?

If so, this is fine even though we're in stage4.  GCC doesn't use it
and thus it's highly unlikely to cause any headaches on the GCC side.

Do you have commit access to the GCC tree?  I simply can't remember if
we ever set that up for you.

jeff
> 



Re: [PATCH 2/2] libstdc++: Implement ranges [specialized.algorithms]

2020-02-13 Thread Jonathan Wakely

On 12/02/20 15:41 -0500, Patrick Palka wrote:

This implements all the ranges members defined in [specialized.algorithms]:

 ranges::uninitialized_default_construct
 ranges::uninitialized_value_construct
 ranges::uninitialized_copy
 ranges::uninitialized_copy_n
 ranges::uninitialized_move
 ranges::uninitialized_move_n
 ranges::uninitialized_fill
 ranges::uninitialized_fill_n
 ranges::construct_at
 ranges::destroy_at
 ranges::destroy

It also implements (hopefully correctly) the "obvious" optimizations for these
algos, namely that if the output range has a trivial value type and if the
appropriate operation won't throw then we can dispatch to the standard ranges
version of the algorithm which will then potentially enable further
optimizations.

libstdc++-v3/ChangeLog:

* include/Makefile.am: Add .
* include/Makefile.in: Regenerate.
* include/bits/ranges_uninitialized.h: New header.
* include/std/memory: Include it.
* testsuite/20_util/specialized_algorithms/destroy/constrained.cc: New
test.
* .../uninitialized_copy/constrained.cc: New test.
* .../uninitialized_default_construct/constrained.cc: New test.
* .../uninitialized_fill/constrained.cc: New test.
* .../uninitialized_move/constrained.cc: New test.
* .../uninitialized_value_construct/constrained.cc: New test.





+  template<__detail::__nothrow_input_iterator _Iter,
+  __detail::__nothrow_sentinel<_Iter> _Sent>
+requires destructible>
+constexpr _Iter
+destroy(_Iter __first, _Sent __last) noexcept
+{
+  if constexpr (is_trivially_destructible_v>)
+   return ranges::next(__first, __last);
+  else
+   {
+ for (; __first != __last; ++__first)
+   ranges::destroy_at(addressof(*__first));


This should be std::__addressof


+ return __first;
+   }
+}
+
+  template<__detail::__nothrow_input_range _Range>
+requires destructible>
+constexpr safe_iterator_t<_Range>
+destroy(_Range&& __r) noexcept
+{ return ranges::destroy(ranges::begin(__r), ranges::end(__r)); }
+
+  template<__detail::__nothrow_input_iterator _Iter>
+requires destructible>
+constexpr _Iter
+destroy_n(_Iter __first, iter_difference_t<_Iter> __n) noexcept
+{
+  if constexpr (is_trivially_destructible_v>)
+   return ranges::next(__first, __n);
+  else
+   {
+ for (; __n > 0; ++__first, (void)--__n)
+   ranges::destroy_at(addressof(*__first));


Same here.

OK for master with those two adjustments.



Re: [PING^3][PATCH] optimize costly division in rtx_cost

2020-02-13 Thread Richard Biener
On Thu, Feb 13, 2020 at 2:46 PM Alexander Monakov  wrote:
>
> Ping^3.

OK.

> On Sun, 5 Jan 2020, Alexander Monakov wrote:
>
> > Hi,
> >
> > I noticed there's a costly signed 64-bit division in rtx_cost on x86 as 
> > well as
> > any other target where UNITS_PER_WORD is implemented like TARGET_64BIT ? 8 
> > : 4.
> > It's also evident that rtx_cost does redundant work for a SET rtx argument.
> >
> > Obviously the variable named 'factor' rarely exceeds 1, so in the majority 
> > of
> > cases it can be computed with a well-predictable branch rather than a 
> > division.
> >
> > This patch makes rtx_cost do the division only in case mode is wider than
> > UNITS_PER_WORD, and also moves a test for a SET up front to avoid 
> > redundancy.
> > No functional change.
> >
> > Bootstrapped on x86_64, ok for trunk?
> >
> > To illustrate the improvement this buys, for tramp3d -O2 compilation, I got
> >
> > before:
> >73887675319  instructions:u
> >
> >72438432200  cycles:u
> >  924298569  idq.ms_uops:u
> >   102603799255  uops_dispatched.thread:u
> >
> > after:
> >73888371724  instructions:u
> >
> >72386986612  cycles:u
> >  802744775  idq.ms_uops:u
> >   102096987220  uops_dispatched.thread:u
> >
> > (this is on Sandybridge, idq.ms_uops are uops going via the microcode 
> > sequencer,
> > so the unneeded division is responsible for a good fraction of them)
> >
> >   * rtlanal.c (rtx_cost): Handle a SET up front. Avoid division if the
> >   mode is not wider than UNITS_PER_WORD.
> >
> > diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
> > index 9a7afccefb8..c7ab86e228b 100644
> > --- a/gcc/rtlanal.c
> > +++ b/gcc/rtlanal.c
> > @@ -4207,18 +4207,23 @@ rtx_cost (rtx x, machine_mode mode, enum rtx_code 
> > outer_code,
> >const char *fmt;
> >int total;
> >int factor;
> > +  unsigned mode_size;
> >
> >if (x == 0)
> >  return 0;
> >
> > -  if (GET_MODE (x) != VOIDmode)
> > +  if (GET_CODE (x) == SET)
> > +/* A SET doesn't have a mode, so let's look at the SET_DEST to get
> > +   the mode for the factor.  */
> > +mode = GET_MODE (SET_DEST (x));
> > +  else if (GET_MODE (x) != VOIDmode)
> >  mode = GET_MODE (x);
> >
> > +  mode_size = estimated_poly_value (GET_MODE_SIZE (mode));
> > +
> >/* A size N times larger than UNITS_PER_WORD likely needs N times as
> >   many insns, taking N times as long.  */
> > -  factor = estimated_poly_value (GET_MODE_SIZE (mode)) / UNITS_PER_WORD;
> > -  if (factor == 0)
> > -factor = 1;
> > +  factor = mode_size > UNITS_PER_WORD ? mode_size / UNITS_PER_WORD : 1;
> >
> >/* Compute the default costs of certain things.
> >   Note that targetm.rtx_costs can override the defaults.  */
> > @@ -4243,14 +4248,6 @@ rtx_cost (rtx x, machine_mode mode, enum rtx_code 
> > outer_code,
> >/* Used in combine.c as a marker.  */
> >total = 0;
> >break;
> > -case SET:
> > -  /* A SET doesn't have a mode, so let's look at the SET_DEST to get
> > -  the mode for the factor.  */
> > -  mode = GET_MODE (SET_DEST (x));
> > -  factor = estimated_poly_value (GET_MODE_SIZE (mode)) / 
> > UNITS_PER_WORD;
> > -  if (factor == 0)
> > - factor = 1;
> > -  /* FALLTHRU */
> >  default:
> >total = factor * COSTS_N_INSNS (1);
> >  }
> >


[PING^3][PATCH] optimize costly division in rtx_cost

2020-02-13 Thread Alexander Monakov
Ping^3.

On Sun, 5 Jan 2020, Alexander Monakov wrote:

> Hi,
> 
> I noticed there's a costly signed 64-bit division in rtx_cost on x86 as well 
> as
> any other target where UNITS_PER_WORD is implemented like TARGET_64BIT ? 8 : 
> 4.
> It's also evident that rtx_cost does redundant work for a SET rtx argument.
> 
> Obviously the variable named 'factor' rarely exceeds 1, so in the majority of
> cases it can be computed with a well-predictable branch rather than a 
> division.
> 
> This patch makes rtx_cost do the division only in case mode is wider than
> UNITS_PER_WORD, and also moves a test for a SET up front to avoid redundancy.
> No functional change.
> 
> Bootstrapped on x86_64, ok for trunk?
> 
> To illustrate the improvement this buys, for tramp3d -O2 compilation, I got
> 
> before:
>73887675319  instructions:u
> 
>72438432200  cycles:u
>  924298569  idq.ms_uops:u
>   102603799255  uops_dispatched.thread:u
> 
> after:
>73888371724  instructions:u
> 
>72386986612  cycles:u
>  802744775  idq.ms_uops:u
>   102096987220  uops_dispatched.thread:u
> 
> (this is on Sandybridge, idq.ms_uops are uops going via the microcode 
> sequencer,
> so the unneeded division is responsible for a good fraction of them)
> 
>   * rtlanal.c (rtx_cost): Handle a SET up front. Avoid division if the
>   mode is not wider than UNITS_PER_WORD.
> 
> diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
> index 9a7afccefb8..c7ab86e228b 100644
> --- a/gcc/rtlanal.c
> +++ b/gcc/rtlanal.c
> @@ -4207,18 +4207,23 @@ rtx_cost (rtx x, machine_mode mode, enum rtx_code 
> outer_code,
>const char *fmt;
>int total;
>int factor;
> +  unsigned mode_size;
>  
>if (x == 0)
>  return 0;
>  
> -  if (GET_MODE (x) != VOIDmode)
> +  if (GET_CODE (x) == SET)
> +/* A SET doesn't have a mode, so let's look at the SET_DEST to get
> +   the mode for the factor.  */
> +mode = GET_MODE (SET_DEST (x));
> +  else if (GET_MODE (x) != VOIDmode)
>  mode = GET_MODE (x);
>  
> +  mode_size = estimated_poly_value (GET_MODE_SIZE (mode));
> +
>/* A size N times larger than UNITS_PER_WORD likely needs N times as
>   many insns, taking N times as long.  */
> -  factor = estimated_poly_value (GET_MODE_SIZE (mode)) / UNITS_PER_WORD;
> -  if (factor == 0)
> -factor = 1;
> +  factor = mode_size > UNITS_PER_WORD ? mode_size / UNITS_PER_WORD : 1;
>  
>/* Compute the default costs of certain things.
>   Note that targetm.rtx_costs can override the defaults.  */
> @@ -4243,14 +4248,6 @@ rtx_cost (rtx x, machine_mode mode, enum rtx_code 
> outer_code,
>/* Used in combine.c as a marker.  */
>total = 0;
>break;
> -case SET:
> -  /* A SET doesn't have a mode, so let's look at the SET_DEST to get
> -  the mode for the factor.  */
> -  mode = GET_MODE (SET_DEST (x));
> -  factor = estimated_poly_value (GET_MODE_SIZE (mode)) / UNITS_PER_WORD;
> -  if (factor == 0)
> - factor = 1;
> -  /* FALLTHRU */
>  default:
>total = factor * COSTS_N_INSNS (1);
>  }
> 


Re: gcov: reduce code quality loss by reproducible topn merging [PR92924]

2020-02-13 Thread Jan Hubicka
> On 1/30/20 5:13 PM, Jan Hubicka wrote:
> > Martin, I welcome your opinion on the patch
> 
> Ok, recently we've made quite some experiments with Honza about
> TOP N counters. Based on the numbers, it shows that tracking a negative
> value per-counter value does not help much. So that I suggest to use
> only one global flag (a negative total) in order to distinguish in between
> reproducible and non-reproducible profile.
> 
> I'm sending patch that simplifies the merging and introduces a new option.
> 
> For the future, we may (similarly to LLVM) come up with a dynamic allocation
> for TOPN counters. I've git a working patch for that.
> 
> Martin

> From acf77e7ebba2a4f4370388f83901d67cc71e5a0c Mon Sep 17 00:00:00 2001
> From: Martin Liska 
> Date: Wed, 5 Feb 2020 14:44:43 +0100
> Subject: [PATCH] Introduce -fprofile-reproducible and support it with TOP N.
> 
> gcc/ChangeLog:
> 
> 2020-02-05  Martin Liska  
> 
>   PR ipa/92924
>   * common.opt: Add -fprofile-reproducible.
>   * doc/invoke.texi: Document it.
>   * value-prof.c (dump_histogram_value):
>   Document and support behavior for counters[0]
>   being a negative value.
>   (get_nth_most_common_value): Handle negative
>   counters[0] in respect to flag_profile_reproducible.
> 
> libgcc/ChangeLog:
> 
> 2020-02-05  Martin Liska  
> 
>   PR ipa/92924
>   * libgcov-merge.c (merge_topn_values_set): Record
>   when a TOP N counter becomes invalid.  When merging
>   remove a smallest value if the space is needed.

I think the patch is fine except for the command line option name.
First the profiling is reproducible as long as you do not run streaming
in parallel and second we will probably eventually run into need of some
kind of reproducibility for multi-threade dprograms.

So what about
 -fprofile-reproducibility=serial
 -fprofile-reproducibility=parallel-runs (or parallel-merging)
and in future
 -fprofile-reproducibility=multithreaded

It will make users to read manual what it odes really mean.
> ---
>  gcc/common.opt |  4 
>  gcc/doc/invoke.texi| 12 +-
>  gcc/value-prof.c   | 26 --
>  libgcc/libgcov-merge.c | 50 +-
>  4 files changed, 69 insertions(+), 23 deletions(-)
> 
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 5692cd04374..7f643516a62 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -2168,6 +2168,10 @@ fprofile-exclude-files=
>  Common Joined RejectNegative Var(flag_profile_exclude_files)
>  Instrument only functions from files where names do not match all the 
> regular expressions (separated by a semi-colon).
>  
> +fprofile-reproducible
> +Common Report Var(flag_profile_reproducible)
> +Enable only profile based optimizations that do not depend on order of 
> training runs.
> +
>  Enum
>  Name(profile_update) Type(enum profile_update) UnknownError(unknown profile 
> update method %qs)
>  
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 35b341e759f..6725c543c3b 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -562,7 +562,7 @@ Objective-C and Objective-C++ Dialects}.
>  -fprofile-abs-path @gol
>  -fprofile-dir=@var{path}  -fprofile-generate  -fprofile-generate=@var{path} 
> @gol
>  -fprofile-note=@var{path}  -fprofile-update=@var{method} @gol
> --fprofile-filter-files=@var{regex}  -fprofile-exclude-files=@var{regex} @gol
> +-fprofile-filter-files=@var{regex}  -fprofile-exclude-files=@var{regex} 
> -fprofile-reproducibly @gol
>  -fsanitize=@var{style}  -fsanitize-recover  -fsanitize-recover=@var{style} 
> @gol
>  -fasan-shadow-offset=@var{number}  -fsanitize-sections=@var{s1},@var{s2},... 
> @gol
>  -fsanitize-undefined-trap-on-error  -fbounds-check @gol
> @@ -13324,6 +13324,16 @@ all the regular expressions (separated by a 
> semi-colon).
>  For example, @option{-fprofile-exclude-files=/usr/*} will prevent 
> instrumentation
>  of all files that are located in @file{/usr/} folder.
>  
> +Enable only profile based optimizations (PGO) that do not depend on order of 
> training runs.
> +
> +The PGO optimizations depend on a run-time profile that is always merged 
> after
> +each training run.  The merged profile can end up being different based on
> +sequence of these training runs.  Using the option, the compiler will use
> +only the profile information which cannot be changed by order of training 
> runs.
> +
> +@item -fprofile-reproducible
> +@opindex fprofile-reproducible

Isn't this backwards (i.e. @item first and descrition later).
I think this is bit hard to understand feature so we should explain it
bit more including the surprises one can run into WRT reproducibility.

-fprofile-reproducibility=
Control level of reproducibility of profile gathered by
@code{-fprofile-generate}.  This makes it possible to rebuild program
with same outcome which is useful, for example, for distribution
packages.

With @option{-fprofile-reproducibility=serial} the profile 

Re: [PATCH] i386: Also skip ENDBR32 at the target function entry

2020-02-13 Thread H.J. Lu
On Thu, Feb 13, 2020 at 5:10 AM Uros Bizjak  wrote:
>
> On Thu, Feb 13, 2020 at 1:42 PM H.J. Lu  wrote:
> >
> > On Thu, Feb 13, 2020 at 01:28:43PM +0100, Uros Bizjak wrote:
> > > On Thu, Feb 13, 2020 at 1:06 PM H.J. Lu  wrote:
> > > >
> > > > On Thu, Feb 13, 2020 at 09:29:32AM +0100, Uros Bizjak wrote:
> > > > > On Wed, Feb 12, 2020 at 1:21 PM H.J. Lu  wrote:
> > > > > >
> > > > > > On Mon, Feb 10, 2020 at 12:01 PM Uros Bizjak  
> > > > > > wrote:
> > > > > > >
> > > > > > > On Mon, Feb 10, 2020 at 8:53 PM H.J. Lu  
> > > > > > > wrote:
> > > > > > > >
> > > > > > > > On Mon, Feb 10, 2020 at 11:40 AM Uros Bizjak 
> > > > > > > >  wrote:
> > > > > > > > >
> > > > > > > > > On Mon, Feb 10, 2020 at 8:22 PM H.J. Lu  
> > > > > > > > > wrote:
> > > > > > > > > >
> > > > > > > > > > Since nested function isn't only called directly, there is 
> > > > > > > > > > ENDBR32 at
> > > > > > > > > > function entry and we need to skip it for direct jump in 
> > > > > > > > > > trampoline.
> > > > > > > > >
> > > > > > > > > Hm, I'm afraid I don't understand this comment. Can you 
> > > > > > > > > perhaps rephrase it?
> > > > > > > > >
> > > > > > > >
> > > > > > > > ix86_trampoline_init has
> > > > > > > >
> > > > > > > >  /* Compute offset from the end of the jmp to the target 
> > > > > > > > function.
> > > > > > > >  In the case in which the trampoline stores the static 
> > > > > > > > chain on
> > > > > > > >  the stack, we need to skip the first insn which pushes 
> > > > > > > > the
> > > > > > > >  (call-saved) register static chain; this push is 1 
> > > > > > > > byte.  */
> > > > > > > >   offset += 5;
> > > > > > > >   disp = expand_binop (SImode, sub_optab, fnaddr,
> > > > > > > >plus_constant (Pmode, XEXP (m_tramp, 
> > > > > > > > 0),
> > > > > > > >   offset - (MEM_P 
> > > > > > > > (chain) ? 1 : 0)),
> > > > > > > >NULL_RTX, 1, OPTAB_DIRECT);
> > > > > > > >   emit_move_insn (mem, disp);
> > > > > > > >
> > > > > > > > Without CET, we got
> > > > > > > >
> > > > > > > > 011 :
> > > > > > > >   11: 56push   %esi
> > > > > > > >   12: 55push   %ebp   << trampoline 
> > > > > > > > jumps here.
> > > > > > > >   13: 89 e5mov%esp,%ebp
> > > > > > > >   15: 83 ec 08  sub$0x8,%esp
> > > > > > > >
> > > > > > > > With CET, if bar isn't only called directly, we got
> > > > > > > >
> > > > > > > > 0015 :
> > > > > > > >   15: f3 0f 1e fb  endbr32
> > > > > > > >   19: 56push   %esi
> > > > > > > >   1a: 55push   %ebp    trampoline 
> > > > > > > > jumps here.
> > > > > > > >   1b: 89 e5mov%esp,%ebp
> > > > > > > >   1d: 83 ec 08  sub$0x8,%esp
> > > > > > > >
> > > > > > > > We need to add 4 bytes for trampoline to skip endbr32.
> > > > > > > >
> > > > > > > > Here is the updated patch to check if nested function isn't only
> > > > > > > > called directly,
> > > > > > >
> > > > > > > Please figure out the final patch. I don't want to waste my time
> > > > > > > reviewing different version every half hour. Ping me in a couple 
> > > > > > > of
> > > > > > > days.
> > > > > >
> > > > > > This is the final version:
> > > > > >
> > > > > > https://gcc.gnu.org/ml/gcc-patches/2020-02/msg00586.html
> > > > > >
> > > > > > You can try the testcase in the patch on any machine with CET 
> > > > > > binutils
> > > > > > since ENDBR32 is nop on none-CET machines.  Without this patch,
> > > > > > the test will fail.
> > > > >
> > > > > Please rephrase the comment. I don't understand what it tries to say.
> > > > >
> > > >
> > > > Here is the patch with updated comments.  OK for master and 8/9 
> > > > branches?
> > > >
> > > > Thanks.
> > > >
> > > > H.J.
> > > > ---
> > > > Since pass_insert_endbranch inserts ENDBR32 at entry of the target
> > > > function if it may be called indirectly, we also need to skip the
> > > > 4-byte ENDBR32 for direct jump in trampoline if it is the case.
> > >
> > > "
> > > Skip ENDBR32 at the entry if called indirectly.
> > > "
> > > pass_insert_endbranch inserts ENDBR at the entry of the target
> > > function.  We need to skip
> > > >
> > > > Tested on Linux/x86-64 CET machine with and without -m32.
> > > >
> > > > gcc/
> > > >
> > > > PR target/93656
> > > > * config/i386/i386.c (ix86_trampoline_init): Skip ENDBR32 at
> > > > the target function entry.
> > > >
> > > > gcc/testsuite/
> > > >
> > > > PR target/93656
> > > > * gcc.target/i386/pr93656.c: New test.
> > > > ---
> > > >  gcc/config/i386/i386.c  | 9 -
> > > >  gcc/testsuite/gcc.target/i386/pr93656.c | 4 
> > > >  2 files changed, 12 insertions(+), 1 deletion(-)
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr93656.c
> > > >
> > > > diff --git 

Re: [PATCH] i386: Also skip ENDBR32 at the target function entry

2020-02-13 Thread Uros Bizjak
On Thu, Feb 13, 2020 at 1:42 PM H.J. Lu  wrote:
>
> On Thu, Feb 13, 2020 at 01:28:43PM +0100, Uros Bizjak wrote:
> > On Thu, Feb 13, 2020 at 1:06 PM H.J. Lu  wrote:
> > >
> > > On Thu, Feb 13, 2020 at 09:29:32AM +0100, Uros Bizjak wrote:
> > > > On Wed, Feb 12, 2020 at 1:21 PM H.J. Lu  wrote:
> > > > >
> > > > > On Mon, Feb 10, 2020 at 12:01 PM Uros Bizjak  
> > > > > wrote:
> > > > > >
> > > > > > On Mon, Feb 10, 2020 at 8:53 PM H.J. Lu  wrote:
> > > > > > >
> > > > > > > On Mon, Feb 10, 2020 at 11:40 AM Uros Bizjak  
> > > > > > > wrote:
> > > > > > > >
> > > > > > > > On Mon, Feb 10, 2020 at 8:22 PM H.J. Lu  
> > > > > > > > wrote:
> > > > > > > > >
> > > > > > > > > Since nested function isn't only called directly, there is 
> > > > > > > > > ENDBR32 at
> > > > > > > > > function entry and we need to skip it for direct jump in 
> > > > > > > > > trampoline.
> > > > > > > >
> > > > > > > > Hm, I'm afraid I don't understand this comment. Can you perhaps 
> > > > > > > > rephrase it?
> > > > > > > >
> > > > > > >
> > > > > > > ix86_trampoline_init has
> > > > > > >
> > > > > > >  /* Compute offset from the end of the jmp to the target 
> > > > > > > function.
> > > > > > >  In the case in which the trampoline stores the static 
> > > > > > > chain on
> > > > > > >  the stack, we need to skip the first insn which pushes 
> > > > > > > the
> > > > > > >  (call-saved) register static chain; this push is 1 byte. 
> > > > > > >  */
> > > > > > >   offset += 5;
> > > > > > >   disp = expand_binop (SImode, sub_optab, fnaddr,
> > > > > > >plus_constant (Pmode, XEXP (m_tramp, 
> > > > > > > 0),
> > > > > > >   offset - (MEM_P (chain) 
> > > > > > > ? 1 : 0)),
> > > > > > >NULL_RTX, 1, OPTAB_DIRECT);
> > > > > > >   emit_move_insn (mem, disp);
> > > > > > >
> > > > > > > Without CET, we got
> > > > > > >
> > > > > > > 011 :
> > > > > > >   11: 56push   %esi
> > > > > > >   12: 55push   %ebp   << trampoline jumps 
> > > > > > > here.
> > > > > > >   13: 89 e5mov%esp,%ebp
> > > > > > >   15: 83 ec 08  sub$0x8,%esp
> > > > > > >
> > > > > > > With CET, if bar isn't only called directly, we got
> > > > > > >
> > > > > > > 0015 :
> > > > > > >   15: f3 0f 1e fb  endbr32
> > > > > > >   19: 56push   %esi
> > > > > > >   1a: 55push   %ebp    trampoline 
> > > > > > > jumps here.
> > > > > > >   1b: 89 e5mov%esp,%ebp
> > > > > > >   1d: 83 ec 08  sub$0x8,%esp
> > > > > > >
> > > > > > > We need to add 4 bytes for trampoline to skip endbr32.
> > > > > > >
> > > > > > > Here is the updated patch to check if nested function isn't only
> > > > > > > called directly,
> > > > > >
> > > > > > Please figure out the final patch. I don't want to waste my time
> > > > > > reviewing different version every half hour. Ping me in a couple of
> > > > > > days.
> > > > >
> > > > > This is the final version:
> > > > >
> > > > > https://gcc.gnu.org/ml/gcc-patches/2020-02/msg00586.html
> > > > >
> > > > > You can try the testcase in the patch on any machine with CET binutils
> > > > > since ENDBR32 is nop on none-CET machines.  Without this patch,
> > > > > the test will fail.
> > > >
> > > > Please rephrase the comment. I don't understand what it tries to say.
> > > >
> > >
> > > Here is the patch with updated comments.  OK for master and 8/9 branches?
> > >
> > > Thanks.
> > >
> > > H.J.
> > > ---
> > > Since pass_insert_endbranch inserts ENDBR32 at entry of the target
> > > function if it may be called indirectly, we also need to skip the
> > > 4-byte ENDBR32 for direct jump in trampoline if it is the case.
> >
> > "
> > Skip ENDBR32 at the entry if called indirectly.
> > "
> > pass_insert_endbranch inserts ENDBR at the entry of the target
> > function.  We need to skip
> > >
> > > Tested on Linux/x86-64 CET machine with and without -m32.
> > >
> > > gcc/
> > >
> > > PR target/93656
> > > * config/i386/i386.c (ix86_trampoline_init): Skip ENDBR32 at
> > > the target function entry.
> > >
> > > gcc/testsuite/
> > >
> > > PR target/93656
> > > * gcc.target/i386/pr93656.c: New test.
> > > ---
> > >  gcc/config/i386/i386.c  | 9 -
> > >  gcc/testsuite/gcc.target/i386/pr93656.c | 4 
> > >  2 files changed, 12 insertions(+), 1 deletion(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr93656.c
> > >
> > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > > index 44bc0e0176a..52640b74cc8 100644
> > > --- a/gcc/config/i386/i386.c
> > > +++ b/gcc/config/i386/i386.c
> > > @@ -16839,9 +16839,16 @@ ix86_trampoline_init (rtx m_tramp, tree fndecl, 
> > > rtx chain_value)
> > >  the stack, we need to skip the first insn which pushes the
> > >

PING^7: [PATCH] i386: Properly encode xmm16-xmm31/ymm16-ymm31 for vector move

2020-02-13 Thread H.J. Lu
On Thu, Feb 6, 2020 at 8:17 PM H.J. Lu  wrote:
>
> On Mon, Jan 27, 2020 at 10:59 AM H.J. Lu  wrote:
> >
> > On Mon, Jul 8, 2019 at 8:19 AM H.J. Lu  wrote:
> > >
> > > On Tue, Jun 18, 2019 at 8:59 AM H.J. Lu  wrote:
> > > >
> > > > On Fri, May 31, 2019 at 10:38 AM H.J. Lu  wrote:
> > > > >
> > > > > On Tue, May 21, 2019 at 2:43 PM H.J. Lu  wrote:
> > > > > >
> > > > > > On Fri, Feb 22, 2019 at 8:25 AM H.J. Lu  
> > > > > > wrote:
> > > > > > >
> > > > > > > Hi Jan, Uros,
> > > > > > >
> > > > > > > This patch fixes the wrong code bug:
> > > > > > >
> > > > > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89229
> > > > > > >
> > > > > > > Tested on AVX2 and AVX512 with and without --with-arch=native.
> > > > > > >
> > > > > > > OK for trunk?
> > > > > > >
> > > > > > > Thanks.
> > > > > > >
> > > > > > > H.J.
> > > > > > > --
> > > > > > > i386 backend has
> > > > > > >
> > > > > > > INT_MODE (OI, 32);
> > > > > > > INT_MODE (XI, 64);
> > > > > > >
> > > > > > > So, XI_MODE represents 64 INTEGER bytes = 64 * 8 = 512 bit 
> > > > > > > operation,
> > > > > > > in case of const_1, all 512 bits set.
> > > > > > >
> > > > > > > We can load zeros with narrower instruction, (e.g. 256 bit by 
> > > > > > > inherent
> > > > > > > zeroing of highpart in case of 128 bit xor), so TImode in this 
> > > > > > > case.
> > > > > > >
> > > > > > > Some targets prefer V4SF mode, so they will emit float xorps for 
> > > > > > > zeroing.
> > > > > > >
> > > > > > > sse.md has
> > > > > > >
> > > > > > > (define_insn "mov_internal"
> > > > > > >   [(set (match_operand:VMOVE 0 "nonimmediate_operand"
> > > > > > >  "=v,v ,v ,m")
> > > > > > > (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
> > > > > > >  " C,BC,vm,v"))]
> > > > > > > 
> > > > > > >   /* There is no evex-encoded vmov* for sizes smaller than 
> > > > > > > 64-bytes
> > > > > > >  in avx512f, so we need to use workarounds, to access sse 
> > > > > > > registers
> > > > > > >  16-31, which are evex-only. In avx512vl we don't need 
> > > > > > > workarounds.  */
> > > > > > >   if (TARGET_AVX512F &&  < 64 && !TARGET_AVX512VL
> > > > > > >   && (EXT_REX_SSE_REG_P (operands[0])
> > > > > > >   || EXT_REX_SSE_REG_P (operands[1])))
> > > > > > > {
> > > > > > >   if (memory_operand (operands[0], mode))
> > > > > > > {
> > > > > > >   if ( == 32)
> > > > > > > return "vextract64x4\t{$0x0, %g1, 
> > > > > > > %0|%0, %g1, 0x0}";
> > > > > > >   else if ( == 16)
> > > > > > > return "vextract32x4\t{$0x0, %g1, 
> > > > > > > %0|%0, %g1, 0x0}";
> > > > > > >   else
> > > > > > > gcc_unreachable ();
> > > > > > > }
> > > > > > > ...
> > > > > > >
> > > > > > > However, since ix86_hard_regno_mode_ok has
> > > > > > >
> > > > > > >  /* TODO check for QI/HI scalars.  */
> > > > > > >   /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
> > > > > > >   if (TARGET_AVX512VL
> > > > > > >   && (mode == OImode
> > > > > > >   || mode == TImode
> > > > > > >   || VALID_AVX256_REG_MODE (mode)
> > > > > > >   || VALID_AVX512VL_128_REG_MODE (mode)))
> > > > > > > return true;
> > > > > > >
> > > > > > >   /* xmm16-xmm31 are only available for AVX-512.  */
> > > > > > >   if (EXT_REX_SSE_REGNO_P (regno))
> > > > > > > return false;
> > > > > > >
> > > > > > >   if (TARGET_AVX512F &&  < 64 && !TARGET_AVX512VL
> > > > > > >   && (EXT_REX_SSE_REG_P (operands[0])
> > > > > > >   || EXT_REX_SSE_REG_P (operands[1])))
> > > > > > >
> > > > > > > is a dead code.
> > > > > > >
> > > > > > > Also for
> > > > > > >
> > > > > > > long long *p;
> > > > > > > volatile __m256i yy;
> > > > > > >
> > > > > > > void
> > > > > > > foo (void)
> > > > > > > {
> > > > > > >_mm256_store_epi64 (p, yy);
> > > > > > > }
> > > > > > >
> > > > > > > with AVX512VL, we should generate
> > > > > > >
> > > > > > > vmovdqa %ymm0, (%rax)
> > > > > > >
> > > > > > > not
> > > > > > >
> > > > > > > vmovdqa64   %ymm0, (%rax)
> > > > > > >
> > > > > > > All TYPE_SSEMOV vector moves are consolidated to 
> > > > > > > ix86_output_ssemov:
> > > > > > >
> > > > > > > 1. If xmm16-xmm31/ymm16-ymm31 registers aren't used, SSE/AVX 
> > > > > > > vector
> > > > > > > moves will be generated.
> > > > > > > 2. If xmm16-xmm31/ymm16-ymm31 registers are used:
> > > > > > >a. With AVX512VL, AVX512VL vector moves will be generated.
> > > > > > >b. Without AVX512VL, xmm16-xmm31/ymm16-ymm31 register to 
> > > > > > > register
> > > > > > >   move will be done with zmm register move.
> > > > > > >
> > > > > > > ext_sse_reg_operand is removed since it is no longer needed.
> > > > > > >
> > > > > > > Tested on AVX2 and AVX512 with and without --with-arch=native.
> > > > > > >
> > > > > > > 

Re: [PATCH] i386: Also skip ENDBR32 at the target function entry

2020-02-13 Thread H.J. Lu
On Thu, Feb 13, 2020 at 01:28:43PM +0100, Uros Bizjak wrote:
> On Thu, Feb 13, 2020 at 1:06 PM H.J. Lu  wrote:
> >
> > On Thu, Feb 13, 2020 at 09:29:32AM +0100, Uros Bizjak wrote:
> > > On Wed, Feb 12, 2020 at 1:21 PM H.J. Lu  wrote:
> > > >
> > > > On Mon, Feb 10, 2020 at 12:01 PM Uros Bizjak  wrote:
> > > > >
> > > > > On Mon, Feb 10, 2020 at 8:53 PM H.J. Lu  wrote:
> > > > > >
> > > > > > On Mon, Feb 10, 2020 at 11:40 AM Uros Bizjak  
> > > > > > wrote:
> > > > > > >
> > > > > > > On Mon, Feb 10, 2020 at 8:22 PM H.J. Lu  
> > > > > > > wrote:
> > > > > > > >
> > > > > > > > Since nested function isn't only called directly, there is 
> > > > > > > > ENDBR32 at
> > > > > > > > function entry and we need to skip it for direct jump in 
> > > > > > > > trampoline.
> > > > > > >
> > > > > > > Hm, I'm afraid I don't understand this comment. Can you perhaps 
> > > > > > > rephrase it?
> > > > > > >
> > > > > >
> > > > > > ix86_trampoline_init has
> > > > > >
> > > > > >  /* Compute offset from the end of the jmp to the target 
> > > > > > function.
> > > > > >  In the case in which the trampoline stores the static 
> > > > > > chain on
> > > > > >  the stack, we need to skip the first insn which pushes the
> > > > > >  (call-saved) register static chain; this push is 1 byte.  
> > > > > > */
> > > > > >   offset += 5;
> > > > > >   disp = expand_binop (SImode, sub_optab, fnaddr,
> > > > > >plus_constant (Pmode, XEXP (m_tramp, 0),
> > > > > >   offset - (MEM_P (chain) ? 
> > > > > > 1 : 0)),
> > > > > >NULL_RTX, 1, OPTAB_DIRECT);
> > > > > >   emit_move_insn (mem, disp);
> > > > > >
> > > > > > Without CET, we got
> > > > > >
> > > > > > 011 :
> > > > > >   11: 56push   %esi
> > > > > >   12: 55push   %ebp   << trampoline jumps 
> > > > > > here.
> > > > > >   13: 89 e5mov%esp,%ebp
> > > > > >   15: 83 ec 08  sub$0x8,%esp
> > > > > >
> > > > > > With CET, if bar isn't only called directly, we got
> > > > > >
> > > > > > 0015 :
> > > > > >   15: f3 0f 1e fb  endbr32
> > > > > >   19: 56push   %esi
> > > > > >   1a: 55push   %ebp    trampoline jumps 
> > > > > > here.
> > > > > >   1b: 89 e5mov%esp,%ebp
> > > > > >   1d: 83 ec 08  sub$0x8,%esp
> > > > > >
> > > > > > We need to add 4 bytes for trampoline to skip endbr32.
> > > > > >
> > > > > > Here is the updated patch to check if nested function isn't only
> > > > > > called directly,
> > > > >
> > > > > Please figure out the final patch. I don't want to waste my time
> > > > > reviewing different version every half hour. Ping me in a couple of
> > > > > days.
> > > >
> > > > This is the final version:
> > > >
> > > > https://gcc.gnu.org/ml/gcc-patches/2020-02/msg00586.html
> > > >
> > > > You can try the testcase in the patch on any machine with CET binutils
> > > > since ENDBR32 is nop on none-CET machines.  Without this patch,
> > > > the test will fail.
> > >
> > > Please rephrase the comment. I don't understand what it tries to say.
> > >
> >
> > Here is the patch with updated comments.  OK for master and 8/9 branches?
> >
> > Thanks.
> >
> > H.J.
> > ---
> > Since pass_insert_endbranch inserts ENDBR32 at entry of the target
> > function if it may be called indirectly, we also need to skip the
> > 4-byte ENDBR32 for direct jump in trampoline if it is the case.
> 
> "
> Skip ENDBR32 at the entry if called indirectly.
> "
> pass_insert_endbranch inserts ENDBR at the entry of the target
> function.  We need to skip
> >
> > Tested on Linux/x86-64 CET machine with and without -m32.
> >
> > gcc/
> >
> > PR target/93656
> > * config/i386/i386.c (ix86_trampoline_init): Skip ENDBR32 at
> > the target function entry.
> >
> > gcc/testsuite/
> >
> > PR target/93656
> > * gcc.target/i386/pr93656.c: New test.
> > ---
> >  gcc/config/i386/i386.c  | 9 -
> >  gcc/testsuite/gcc.target/i386/pr93656.c | 4 
> >  2 files changed, 12 insertions(+), 1 deletion(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr93656.c
> >
> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > index 44bc0e0176a..52640b74cc8 100644
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -16839,9 +16839,16 @@ ix86_trampoline_init (rtx m_tramp, tree fndecl, 
> > rtx chain_value)
> >  the stack, we need to skip the first insn which pushes the
> >  (call-saved) register static chain; this push is 1 byte.  */
> >offset += 5;
> > +  int skip = MEM_P (chain) ? 1 : 0;
> 
> offset += 5 - MEM_P (chain) ? 1: 0;

This is done on purpose sinc there is

  gcc_assert (offset <= TRAMPOLINE_SIZE);

after it.  We shouldn't update offset.

> 
> > +  /* Since 

Re: [PATCH] i386: Also skip ENDBR32 at the target function entry

2020-02-13 Thread Uros Bizjak
On Thu, Feb 13, 2020 at 1:06 PM H.J. Lu  wrote:
>
> On Thu, Feb 13, 2020 at 09:29:32AM +0100, Uros Bizjak wrote:
> > On Wed, Feb 12, 2020 at 1:21 PM H.J. Lu  wrote:
> > >
> > > On Mon, Feb 10, 2020 at 12:01 PM Uros Bizjak  wrote:
> > > >
> > > > On Mon, Feb 10, 2020 at 8:53 PM H.J. Lu  wrote:
> > > > >
> > > > > On Mon, Feb 10, 2020 at 11:40 AM Uros Bizjak  
> > > > > wrote:
> > > > > >
> > > > > > On Mon, Feb 10, 2020 at 8:22 PM H.J. Lu  wrote:
> > > > > > >
> > > > > > > Since nested function isn't only called directly, there is 
> > > > > > > ENDBR32 at
> > > > > > > function entry and we need to skip it for direct jump in 
> > > > > > > trampoline.
> > > > > >
> > > > > > Hm, I'm afraid I don't understand this comment. Can you perhaps 
> > > > > > rephrase it?
> > > > > >
> > > > >
> > > > > ix86_trampoline_init has
> > > > >
> > > > >  /* Compute offset from the end of the jmp to the target function.
> > > > >  In the case in which the trampoline stores the static chain 
> > > > > on
> > > > >  the stack, we need to skip the first insn which pushes the
> > > > >  (call-saved) register static chain; this push is 1 byte.  */
> > > > >   offset += 5;
> > > > >   disp = expand_binop (SImode, sub_optab, fnaddr,
> > > > >plus_constant (Pmode, XEXP (m_tramp, 0),
> > > > >   offset - (MEM_P (chain) ? 1 
> > > > > : 0)),
> > > > >NULL_RTX, 1, OPTAB_DIRECT);
> > > > >   emit_move_insn (mem, disp);
> > > > >
> > > > > Without CET, we got
> > > > >
> > > > > 011 :
> > > > >   11: 56push   %esi
> > > > >   12: 55push   %ebp   << trampoline jumps 
> > > > > here.
> > > > >   13: 89 e5mov%esp,%ebp
> > > > >   15: 83 ec 08  sub$0x8,%esp
> > > > >
> > > > > With CET, if bar isn't only called directly, we got
> > > > >
> > > > > 0015 :
> > > > >   15: f3 0f 1e fb  endbr32
> > > > >   19: 56push   %esi
> > > > >   1a: 55push   %ebp    trampoline jumps 
> > > > > here.
> > > > >   1b: 89 e5mov%esp,%ebp
> > > > >   1d: 83 ec 08  sub$0x8,%esp
> > > > >
> > > > > We need to add 4 bytes for trampoline to skip endbr32.
> > > > >
> > > > > Here is the updated patch to check if nested function isn't only
> > > > > called directly,
> > > >
> > > > Please figure out the final patch. I don't want to waste my time
> > > > reviewing different version every half hour. Ping me in a couple of
> > > > days.
> > >
> > > This is the final version:
> > >
> > > https://gcc.gnu.org/ml/gcc-patches/2020-02/msg00586.html
> > >
> > > You can try the testcase in the patch on any machine with CET binutils
> > > since ENDBR32 is nop on none-CET machines.  Without this patch,
> > > the test will fail.
> >
> > Please rephrase the comment. I don't understand what it tries to say.
> >
>
> Here is the patch with updated comments.  OK for master and 8/9 branches?
>
> Thanks.
>
> H.J.
> ---
> Since pass_insert_endbranch inserts ENDBR32 at entry of the target
> function if it may be called indirectly, we also need to skip the
> 4-byte ENDBR32 for direct jump in trampoline if it is the case.

"
Skip ENDBR32 at the entry if called indirectly.
"
pass_insert_endbranch inserts ENDBR at the entry of the target
function.  We need to skip
>
> Tested on Linux/x86-64 CET machine with and without -m32.
>
> gcc/
>
> PR target/93656
> * config/i386/i386.c (ix86_trampoline_init): Skip ENDBR32 at
> the target function entry.
>
> gcc/testsuite/
>
> PR target/93656
> * gcc.target/i386/pr93656.c: New test.
> ---
>  gcc/config/i386/i386.c  | 9 -
>  gcc/testsuite/gcc.target/i386/pr93656.c | 4 
>  2 files changed, 12 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr93656.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 44bc0e0176a..52640b74cc8 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -16839,9 +16839,16 @@ ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx 
> chain_value)
>  the stack, we need to skip the first insn which pushes the
>  (call-saved) register static chain; this push is 1 byte.  */
>offset += 5;
> +  int skip = MEM_P (chain) ? 1 : 0;

offset += 5 - MEM_P (chain) ? 1: 0;

> +  /* Since pass_insert_endbranch inserts ENDBR32 at entry of the
> +target function if it may be called indirectly, we also need
> +to skip the 4-byte ENDBR32 if it is the case.  */

/* Skip ENDBR32 at the entry of the target function when called indirectly.  */

> +  if (need_endbr
> + && !cgraph_node::get (fndecl)->only_called_directly_p ())
> +   skip += 4;

offset += 4;

>disp = expand_binop (SImode, sub_optab, fnaddr,
>

Re: [PATCH 1/2] libstdc++: Move some ranges algos to a new header

2020-02-13 Thread Jonathan Wakely

On 12/02/20 15:41 -0500, Patrick Palka wrote:

This roughly mirrors the existing split between  and
.  The ranges [specialized.algorithms] will use this new
header to avoid including all of of .

libstdc++-v3/ChangeLog:

* include/Makefile.am: Add bits/ranges_algobase.h
* include/Makefile.in: Regenerate.
* bits/ranges_algo.h: Include  and refactor
existing #includes.
(__detail::__is_normal_iterator, __detail::is_reverse_iterator,
__detail::__is_move_iterator, copy_result, move_result,
__equal, equal, copy_result, move_result, move_backward_result,
copy_backward_result, __copy_or_move_backward, __copy_or_move, copy,
move, copy_backward, move_backward, copy_n_result, copy_n, fill_n,
fill): Split out into ...
* bits/range_algobase.h: ... this new header.


OK for master.



[PATCH] i386: Also skip ENDBR32 at the target function entry

2020-02-13 Thread H.J. Lu
On Thu, Feb 13, 2020 at 09:29:32AM +0100, Uros Bizjak wrote:
> On Wed, Feb 12, 2020 at 1:21 PM H.J. Lu  wrote:
> >
> > On Mon, Feb 10, 2020 at 12:01 PM Uros Bizjak  wrote:
> > >
> > > On Mon, Feb 10, 2020 at 8:53 PM H.J. Lu  wrote:
> > > >
> > > > On Mon, Feb 10, 2020 at 11:40 AM Uros Bizjak  wrote:
> > > > >
> > > > > On Mon, Feb 10, 2020 at 8:22 PM H.J. Lu  wrote:
> > > > > >
> > > > > > Since nested function isn't only called directly, there is ENDBR32 
> > > > > > at
> > > > > > function entry and we need to skip it for direct jump in trampoline.
> > > > >
> > > > > Hm, I'm afraid I don't understand this comment. Can you perhaps 
> > > > > rephrase it?
> > > > >
> > > >
> > > > ix86_trampoline_init has
> > > >
> > > >  /* Compute offset from the end of the jmp to the target function.
> > > >  In the case in which the trampoline stores the static chain on
> > > >  the stack, we need to skip the first insn which pushes the
> > > >  (call-saved) register static chain; this push is 1 byte.  */
> > > >   offset += 5;
> > > >   disp = expand_binop (SImode, sub_optab, fnaddr,
> > > >plus_constant (Pmode, XEXP (m_tramp, 0),
> > > >   offset - (MEM_P (chain) ? 1 : 
> > > > 0)),
> > > >NULL_RTX, 1, OPTAB_DIRECT);
> > > >   emit_move_insn (mem, disp);
> > > >
> > > > Without CET, we got
> > > >
> > > > 011 :
> > > >   11: 56push   %esi
> > > >   12: 55push   %ebp   << trampoline jumps here.
> > > >   13: 89 e5mov%esp,%ebp
> > > >   15: 83 ec 08  sub$0x8,%esp
> > > >
> > > > With CET, if bar isn't only called directly, we got
> > > >
> > > > 0015 :
> > > >   15: f3 0f 1e fb  endbr32
> > > >   19: 56push   %esi
> > > >   1a: 55push   %ebp    trampoline jumps 
> > > > here.
> > > >   1b: 89 e5mov%esp,%ebp
> > > >   1d: 83 ec 08  sub$0x8,%esp
> > > >
> > > > We need to add 4 bytes for trampoline to skip endbr32.
> > > >
> > > > Here is the updated patch to check if nested function isn't only
> > > > called directly,
> > >
> > > Please figure out the final patch. I don't want to waste my time
> > > reviewing different version every half hour. Ping me in a couple of
> > > days.
> >
> > This is the final version:
> >
> > https://gcc.gnu.org/ml/gcc-patches/2020-02/msg00586.html
> >
> > You can try the testcase in the patch on any machine with CET binutils
> > since ENDBR32 is nop on none-CET machines.  Without this patch,
> > the test will fail.
> 
> Please rephrase the comment. I don't understand what it tries to say.
> 

Here is the patch with updated comments.  OK for master and 8/9 branches?

Thanks.

H.J.
---
Since pass_insert_endbranch inserts ENDBR32 at entry of the target
function if it may be called indirectly, we also need to skip the
4-byte ENDBR32 for direct jump in trampoline if it is the case.

Tested on Linux/x86-64 CET machine with and without -m32.

gcc/

PR target/93656
* config/i386/i386.c (ix86_trampoline_init): Skip ENDBR32 at
the target function entry.

gcc/testsuite/

PR target/93656
* gcc.target/i386/pr93656.c: New test.
---
 gcc/config/i386/i386.c  | 9 -
 gcc/testsuite/gcc.target/i386/pr93656.c | 4 
 2 files changed, 12 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr93656.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 44bc0e0176a..52640b74cc8 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -16839,9 +16839,16 @@ ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx 
chain_value)
 the stack, we need to skip the first insn which pushes the
 (call-saved) register static chain; this push is 1 byte.  */
   offset += 5;
+  int skip = MEM_P (chain) ? 1 : 0;
+  /* Since pass_insert_endbranch inserts ENDBR32 at entry of the
+target function if it may be called indirectly, we also need
+to skip the 4-byte ENDBR32 if it is the case.  */
+  if (need_endbr
+ && !cgraph_node::get (fndecl)->only_called_directly_p ())
+   skip += 4;
   disp = expand_binop (SImode, sub_optab, fnaddr,
   plus_constant (Pmode, XEXP (m_tramp, 0),
- offset - (MEM_P (chain) ? 1 : 0)),
+ offset - skip),
   NULL_RTX, 1, OPTAB_DIRECT);
   emit_move_insn (mem, disp);
 }
diff --git a/gcc/testsuite/gcc.target/i386/pr93656.c 
b/gcc/testsuite/gcc.target/i386/pr93656.c
new file mode 100644
index 000..f0ac8c8edaa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr93656.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { ia32 && cet } } } */
+/* { dg-options "-O2 -fcf-protection" } */
+
+#include 

Re: [ARM] Fix -mpure-code for v6m

2020-02-13 Thread Christophe Lyon
On Mon, 10 Feb 2020 at 17:45, Richard Earnshaw (lists)
 wrote:
>
> On 10/02/2020 09:27, Christophe Lyon wrote:
> > On Fri, 7 Feb 2020 at 17:55, Richard Earnshaw (lists)
> >  wrote:
> >>
> >> On 07/02/2020 16:43, Christophe Lyon wrote:
> >>> On Fri, 7 Feb 2020 at 14:49, Richard Earnshaw (lists)
> >>>  wrote:
> 
>  On 07/02/2020 13:19, Christophe Lyon wrote:
> > When running the testsuite with -fdisable-rtl-fwprop2 and -mpure-code
> > for cortex-m0, I noticed that some testcases were failing because we
> > still generate "ldr rX, .LCY", which is what we want to avoid with
> > -mpure-code. This is latent since a recent improvement in fwprop
> > (PR88833).
> >
> > In this patch I change the thumb1_movsi_insn pattern so that it emits
> > the desired instruction sequence when arm_disable_literal_pool is set.
> >
> > I tried to add a define_split instead, but couldn't make it work: the
> > compiler then complains it cannot split the instruction, while my new
> > define_split accepts the same operand types as thumb1_movsi_insn:
> >
> > c-c++-common/torture/complex-sign-mixed-add.c:41:1: error: could not 
> > split insn
> > (insn 2989 425 4844 (set (reg/f:SI 3 r3 [1342])
> >(symbol_ref/u:SI ("*.LC6") [flags 0x2])) 836 
> > {*thumb1_movsi_insn}
> > (expr_list:REG_EQUIV (symbol_ref/u:SI ("*.LC6") [flags 0x2])
> >(nil)))
> > during RTL pass: final
> >
> > (define_split
> >  [(set (match_operand:SI 0 "register_operand" "")
> >(match_operand:SI 1 "general_operand" ""))]
> >  "TARGET_THUMB1
> >   && arm_disable_literal_pool
> >   && GET_CODE (operands[1]) == SYMBOL_REF"
> >  [(clobber (const_int 0))]
> >  "
> >gen_thumb1_movsi_symbol_ref(operands[0], operands[1]);
> >DONE;
> >  "
> > )
> > and I put this in thumb1_movsi_insn:
> > if (GET_CODE (operands[1]) == SYMBOL_REF && arm_disable_literal_pool)
> >  {
> >return \"#\";
> >  }
> >  return \"ldr\\t%0, %1\";
> >
> > 2020-02-07  Christophe Lyon  
> >
> >* config/arm/thumb1.md (thumb1_movsi_insn): Fix ldr 
> > alternative to
> >work with -mpure-code.
> >
> 
>  +case 0:
>  +case 1:
>  +  return \"movs%0, %1\";
>  +case 2:
>  +  return \"movw%0, %1\";
> 
>  This is OK, but please replace the hard tab in the strings for MOVS/MOVW
>  with \\t.
> 
> >>>
> >>> OK that was merely a cut & paste from the existing code.
> >>>
> >>> I'm concerned that the length attribute is becoming wrong with my
> >>> patch, isn't this a problem?
> >>>
> >>
> >> Potentially yes.  The branch range code needs this to handle overly long
> >> jumps correctly.
> >>
> >
> > Do you mean that the probability of problems due to that shortcoming
> > is low enough that I can commit my patch?
>
> It's hard to say that.  The number of instructions you generate for this
> is significant, so it likely will throw off the calculations and
> somebody will get unlucky.  On the other hand, I don't think we should
> pessimize code for the non-pure-code variants by inflating the size for
> this unconditionally.
>
> It seems there are two ways to fix this.
>
> 1) create a new alternative for the pure-code variant with its own
> length attribute, then only enable that for the case you need.  That
> would also allow you to go back to the templated asm format.
> 2) use a level of indirection to calculate the length - I haven't tried
> this, but it would be something like:
>
>   - create a new attribute - lets say default_length
>   - rename length for this pattern to default_length
>   - create another new attribute - lets say purecode_length, add lengths
> for each alternative but adjusted for the purecode variant.
>   - make the length attribute for this pattern select based on the
> disable_literal_pool variable between the default_length and
> purecode_length attributes.
>

Hi Richard,

Thanks for the suggestions.  I've implemented option (1) above, does
it match what you had in mind?

Tested on arm-eabi, with -mpure-code forced, no regression. Manually
checked that the expected sequence is generated with
-fdisable-rtl-fwprop2.

Thanks,

Christophe

> R.
>
> >
> > Thanks,
> >
> > Christophe
> >
> >> R.
> >>
> >>> Thanks,
> >>>
> >>> Christophe
> >>>
>  R.
> >>
>
[ARM] Fix -mpure-code for v6m

When running the testsuite with -fdisable-rtl-fwprop2 and -mpure-code
for cortex-m0, I noticed that some testcases were failing because we
still generate "ldr rX, .LCY", which is what we want to avoid with
-mpure-code. This is latent since a recent improvement in fwprop
(PR88833).

In this patch I change the thumb1_movsi_insn pattern so that it emits
the desired instruction sequence when arm_disable_literal_pool is set.

To achieve that, 

Re: Patch ping

2020-02-13 Thread Jakub Jelinek
On Wed, Feb 12, 2020 at 02:39:05PM -0700, Jeff Law wrote:
> On Mon, 2020-02-10 at 10:24 +0100, Jakub Jelinek wrote:
> > Hi!
> > 
> > I'd like to ping a couple of patches:
> > 
> > PR target/91913 - arm movsi + cmpsi -> movsi_compare0 peephole2 ICE fix
> >https://gcc.gnu.org/ml/gcc-patches/2020-02/msg00010.html
> Letting the ARM guys deal with this.

Yes, that is resolved now (Richard E. committed his patch and I've
committed the testcase).

> > PR preprocessor/92319 - partially implement P1042R1: __VA_OPT__ wording 
> > clarifications
> >https://gcc.gnu.org/ml/gcc-patches/2020-01/msg02104.html
> Jason for this one.

Of course; I just chose to send a ping for all my pending patches and
add to To: all relevant maintainers.

> > PR target/93069 - avx512* rejects-valid fix (rejected by assembler)
> >http://gcc.gnu.org/ml/gcc-patches/2019-12/msg01606.html
> This is in my queue :-)

Ok.

> > PR tree-optimization/92868 - compute_objsize/gimple_call_alloc_size
> >  /maybe_warn_overflow fixes
> >http://gcc.gnu.org/ml/gcc-patches/2019-12/msg01164.html
> Martin's patch should have addressed all the issues and should include
> your tests (tweaked, but supposed to be equivalent).

No, this is something different, this isn't what has been covered by the
testcases, but something found by code inspection, mainly inconsistencies
in the APIs, e.g. the ranges represented as sizetype most of the time,
but with one exception where it could be some other type (wider or
narrower), or sometimes the range being incorrect (if there is possible
overflow and we punt, we didn't change the ranges effectively to VARYING,
but just capped the maximum), or INTEGER_CSTs compared by pointer equality
rather than operand_equal_p.

Jakub



Re: [PATCH] openmp: ignore nowait if async execution is unsupported [PR93481]

2020-02-13 Thread Harwath, Frederik
Hi Jakub,

On 13.02.20 09:30, Jakub Jelinek wrote:
> On Thu, Feb 13, 2020 at 09:04:36AM +0100, Harwath, Frederik wrote:
>> --- a/libgomp/target.c
>> +++ b/libgomp/target.c
>> @@ -2022,6 +2022,16 @@ GOMP_target (int device, void (*fn) (void *), const 
>> void *unused,
>>gomp_unmap_vars (tgt_vars, true);
>>  }
>>  
>> +static unsigned int
> 
> Add inline?
> 

Added.

>> @@ -2257,6 +2269,8 @@ GOMP_target_update_ext (int device, size_t mapnum, 
>> void **hostaddrs,
>>  {
>>struct gomp_device_descr *devicep = resolve_device (device);
>>  
>> +  flags = clear_unsupported_flags (devicep, flags);

>> @@ -2398,6 +2412,8 @@ GOMP_target_enter_exit_data (int device, size_t 
>> mapnum, void **hostaddrs,
>>  {
>>struct gomp_device_descr *devicep = resolve_device (device);
>>  
>> +  flags = clear_unsupported_flags (devicep, flags);

> I don't see why you need to do the above two.  GOMP_TARGET_TASK_DATA
> is done on the host side, async_run callback isn't called in that case
> and while we create a task, all we do is wait for the (host) dependencies
> in there and then perform the data transfer we need.
> I think it is perfectly fine to ignore nowait on target but honor it
> on target update or target {enter,exit} data.

I see. Removed.


> Otherwise LGTM.

Thanks for the review! I have committed the patch with those changes. I forgot 
to include the ChangeLog
entry which I had to add in a separate commit. Sorry for that! It seems that I 
have to adapt my workflow -
perhaps some pre-push hook ;-).

Best regards,
Frederik

From 001ab12e620c6f117b2e93c77d188bd62fe7ba03 Mon Sep 17 00:00:00 2001
From: Frederik Harwath 
Date: Thu, 13 Feb 2020 07:30:16 +0100
Subject: [PATCH 1/2] openmp: ignore nowait if async execution is unsupported
 [PR93481]

An OpenMP "nowait" clause on a target construct currently leads to
a call to GOMP_OFFLOAD_async_run in the plugin that is used for
offloading at execution time. The nvptx plugin contains only a stub
of this function that always produces a fatal error if called.

This commit changes the "nowait" implementation to ignore the clause
if the executing device's plugin does not implement GOMP_OFFLOAD_async_run.
The stub in the nvptx plugin is removed which effectively means that
programs containing "nowait" can now be executed with nvptx offloading
as if the clause had not been used.
This behavior is consistent with the OpenMP specification which says that
"[...] execution of the target task *may* be deferred" (emphasis added),
cf. OpenMP 5.0, page 172.

libgomp/

	* plugin/plugin-nvptx.c: Remove GOMP_OFFLOAD_async_run stub.
	* target.c (gomp_load_plugin_for_device): Make "async_run" loading
	optional.
	(gomp_target_task_fn): Assert "devicep->async_run_func".
	(clear_unsupported_flags): New function to remove unsupported flags
	(right now only GOMP_TARGET_FLAG_NOWAIT) that can be be ignored.
	(GOMP_target_ext): Apply clear_unsupported_flags to flags.
	* testsuite/libgomp.c/target-33.c:
	Remove xfail for offload_target_nvptx.
	* testsuite/libgomp.c/target-34.c: Likewise.
---
 libgomp/plugin/plugin-nvptx.c   |  7 +--
 libgomp/target.c| 15 ++-
 libgomp/testsuite/libgomp.c/target-33.c |  3 ---
 libgomp/testsuite/libgomp.c/target-34.c |  3 ---
 4 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 6033c71a9db..ec103a2f40b 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1931,9 +1931,4 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
   nvptx_stacks_free (stacks, teams * threads);
 }
 
-void
-GOMP_OFFLOAD_async_run (int ord, void *tgt_fn, void *tgt_vars, void **args,
-			void *async_data)
-{
-  GOMP_PLUGIN_fatal ("GOMP_OFFLOAD_async_run unimplemented");
-}
+/* TODO: Implement GOMP_OFFLOAD_async_run. */
diff --git a/libgomp/target.c b/libgomp/target.c
index 3df007283f4..0ff727de47d 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -2022,6 +2022,16 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
   gomp_unmap_vars (tgt_vars, true);
 }
 
+static inline unsigned int
+clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
+{
+  /* If we cannot run asynchronously, simply ignore nowait.  */
+  if (devicep != NULL && devicep->async_run_func == NULL)
+flags &= ~GOMP_TARGET_FLAG_NOWAIT;
+
+  return flags;
+}
+
 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
and several arguments have been added:
FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
@@ -2054,6 +2064,8 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
   size_t tgt_align = 0, tgt_size = 0;
   bool fpc_done = false;
 
+  flags = clear_unsupported_flags (devicep, flags);
+
   if (flags & GOMP_TARGET_FLAG_NOWAIT)
 {
   struct gomp_thread *thr = gomp_thread ();
@@ -2524,6 +2536,7 @@ gomp_target_task_fn (void *data)
 	}
   

Re: [PATCH]Several intrinsic macros lack a closing parenthesis[PR93274]

2020-02-13 Thread Hongtao Liu
On Thu, Feb 13, 2020 at 5:12 PM Uros Bizjak  wrote:
>
> On Thu, Feb 13, 2020 at 9:53 AM Jakub Jelinek  wrote:
> >
> > On Thu, Feb 13, 2020 at 09:39:05AM +0100, Uros Bizjak wrote:
> > > > Changelog
> > > > gcc/
> > > >* config/i386/avx512vbmi2intrin.h
> > > >(_mm512_[,mask_,maskz_]shrdi_epi16,
> > > >_mm512_[,mask_,maskz_]shrdi_epi32,
> > > >_m512_[,mask_,maskz_]shrdi_epi64,
> > > >_mm512_[,mask_,maskz_]shldi_epi16,
> > > >_mm512_[,mask_,maskz_]shldi_epi32,
> > > >_m512_[,mask_,maskz_]shldi_epi64): Fix typo of lacking a
> > > >closing parenthesis.
> > > >* config/i386/avx512vbmi2vlintrin.h
> > > >(_mm256_[,mask_,maskz_]shrdi_epi16,
> > > >_mm256_[,mask_,maskz_]shrdi_epi32,
> > > >_m256_[,mask_,maskz_]shrdi_epi64,
> > > >_mm_[,mask_,maskz_]shrdi_epi16,
> > > >_mm_[,mask_,maskz_]shrdi_epi32,
> > > >_mm_[,mask_,maskz_]shrdi_epi64,
> > > >_mm256_[,mask_,maskz_]shldi_epi16,
> > > >_mm256_[,mask_,maskz_]shldi_epi32,
> > > >_m256_[,mask_,maskz_]shldi_epi64,
> > > >_mm_[,mask_,maskz_]shldi_epi16,
> > > >_mm_[,mask_,maskz_]shldi_epi32,
> > > >_mm_[,mask_,maskz_]shldi_epi64): Ditto.
> > > >
> > > > gcc/testsuite/
> > > >* gcc.target/i386/avx512vbmi2-vpshld-1.c: New test.
> > > >* gcc.target/i386/avx512vbmi2-vpshld-O0-1.c: Ditto.
> > > >* gcc.target/i386/avx512vbmi2-vpshrd-1.c: Ditto.
> > > >* gcc.target/i386/avx512vbmi2-vpshrd-O0-1.c: Ditto.
> > > >* gcc.target/i386/avx512vl-vpshld-O0-1.c: Ditto.
> > > >* gcc.target/i386/avx512vl-vpshrd-O0-1.c: Ditto.
> > >
> > > This is obvious patch, so OK for mainline and backports.
> >
> > The header changes sure, but for the testsuite, the standard way
> > would be to have it covered in the standard tests we have for this.
> > I think that is gcc.target/i386/sse-{13,14,22a,23}.c, so it would be worth
> > trying to figure out why it hasn't caught that.
>
> Indeed. It looks that these macros are not listed in sse-14.c, which
> would catch the problem. So, there is no need for new -O0 tests,
> please add missing functions to sse-14.c and sse-22.c testcases. I was
> also surprised that no testsuite coverage for vbmi2 functions was
> added at submission.
>
Yes, i saw that, thanks.
> Uros.
>
> > And, I don't think we allow any wildcards etc. (and [,whatever,whateverelse]
> > isn't even one, neither regexp nor shell wildcard) in the names of functions
> > changed, they can appear in the description text, but for the names of
> > macros one needs to list them all expanded, people do grep for those.
> >
> > Jakub
> >



-- 
BR,
Hongtao


Re: [PATCH] i386: Fix up _mm*_mask_popcnt_epi* [PR93696]

2020-02-13 Thread Uros Bizjak
On Thu, Feb 13, 2020 at 9:47 AM Jakub Jelinek  wrote:
>
> Hi!
>
> As mentioned in the PR and as
> https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mask_popcnt_epi
> also documents, _mm*_popcnt_epi* intrinsics are consistent with all other
> unary AVX512* intrinsics regarding arguments, i.e. the
> _mm*_whatever has just single argument (called a in the docs, and __A in the
> GCC headers),
> _mm*_mask_whatever has 3 arguments (called src, k, a in the docs and
> _W, __U, __A in GCC headers) and
> _mm*_maskz_whatever 2 arguments (called k, a in the docs and __U, __A in GCC
> headers).  Unfortunately, whomever implemented the _mm*_popcnt_epi*
> intrinsics got it wrong for the _mm*_mask_popcnt_epi* ones, calling the
> args __A, __U, __B and not passing them in the canonical order to the
> builtins, making it API incompatible with ICC as well as clang (tested on
> godbolts clang 7/8/9/trunk and ICC 19.0.{0,1}, older clang/ICC don't
> understand those, so it isn't that it used to be broken even in other
> compilers and got changed afterwards).
>
> The following patch fixes it, bootstrapped/regtested on x86_64-linux and
> i686-linux, ok for trunk?  Not really sure about release branches, perhaps
> with big fat warning in gcc-{8,9}/changes.html ?

OK for trunk and gcc-9 branch, so one can say that gcc version > 9.2 is OK.

What to do with gcc-8 branch? We had the same situation in the past,
we silently fixed it, so I think we should fix it on gcc-8 branch,
too. I guess that users understand and assume by now that these
headers are in some flux as far as new ISAs are concerned, so the
latest branch release should be used when new(ish) ISAs are used.

Uros.

> 2020-02-13  Jakub Jelinek  
>
> PR target/93696
> * config/i386/avx512bitalgintrin.h (_mm512_mask_popcnt_epi8,
> _mm512_mask_popcnt_epi16, _mm256_mask_popcnt_epi8,
> _mm256_mask_popcnt_epi16, _mm_mask_popcnt_epi8,
> _mm_mask_popcnt_epi16): Rename __B argument to __A and __A to __W,
> pass __A to the builtin followed by __W instead of __A followed by
> __B.
> * config/i386/avx512vpopcntdqintrin.h (_mm512_mask_popcnt_epi32,
> _mm512_mask_popcnt_epi64): Likewise.
> * config/i386/avx512vpopcntdqvlintrin.h (_mm_mask_popcnt_epi32,
> _mm256_mask_popcnt_epi32, _mm_mask_popcnt_epi64,
> _mm256_mask_popcnt_epi64): Likewise.
>
> * gcc.target/i386/pr93696-1.c: New test.
> * gcc.target/i386/pr93696-2.c: New test.
> * gcc.target/i386/avx512bitalg-vpopcntw-1.c (TEST): Fix argument order
> of _mm*_mask_popcnt_*.
> * gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c (TEST): Likewise.
> * gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c (TEST): Likewise.
> * gcc.target/i386/avx512bitalg-vpopcntb-1.c (TEST): Likewise.
> * gcc.target/i386/avx512bitalg-vpopcntb.c (foo): Likewise.
> * gcc.target/i386/avx512bitalg-vpopcntbvl.c (foo): Likewise.
> * gcc.target/i386/avx512vpopcntdq-vpopcntd.c (foo): Likewise.
> * gcc.target/i386/avx512bitalg-vpopcntwvl.c (foo): Likewise.
> * gcc.target/i386/avx512bitalg-vpopcntw.c (foo): Likewise.
> * gcc.target/i386/avx512vpopcntdq-vpopcntq.c (foo): Likewise.
>
> --- gcc/config/i386/avx512bitalgintrin.h.jj 2020-02-12 11:43:57.183690204 
> +0100
> +++ gcc/config/i386/avx512bitalgintrin.h2020-02-13 09:01:59.839598980 
> +0100
> @@ -61,10 +61,10 @@ _mm512_popcnt_epi16 (__m512i __A)
>
>  extern __inline __m512i
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_mask_popcnt_epi8 (__m512i __A, __mmask64 __U, __m512i __B)
> +_mm512_mask_popcnt_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
>  {
>return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A,
> -(__v64qi) __B,
> +(__v64qi) __W,
>  (__mmask64) __U);
>  }
>
> @@ -79,10 +79,10 @@ _mm512_maskz_popcnt_epi8 (__mmask64 __U,
>  }
>  extern __inline __m512i
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_mask_popcnt_epi16 (__m512i __A, __mmask32 __U, __m512i __B)
> +_mm512_mask_popcnt_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
>  {
>return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A,
> -   (__v32hi) __B,
> +   (__v32hi) __W,
> (__mmask32) __U);
>  }
>
> @@ -127,10 +127,10 @@ _mm512_mask_bitshuffle_epi64_mask (__mma
>
>  extern __inline __m256i
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_popcnt_epi8 (__m256i __A, __mmask32 __U, __m256i __B)
> +_mm256_mask_popcnt_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
>  {
>return 

Re: testsuite: Fix g++.dg/analyzer/pr93212.C with check-c++-all

2020-02-13 Thread David Malcolm
On Thu, 2020-02-13 at 08:11 +0100, Jakub Jelinek wrote:
> On Thu, Feb 06, 2020 at 03:27:29PM -0500, David Malcolm wrote:
> > gcc/testsuite/ChangeLog:
> > PR analyzer/93212
> > * g++.dg/analyzer/analyzer.exp: New subdirectory and .exp
> > suite.
> > * g++.dg/analyzer/malloc.C: New test.
> > * g++.dg/analyzer/pr93212.C: New test.
> 
> The test FAILs with c++11:
> .../gcc/testsuite/g++.dg/analyzer/pr93212.C:4:1: error: 'lol'
> function uses 'auto' type specifier without trailing return type
> .../gcc/testsuite/g++.dg/analyzer/pr93212.C:4:1: note: deduced return
> type only available with '-std=c++14' or '-std=gnu++14'
> 
> Fixed thusly, regtested on x86_64-linux, committed to trunk as
> obvious.

Thanks Jakub, and sorry for the failure.

I did some digging into why this got through my testing.

My standard patch testing involves a bootstrap build with these three
Makefile targets in sequence:

  make all
  make install
  make check

each teed to a logfile, and with a suitable -j

I hadn't noticed the check-c++-all in cp/Make-lang.in.

I've been using "--target_board=unix\{-m32,-m64\}" in my RUNTESTFLAGS
during development of a patch, but I notice now that I didn't have it
in my bootstrap testing.  I've fixed that, and added check-c++-all.

Are there any other Makefile targets I should be testing with?


Thanks
Dave



Re: [PATCH]Several intrinsic macros lack a closing parenthesis[PR93274]

2020-02-13 Thread Uros Bizjak
On Thu, Feb 13, 2020 at 9:53 AM Jakub Jelinek  wrote:
>
> On Thu, Feb 13, 2020 at 09:39:05AM +0100, Uros Bizjak wrote:
> > > Changelog
> > > gcc/
> > >* config/i386/avx512vbmi2intrin.h
> > >(_mm512_[,mask_,maskz_]shrdi_epi16,
> > >_mm512_[,mask_,maskz_]shrdi_epi32,
> > >_m512_[,mask_,maskz_]shrdi_epi64,
> > >_mm512_[,mask_,maskz_]shldi_epi16,
> > >_mm512_[,mask_,maskz_]shldi_epi32,
> > >_m512_[,mask_,maskz_]shldi_epi64): Fix typo of lacking a
> > >closing parenthesis.
> > >* config/i386/avx512vbmi2vlintrin.h
> > >(_mm256_[,mask_,maskz_]shrdi_epi16,
> > >_mm256_[,mask_,maskz_]shrdi_epi32,
> > >_m256_[,mask_,maskz_]shrdi_epi64,
> > >_mm_[,mask_,maskz_]shrdi_epi16,
> > >_mm_[,mask_,maskz_]shrdi_epi32,
> > >_mm_[,mask_,maskz_]shrdi_epi64,
> > >_mm256_[,mask_,maskz_]shldi_epi16,
> > >_mm256_[,mask_,maskz_]shldi_epi32,
> > >_m256_[,mask_,maskz_]shldi_epi64,
> > >_mm_[,mask_,maskz_]shldi_epi16,
> > >_mm_[,mask_,maskz_]shldi_epi32,
> > >_mm_[,mask_,maskz_]shldi_epi64): Ditto.
> > >
> > > gcc/testsuite/
> > >* gcc.target/i386/avx512vbmi2-vpshld-1.c: New test.
> > >* gcc.target/i386/avx512vbmi2-vpshld-O0-1.c: Ditto.
> > >* gcc.target/i386/avx512vbmi2-vpshrd-1.c: Ditto.
> > >* gcc.target/i386/avx512vbmi2-vpshrd-O0-1.c: Ditto.
> > >* gcc.target/i386/avx512vl-vpshld-O0-1.c: Ditto.
> > >* gcc.target/i386/avx512vl-vpshrd-O0-1.c: Ditto.
> >
> > This is obvious patch, so OK for mainline and backports.
>
> The header changes sure, but for the testsuite, the standard way
> would be to have it covered in the standard tests we have for this.
> I think that is gcc.target/i386/sse-{13,14,22a,23}.c, so it would be worth
> trying to figure out why it hasn't caught that.

Indeed. It looks that these macros are not listed in sse-14.c, which
would catch the problem. So, there is no need for new -O0 tests,
please add missing functions to sse-14.c and sse-22.c testcases. I was
also surprised that no testsuite coverage for vbmi2 functions was
added at submission.

Uros.

> And, I don't think we allow any wildcards etc. (and [,whatever,whateverelse]
> isn't even one, neither regexp nor shell wildcard) in the names of functions
> changed, they can appear in the description text, but for the names of
> macros one needs to list them all expanded, people do grep for those.
>
> Jakub
>


Re: [PATCH 0/4 GCC11] IVOPTs consider step cost for different forms when unrolling

2020-02-13 Thread Segher Boessenkool
On Thu, Feb 13, 2020 at 08:48:33AM +0100, Richard Biener wrote:
> On Wed, 12 Feb 2020, Segher Boessenkool wrote:
> > On Wed, Feb 12, 2020 at 11:53:22AM +0100, Richard Biener wrote:
> > > BB reorder switches back and forth as well ... :/
> > 
> > Yes.  It is extremely hard to change any jumps in cfgrtl mode.
> > 
> > The goal is to use cfglayout mode more, ideally *always*, not to use
> > it less!
> 
> Sure!  It must be that split requires CFG RTL (it doesn't say so, but
> we don't have a PROP_rtllayout or a "cannot work with" set of
> properties).  Otherwise I'm not sure why we go out of CFG layout
> mode so early.  Passes not messing with the CFG at all should be
> ignorant of what mode we are in.

Well, split can split jump insns as well, so it currently cannot use
cfglayout mode.

We could probably make split not split jumps, only do that in split2
and later, which is after reload.  Getting IRA and LRA to work with
cfglayout mode will be fun.

Right after split2 is pro_and_epilogue.  Everything after that doesn't
want cfglayout mode I'd say.  So we don't even have to move
out_of_cfglayout all that far, but there are some big nasties it has
to move over.

> > > I think both are closely enough related that we probably should do
> > > partitioning from within the same framework?  OTOH BB reorder
> > > happens _much_ later.
> > 
> > This may be doable.  What we need to do first though is to find a better
> > thing to use than EDGE_CROSSING.
> > 
> > Maybe we can determine which blocks should be hot and cold early, but
> > actually make that happen only very late (maybe adjusting the decision
> > if we have to to make things work)?  That way, intervening passes do not
> > have to care (much).
> 
> The question is whether we can even preserve things like BB counts
> and edge frequencies once we've gone out of CFG layout mode for once.

Why not?  cfglayout mode simply does not include simple branches and
jump instructions, and fallthrough is not necessarily to the next bb in
the insn stream, but that is all (basically).  The cfg is just the same
in both rtl modes.  What destroys the bb and edge frequencies?

> > > > I don't agree.  The whole way EDGE_CROSSING works hinders all other
> > > > optimisations a lot.
> > > 
> > > I'm not sure if it's there for correctness reasons or just to
> > > help checking that nothing "undoes" the partitioning decision.
> > 
> > The latter.  (It may also make it easier for the former, of course, but
> > that can be solved anyway).  And that makes live very hard for all later
> > passes, while it is doubtful that it even give the best decisions: for
> > example it prevents a lot of shrink-wrapping, which you dearly *want* to
> > do on cold code!
> 
> Sure.  So do that earlier ;)

You cannot.  It has to stay after RA, obviously.  And you cannot move RA
earlier, to before combine.

We do shrink-wrapping (and all other *logue work) pretty much as early
as possible, already.


Segher


Re: [PATCH]Several intrinsic macros lack a closing parenthesis[PR93274]

2020-02-13 Thread Jakub Jelinek
On Thu, Feb 13, 2020 at 09:39:05AM +0100, Uros Bizjak wrote:
> > Changelog
> > gcc/
> >* config/i386/avx512vbmi2intrin.h
> >(_mm512_[,mask_,maskz_]shrdi_epi16,
> >_mm512_[,mask_,maskz_]shrdi_epi32,
> >_m512_[,mask_,maskz_]shrdi_epi64,
> >_mm512_[,mask_,maskz_]shldi_epi16,
> >_mm512_[,mask_,maskz_]shldi_epi32,
> >_m512_[,mask_,maskz_]shldi_epi64): Fix typo of lacking a
> >closing parenthesis.
> >* config/i386/avx512vbmi2vlintrin.h
> >(_mm256_[,mask_,maskz_]shrdi_epi16,
> >_mm256_[,mask_,maskz_]shrdi_epi32,
> >_m256_[,mask_,maskz_]shrdi_epi64,
> >_mm_[,mask_,maskz_]shrdi_epi16,
> >_mm_[,mask_,maskz_]shrdi_epi32,
> >_mm_[,mask_,maskz_]shrdi_epi64,
> >_mm256_[,mask_,maskz_]shldi_epi16,
> >_mm256_[,mask_,maskz_]shldi_epi32,
> >_m256_[,mask_,maskz_]shldi_epi64,
> >_mm_[,mask_,maskz_]shldi_epi16,
> >_mm_[,mask_,maskz_]shldi_epi32,
> >_mm_[,mask_,maskz_]shldi_epi64): Ditto.
> >
> > gcc/testsuite/
> >* gcc.target/i386/avx512vbmi2-vpshld-1.c: New test.
> >* gcc.target/i386/avx512vbmi2-vpshld-O0-1.c: Ditto.
> >* gcc.target/i386/avx512vbmi2-vpshrd-1.c: Ditto.
> >* gcc.target/i386/avx512vbmi2-vpshrd-O0-1.c: Ditto.
> >* gcc.target/i386/avx512vl-vpshld-O0-1.c: Ditto.
> >* gcc.target/i386/avx512vl-vpshrd-O0-1.c: Ditto.
> 
> This is obvious patch, so OK for mainline and backports.

The header changes sure, but for the testsuite, the standard way
would be to have it covered in the standard tests we have for this.
I think that is gcc.target/i386/sse-{13,14,22a,23}.c, so it would be worth
trying to figure out why it hasn't caught that.

And, I don't think we allow any wildcards etc. (and [,whatever,whateverelse]
isn't even one, neither regexp nor shell wildcard) in the names of functions
changed, they can appear in the description text, but for the names of
macros one needs to list them all expanded, people do grep for those.

Jakub



Re: [PATCH] sccvn: Handle bitfields in vn_reference_lookup_3 [PR93582]

2020-02-13 Thread Richard Biener
On Thu, 13 Feb 2020, Jakub Jelinek wrote:

> Hi!
> 
> The following patch is first step towards fixing PR93582.
> vn_reference_lookup_3 right now punts on anything that isn't byte aligned,
> so to be able to lookup a constant bitfield store, one needs to use
> the exact same COMPONENT_REF, otherwise it isn't found.
> 
> This patch lifts up that that restriction if the bits to be loaded are
> covered by a single store of a constant (keeps the restriction so far
> for the multiple store case, can tweak that incrementally, but I think
> for bisection etc. it is worth to do it one step at a time).
> 
> Bootstrapped/regtested on x86_64-linux, i686-linux, powerpc64le-linux
> and powerpc64-linux (the last one regtested with \{-m32,-m64\}).
> Additionally, I've bootstrapped/regtested it on those with statistics
> gathering on when val was non-NULL (i.e. we managed to see something
> through) when any of offseti, offset2i, maxsizei or size2i wasn't multiple
> of BITS_PER_UNIT (i.e. when this optimization triggered).  Below is
> a list of the unique cases where it triggered, the first column says on
> which of the 5 ABIs it triggered, qmath stands for those that enable
> libquadmath, i.e. ia32,x86_64,ppc64le.
> 
> Ok for trunk?

OK.

Thanks,
Richard.

> all ../../gcc/gimple-ssa-backprop.c {anonymous}::backprop::process_var
> all gcc/gcc/testsuite/gcc.c-torture/compile/20191015-1.c f
> all gcc/gcc/testsuite/gcc.c-torture/compile/20191015-2.c f
> all gcc/gcc/testsuite/gcc.c-torture/compile/20200105-1.c g
> all gcc/gcc/testsuite/gcc.c-torture/compile/20200105-2.c g
> all gcc/gcc/testsuite/gcc.c-torture/compile/20200105-3.c g
> all gcc/gcc/testsuite/gcc.c-torture/execute/20181120-1.c main
> all gcc/gcc/testsuite/gcc.c-torture/execute/921204-1.c main
> all gcc/gcc/testsuite/gcc.c-torture/execute/pr58726.c main
> all gcc/gcc/testsuite/gcc.c-torture/execute/pr86492.c foo
> all gcc/gcc/testsuite/gcc.dg/store_merging_24.c foo
> all gcc/gcc/testsuite/gcc.dg/store_merging_25.c foo
> all gcc/gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c foo
> all gcc/gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c foo
> all gcc/gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c foo
> all gcc/gcc/testsuite/g++.dg/other/pr89692.C foo
> all ../../../libgcc/unwind-dw2-fde-dip.c init_object
> all ../../../libgcc/unwind-dw2-fde-dip.c __register_frame_info
> all ../../../libgcc/unwind-dw2-fde-dip.c __register_frame_info_bases
> all ../../../libgcc/unwind-dw2-fde-dip.c __register_frame_info_table
> all ../../../libgcc/unwind-dw2-fde-dip.c __register_frame_info_table_bases
> all ../../../libgcc/unwind-dw2-fde-dip.c __register_frame_table
> all ../../../libgcc/unwind-dw2-fde-dip.c search_object
> all ../../../libgcc/unwind-dw2-fde-dip.c _Unwind_IteratePhdrCallback
> ia32 /tmp/921204-1.exe.NdZMiZ.ltrans0.o main
> ia32 /tmp/ccK7HiiN.o main
> ia32 /tmp/ccWtcuID.o foo
> ia32 /tmp/pr86492.exe.IKs2SA.ltrans0.o foo
> lp64 gcc/gcc/testsuite/gnat.dg/pack22.adb pack22
> ppc32 /tmp/921204-1.exe.1GoDpE.ltrans0.o main
> ppc32 /tmp/ccivx4sg.o foo
> ppc32 /tmp/ccLFNqjQ.o main
> ppc32 /tmp/pr86492.exe.VkJDwY.ltrans0.o foo
> ppc32 /tmp/pr88739.exe.y33n0X.ltrans0.o main
> ppc64le cd2a32a.adb cd2a32a
> ppc64le /tmp/921204-1.exe.la923O.ltrans0.o main
> ppc64le /tmp/ccH3NcwE.o main
> ppc64le /tmp/ccmHysWx.o foo
> ppc64le /tmp/pr86492.exe.EYd24l.ltrans0.o foo
> ppc64le /tmp/pr88739.exe.uAT6pA.ltrans0.o main
> ppc64 /tmp/921204-1.exe.vtoTSo.ltrans0.o main
> ppc64 /tmp/ccm4RGtK.o main
> ppc64 /tmp/cczZpRCD.o foo
> ppc64 /tmp/pr86492.exe.UdbN8I.ltrans0.o foo
> ppc64 /tmp/pr88739.exe.DtQe1H.ltrans0.o main
> qmath ../../../libquadmath/math/expq.c expq
> qmath ../../../libquadmath/math/fmaq.c fmaq
> qmath ../../../libquadmath/math/nanq.c nanq
> qmath ../../../libquadmath/strtod/strtoflt128.c strtoflt128_internal
> x86_64 cd2a32a.adb cd2a32a
> x86_64 /tmp/921204-1.exe.0059fB.ltrans0.o main
> x86_64 /tmp/cci9lHhD.o main
> x86_64 /tmp/ccTlWSbV.o foo
> x86_64 /tmp/pr86492.exe.NE0yez.ltrans0.o foo
> x86_64 /tmp/pr88739.exe.PKCG2M.ltrans0.o main
> x86 gcc/gcc/testsuite/gcc.dg/torture/pr45017.c main
> x86 gcc/gcc/testsuite/gcc.target/i386/pr37870.c main
> 
> 2020-02-13  Jakub Jelinek  
> 
>   PR tree-optimization/93582
>   * fold-const.h (shift_bytes_in_array_left,
>   shift_bytes_in_array_right): Declare.
>   * fold-const.c (shift_bytes_in_array_left,
>   shift_bytes_in_array_right): New function, moved from
>   gimple-ssa-store-merging.c, no longer static.
>   * gimple-ssa-store-merging.c (shift_bytes_in_array): Move
>   to gimple-ssa-store-merging.c and rename to shift_bytes_in_array_left.
>   (shift_bytes_in_array_right): Move to gimple-ssa-store-merging.c.
>   (encode_tree_to_bitpos): Use shift_bytes_in_array_left instead of
>   shift_bytes_in_array.
>   (verify_shift_bytes_in_array): Rename to ...
>   (verify_shift_bytes_in_array_left): ... this.  Use
>   shift_bytes_in_array_left instead of shift_bytes_in_array.
>   

[PATCH] i386: Fix up _mm*_mask_popcnt_epi* [PR93696]

2020-02-13 Thread Jakub Jelinek
Hi!

As mentioned in the PR and as
https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mask_popcnt_epi
also documents, _mm*_popcnt_epi* intrinsics are consistent with all other
unary AVX512* intrinsics regarding arguments, i.e. the
_mm*_whatever has just single argument (called a in the docs, and __A in the
GCC headers),
_mm*_mask_whatever has 3 arguments (called src, k, a in the docs and
_W, __U, __A in GCC headers) and
_mm*_maskz_whatever 2 arguments (called k, a in the docs and __U, __A in GCC
headers).  Unfortunately, whomever implemented the _mm*_popcnt_epi*
intrinsics got it wrong for the _mm*_mask_popcnt_epi* ones, calling the
args __A, __U, __B and not passing them in the canonical order to the
builtins, making it API incompatible with ICC as well as clang (tested on
godbolts clang 7/8/9/trunk and ICC 19.0.{0,1}, older clang/ICC don't
understand those, so it isn't that it used to be broken even in other
compilers and got changed afterwards).

The following patch fixes it, bootstrapped/regtested on x86_64-linux and
i686-linux, ok for trunk?  Not really sure about release branches, perhaps
with big fat warning in gcc-{8,9}/changes.html ?

2020-02-13  Jakub Jelinek  

PR target/93696
* config/i386/avx512bitalgintrin.h (_mm512_mask_popcnt_epi8,
_mm512_mask_popcnt_epi16, _mm256_mask_popcnt_epi8,
_mm256_mask_popcnt_epi16, _mm_mask_popcnt_epi8,
_mm_mask_popcnt_epi16): Rename __B argument to __A and __A to __W,
pass __A to the builtin followed by __W instead of __A followed by
__B.
* config/i386/avx512vpopcntdqintrin.h (_mm512_mask_popcnt_epi32,
_mm512_mask_popcnt_epi64): Likewise.
* config/i386/avx512vpopcntdqvlintrin.h (_mm_mask_popcnt_epi32,
_mm256_mask_popcnt_epi32, _mm_mask_popcnt_epi64,
_mm256_mask_popcnt_epi64): Likewise.

* gcc.target/i386/pr93696-1.c: New test.
* gcc.target/i386/pr93696-2.c: New test.
* gcc.target/i386/avx512bitalg-vpopcntw-1.c (TEST): Fix argument order
of _mm*_mask_popcnt_*.
* gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c (TEST): Likewise.
* gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c (TEST): Likewise.
* gcc.target/i386/avx512bitalg-vpopcntb-1.c (TEST): Likewise.
* gcc.target/i386/avx512bitalg-vpopcntb.c (foo): Likewise.
* gcc.target/i386/avx512bitalg-vpopcntbvl.c (foo): Likewise.
* gcc.target/i386/avx512vpopcntdq-vpopcntd.c (foo): Likewise.
* gcc.target/i386/avx512bitalg-vpopcntwvl.c (foo): Likewise.
* gcc.target/i386/avx512bitalg-vpopcntw.c (foo): Likewise.
* gcc.target/i386/avx512vpopcntdq-vpopcntq.c (foo): Likewise.

--- gcc/config/i386/avx512bitalgintrin.h.jj 2020-02-12 11:43:57.183690204 
+0100
+++ gcc/config/i386/avx512bitalgintrin.h2020-02-13 09:01:59.839598980 
+0100
@@ -61,10 +61,10 @@ _mm512_popcnt_epi16 (__m512i __A)
 
 extern __inline __m512i
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_popcnt_epi8 (__m512i __A, __mmask64 __U, __m512i __B)
+_mm512_mask_popcnt_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
 {
   return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A,
-(__v64qi) __B,
+(__v64qi) __W,
 (__mmask64) __U);
 }
 
@@ -79,10 +79,10 @@ _mm512_maskz_popcnt_epi8 (__mmask64 __U,
 }
 extern __inline __m512i
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_popcnt_epi16 (__m512i __A, __mmask32 __U, __m512i __B)
+_mm512_mask_popcnt_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
 {
   return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A,
-   (__v32hi) __B,
+   (__v32hi) __W,
(__mmask32) __U);
 }
 
@@ -127,10 +127,10 @@ _mm512_mask_bitshuffle_epi64_mask (__mma
 
 extern __inline __m256i
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_popcnt_epi8 (__m256i __A, __mmask32 __U, __m256i __B)
+_mm256_mask_popcnt_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
-(__v32qi) __B,
+(__v32qi) __W,
 (__mmask32) __U);
 }
 
@@ -222,10 +222,10 @@ _mm_popcnt_epi16 (__m128i __A)
 
 extern __inline __m256i
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_popcnt_epi16 (__m256i __A, __mmask16 __U, __m256i __B)
+_mm256_mask_popcnt_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) 

Re: [PATCH]Several intrinsic macros lack a closing parenthesis[PR93274]

2020-02-13 Thread Uros Bizjak
> Changelog
> gcc/
>* config/i386/avx512vbmi2intrin.h
>(_mm512_[,mask_,maskz_]shrdi_epi16,
>_mm512_[,mask_,maskz_]shrdi_epi32,
>_m512_[,mask_,maskz_]shrdi_epi64,
>_mm512_[,mask_,maskz_]shldi_epi16,
>_mm512_[,mask_,maskz_]shldi_epi32,
>_m512_[,mask_,maskz_]shldi_epi64): Fix typo of lacking a
>closing parenthesis.
>* config/i386/avx512vbmi2vlintrin.h
>(_mm256_[,mask_,maskz_]shrdi_epi16,
>_mm256_[,mask_,maskz_]shrdi_epi32,
>_m256_[,mask_,maskz_]shrdi_epi64,
>_mm_[,mask_,maskz_]shrdi_epi16,
>_mm_[,mask_,maskz_]shrdi_epi32,
>_mm_[,mask_,maskz_]shrdi_epi64,
>_mm256_[,mask_,maskz_]shldi_epi16,
>_mm256_[,mask_,maskz_]shldi_epi32,
>_m256_[,mask_,maskz_]shldi_epi64,
>_mm_[,mask_,maskz_]shldi_epi16,
>_mm_[,mask_,maskz_]shldi_epi32,
>_mm_[,mask_,maskz_]shldi_epi64): Ditto.
>
> gcc/testsuite/
>* gcc.target/i386/avx512vbmi2-vpshld-1.c: New test.
>* gcc.target/i386/avx512vbmi2-vpshld-O0-1.c: Ditto.
>* gcc.target/i386/avx512vbmi2-vpshrd-1.c: Ditto.
>* gcc.target/i386/avx512vbmi2-vpshrd-O0-1.c: Ditto.
>* gcc.target/i386/avx512vl-vpshld-O0-1.c: Ditto.
>* gcc.target/i386/avx512vl-vpshrd-O0-1.c: Ditto.

This is obvious patch, so OK for mainline and backports.

Thanks,
Uros.


Re: [PATCH] openmp: ignore nowait if async execution is unsupported [PR93481]

2020-02-13 Thread Jakub Jelinek
On Thu, Feb 13, 2020 at 09:04:36AM +0100, Harwath, Frederik wrote:
> --- a/libgomp/target.c
> +++ b/libgomp/target.c
> @@ -2022,6 +2022,16 @@ GOMP_target (int device, void (*fn) (void *), const 
> void *unused,
>gomp_unmap_vars (tgt_vars, true);
>  }
>  
> +static unsigned int

Add inline?

> +clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int 
> flags)
> +{
> +  /* If we cannot run asynchronously, simply ignore nowait.  */
> +  if (devicep != NULL && devicep->async_run_func == NULL)
> +flags &= ~GOMP_TARGET_FLAG_NOWAIT;
> +
> +  return flags;
> +}
> +
>  /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
> and several arguments have been added:
> FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
> @@ -2054,6 +2064,8 @@ GOMP_target_ext (int device, void (*fn) (void *), 
> size_t mapnum,
>size_t tgt_align = 0, tgt_size = 0;
>bool fpc_done = false;
>  
> +  flags = clear_unsupported_flags (devicep, flags);
> +
>if (flags & GOMP_TARGET_FLAG_NOWAIT)
>  {
>struct gomp_thread *thr = gomp_thread ();

LGTM.

> @@ -2257,6 +2269,8 @@ GOMP_target_update_ext (int device, size_t mapnum, void 
> **hostaddrs,
>  {
>struct gomp_device_descr *devicep = resolve_device (device);
>  
> +  flags = clear_unsupported_flags (devicep, flags);
> +
>/* If there are depend clauses, but nowait is not present,
>   block the parent task until the dependencies are resolved
>   and then just continue with the rest of the function as if it
> @@ -2398,6 +2412,8 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, 
> void **hostaddrs,
>  {
>struct gomp_device_descr *devicep = resolve_device (device);
>  
> +  flags = clear_unsupported_flags (devicep, flags);
> +
>/* If there are depend clauses, but nowait is not present,
>   block the parent task until the dependencies are resolved
>   and then just continue with the rest of the function as if it

I don't see why you need to do the above two.  GOMP_TARGET_TASK_DATA
is done on the host side, async_run callback isn't called in that case
and while we create a task, all we do is wait for the (host) dependencies
in there and then perform the data transfer we need.
I think it is perfectly fine to ignore nowait on target but honor it
on target update or target {enter,exit} data.

> @@ -2524,6 +2540,7 @@ gomp_target_task_fn (void *data)
>   }
>ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
>  
> +  assert (devicep->async_run_func);
>devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
>  ttask->args, (void *) ttask);
>return true;
> @@ -3040,7 +3057,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr 
> *device,
>if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
>  {
>DLSYM (run);
> -  DLSYM (async_run);
> +  DLSYM_OPT (async_run, async_run);
>DLSYM_OPT (can_run, can_run);
>DLSYM (dev2dev);
>  }
> diff --git a/libgomp/testsuite/libgomp.c/target-33.c 
> b/libgomp/testsuite/libgomp.c/target-33.c
> index 15d2d7e38ab..1bed4b6bc67 100644
> --- a/libgomp/testsuite/libgomp.c/target-33.c
> +++ b/libgomp/testsuite/libgomp.c/target-33.c
> @@ -1,6 +1,3 @@
> -/* { dg-xfail-run-if "GOMP_OFFLOAD_async_run not implemented" { 
> offload_target_nvptx } }
> -   Cf. https://gcc.gnu.org/PR81688.  */
> -
>  extern void abort (void);
>  
>  int
> diff --git a/libgomp/testsuite/libgomp.c/target-34.c 
> b/libgomp/testsuite/libgomp.c/target-34.c
> index 5a3596424d8..66d9f54202b 100644
> --- a/libgomp/testsuite/libgomp.c/target-34.c
> +++ b/libgomp/testsuite/libgomp.c/target-34.c
> @@ -1,6 +1,3 @@
> -/* { dg-xfail-run-if "GOMP_OFFLOAD_async_run not implemented" { 
> offload_target_nvptx } }
> -   Cf. https://gcc.gnu.org/PR81688.  */
> -
>  extern void abort (void);
>  
>  int
> -- 
> 2.17.1
> 
> 

Otherwise LGTM.

Jakub



Re: [PATCH] i386: Skip ENDBR32 at nested function entry

2020-02-13 Thread Uros Bizjak
On Wed, Feb 12, 2020 at 1:21 PM H.J. Lu  wrote:
>
> On Mon, Feb 10, 2020 at 12:01 PM Uros Bizjak  wrote:
> >
> > On Mon, Feb 10, 2020 at 8:53 PM H.J. Lu  wrote:
> > >
> > > On Mon, Feb 10, 2020 at 11:40 AM Uros Bizjak  wrote:
> > > >
> > > > On Mon, Feb 10, 2020 at 8:22 PM H.J. Lu  wrote:
> > > > >
> > > > > Since nested function isn't only called directly, there is ENDBR32 at
> > > > > function entry and we need to skip it for direct jump in trampoline.
> > > >
> > > > Hm, I'm afraid I don't understand this comment. Can you perhaps 
> > > > rephrase it?
> > > >
> > >
> > > ix86_trampoline_init has
> > >
> > >  /* Compute offset from the end of the jmp to the target function.
> > >  In the case in which the trampoline stores the static chain on
> > >  the stack, we need to skip the first insn which pushes the
> > >  (call-saved) register static chain; this push is 1 byte.  */
> > >   offset += 5;
> > >   disp = expand_binop (SImode, sub_optab, fnaddr,
> > >plus_constant (Pmode, XEXP (m_tramp, 0),
> > >   offset - (MEM_P (chain) ? 1 : 
> > > 0)),
> > >NULL_RTX, 1, OPTAB_DIRECT);
> > >   emit_move_insn (mem, disp);
> > >
> > > Without CET, we got
> > >
> > > 011 :
> > >   11: 56push   %esi
> > >   12: 55push   %ebp   << trampoline jumps here.
> > >   13: 89 e5mov%esp,%ebp
> > >   15: 83 ec 08  sub$0x8,%esp
> > >
> > > With CET, if bar isn't only called directly, we got
> > >
> > > 0015 :
> > >   15: f3 0f 1e fb  endbr32
> > >   19: 56push   %esi
> > >   1a: 55push   %ebp    trampoline jumps here.
> > >   1b: 89 e5mov%esp,%ebp
> > >   1d: 83 ec 08  sub$0x8,%esp
> > >
> > > We need to add 4 bytes for trampoline to skip endbr32.
> > >
> > > Here is the updated patch to check if nested function isn't only
> > > called directly,
> >
> > Please figure out the final patch. I don't want to waste my time
> > reviewing different version every half hour. Ping me in a couple of
> > days.
>
> This is the final version:
>
> https://gcc.gnu.org/ml/gcc-patches/2020-02/msg00586.html
>
> You can try the testcase in the patch on any machine with CET binutils
> since ENDBR32 is nop on none-CET machines.  Without this patch,
> the test will fail.

Please rephrase the comment. I don't understand what it tries to say.

Uros.


[PATCH] testsuite/93717 fix up gcc.dg/optimize-bswapsi-2.c for BE

2020-02-13 Thread Richard Biener


Pushed.

2020-02-13  Richard Biener  

PR testsuite/93717
* gcc.dg/optimize-bswapsi-2.c: Add BE case.
---
 gcc/testsuite/gcc.dg/optimize-bswapsi-2.c | 19 +++
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/optimize-bswapsi-2.c 
b/gcc/testsuite/gcc.dg/optimize-bswapsi-2.c
index f111553e1cf..330dfe99445 100644
--- a/gcc/testsuite/gcc.dg/optimize-bswapsi-2.c
+++ b/gcc/testsuite/gcc.dg/optimize-bswapsi-2.c
@@ -45,15 +45,26 @@ uint32_t read_be32_3 (unsigned char *data)
 }
 
 static inline unsigned short
-get_unaligned_16 (unsigned char *p)
+get_unaligned_16_le (unsigned char *p)
 {
   return p[0] | (p[1] << 8);
 }
 unsigned int
-get_unaligned_32 (unsigned char *p)
+get_unaligned_32_le (unsigned char *p)
 {
-  return get_unaligned_16 (p) | (get_unaligned_16 (p + 2) << 16);
+  return get_unaligned_16_le (p) | (get_unaligned_16_le (p + 2) << 16);
+}
+
+static inline unsigned short
+get_unaligned_16_be (unsigned char *p)
+{
+  return p[1] | (p[0] << 8);
+}
+unsigned int
+get_unaligned_32_be (unsigned char *p)
+{
+  return get_unaligned_16_be (p + 2) | (get_unaligned_16_be (p) << 16);
 }
 
 /* { dg-final { scan-tree-dump-times "32 bit load in target endianness found 
at" 4 "bswap" } } */
-/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 3 
"bswap" } } */
+/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 4 
"bswap" } } */
-- 
2.12.3


[PATCH] openmp: ignore nowait if async execution is unsupported [PR93481]

2020-02-13 Thread Harwath, Frederik
Hi Jakub,

On 10.02.20 08:49, Harwath, Frederik wrote:

>> There has been even in some PR a suggestion that instead of failing
>> in nvptx async_run we should just ignore the nowait clause if the plugin
>> doesn't implement it properly.
> 
> This must be https://gcc.gnu.org/PR93481.

The attached patch implements the behavior that has been suggested in the PR.
It makes GOMP_OFFLOAD_async_run optional, removes the stub which produces
the error described in the PR from the nvptx plugin, and changes the 
nowait-handling
to ignore the clause if GOMP_OFFLOAD_async_run is not available for the 
executing
device's plugin. I have tested the patch by running the full libgomp testsuite 
with
nvptx-none offloading on x86_64-linux-gnu. I have observed no regressions.

Ok to push the commit to master?

For the record: Someone should implement GOMP_OFFLOAD_async_run properly
in the nvtpx plugin.

Best regards,
Frederik

From 1258f713be317870e9171281e3f7c3a174773aa1 Mon Sep 17 00:00:00 2001
From: Frederik Harwath 
Date: Thu, 13 Feb 2020 07:30:16 +0100
Subject: [PATCH] openmp: ignore nowait if async execution is unsupported
 [PR93481]

An OpenMP "nowait" clause on a target construct currently leads to
a call to GOMP_OFFLOAD_async_run in the plugin that is used for
offloading at execution time. The nvptx plugin contains only a stub
of this function that always produces a fatal error if called.

This commit changes the "nowait" implementation to ignore the clause
if the executing device's plugin does not implement GOMP_OFFLOAD_async_run.
The stub in the nvptx plugin is removed which effectively means that
programs containing "nowait" can now be executed with nvptx offloading
as if the clause had not been used.
This behavior is consistent with the OpenMP specification which says that
"[...] execution of the target task *may* be deferred" (emphasis added),
cf. OpenMP 5.0, page 172.

libgomp/

	* plugin/plugin-nvptx.c: Remove GOMP_OFFLOAD_async_run stub.
	* target.c (gomp_load_plugin_for_device): Make "async_run" loading
	optional.
	(gomp_target_task_fn): Assert "devicep->async_run_func".
	(clear_unsupported_flags): New function to remove unsupported flags
	(right now only GOMP_TARGET_FLAG_NOWAIT) that can be be ignored.
	(GOMP_target_ext): Apply clear_unsupported_flags to flags.
	(GOMP_target_update_ext): Likewise.
	(GOMP_target_enter_exit_data): Likewise.
	* testsuite/libgomp.c/target-33.c:
	Remove xfail for offload_target_nvptx.
	* testsuite/libgomp.c/target-34.c: Likewise.
---
 libgomp/plugin/plugin-nvptx.c   |  7 +--
 libgomp/target.c| 19 ++-
 libgomp/testsuite/libgomp.c/target-33.c |  3 ---
 libgomp/testsuite/libgomp.c/target-34.c |  3 ---
 4 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 6033c71a9db..ec103a2f40b 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1931,9 +1931,4 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
   nvptx_stacks_free (stacks, teams * threads);
 }
 
-void
-GOMP_OFFLOAD_async_run (int ord, void *tgt_fn, void *tgt_vars, void **args,
-			void *async_data)
-{
-  GOMP_PLUGIN_fatal ("GOMP_OFFLOAD_async_run unimplemented");
-}
+/* TODO: Implement GOMP_OFFLOAD_async_run. */
diff --git a/libgomp/target.c b/libgomp/target.c
index 3df007283f4..4fbf963f305 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -2022,6 +2022,16 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
   gomp_unmap_vars (tgt_vars, true);
 }
 
+static unsigned int
+clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
+{
+  /* If we cannot run asynchronously, simply ignore nowait.  */
+  if (devicep != NULL && devicep->async_run_func == NULL)
+flags &= ~GOMP_TARGET_FLAG_NOWAIT;
+
+  return flags;
+}
+
 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
and several arguments have been added:
FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
@@ -2054,6 +2064,8 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
   size_t tgt_align = 0, tgt_size = 0;
   bool fpc_done = false;
 
+  flags = clear_unsupported_flags (devicep, flags);
+
   if (flags & GOMP_TARGET_FLAG_NOWAIT)
 {
   struct gomp_thread *thr = gomp_thread ();
@@ -2257,6 +2269,8 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
 {
   struct gomp_device_descr *devicep = resolve_device (device);
 
+  flags = clear_unsupported_flags (devicep, flags);
+
   /* If there are depend clauses, but nowait is not present,
  block the parent task until the dependencies are resolved
  and then just continue with the rest of the function as if it
@@ -2398,6 +2412,8 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
 {
   struct gomp_device_descr *devicep = resolve_device (device);
 
+  flags = clear_unsupported_flags (devicep,