Re: [PATCH] Handle fancy_abort before diagnostic initialization [PR98586]

2021-01-11 Thread Richard Biener via Gcc-patches
On Mon, Jan 11, 2021 at 10:57 PM David Malcolm via Gcc-patches
 wrote:
>
> If fancy_abort is called before the diagnostic subsystem is initialized,
> internal_error will crash internally in a way that prevents a useful
> message reaching the user.
>
> This can happen with libgccjit in the case of gcc_assert failures
> that occur outside of the libgccjit mutex that guards the rest of
> gcc's state, including global_dc (when global_dc may not be
> initialized yet, or might be in use by another thread).
>
> I tried a few approaches to fixing this as noted in PR jit/98586
> e.g. using a temporary diagnostic_context and initializing it for
> the call to internal_error, however the more code that runs, the
> more chance there is for other errors to occur.
>
> The best fix appears to be to simply fall back to a minimal abort
> implementation that only relies on i18n, as implemented by this
> patch.
>
> Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
>
> Is there a better way to fix this?  If not I plan to push this
> to master in a few days.

The only other idea I can come up with is to somehow statically
initialize global_dc to a minimal implementation to catch those.

Otherwise your approach is entirely reasonable.

Richard.

> gcc/ChangeLog:
> PR jit/98586
> * diagnostic.c (diagnostic_kind_text): Break out this array
> from...
> (diagnostic_build_prefix): ...here.
> (fancy_abort): Detect when diagnostic_initialize has not yet been
> called and fall back to a minimal implementation of printing the
> ICE, rather than segfaulting in internal_error.
> ---
>  gcc/diagnostic.c | 45 +++--
>  1 file changed, 39 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/diagnostic.c b/gcc/diagnostic.c
> index 4250bf96c8b..3be7748eb39 100644
> --- a/gcc/diagnostic.c
> +++ b/gcc/diagnostic.c
> @@ -431,6 +431,13 @@ diagnostic_get_location_text (diagnostic_context 
> *context,
>line_col, locus_ce);
>  }
>
> +static const char *const diagnostic_kind_text[] = {
> +#define DEFINE_DIAGNOSTIC_KIND(K, T, C) (T),
> +#include "diagnostic.def"
> +#undef DEFINE_DIAGNOSTIC_KIND
> +  "must-not-happen"
> +};
> +
>  /* Return a malloc'd string describing a location and the severity of the
> diagnostic, e.g. "foo.c:42:10: error: ".  The caller is responsible for
> freeing the memory.  */
> @@ -438,12 +445,6 @@ char *
>  diagnostic_build_prefix (diagnostic_context *context,
>  const diagnostic_info *diagnostic)
>  {
> -  static const char *const diagnostic_kind_text[] = {
> -#define DEFINE_DIAGNOSTIC_KIND(K, T, C) (T),
> -#include "diagnostic.def"
> -#undef DEFINE_DIAGNOSTIC_KIND
> -"must-not-happen"
> -  };
>gcc_assert (diagnostic->kind < DK_LAST_DIAGNOSTIC_KIND);
>
>const char *text = _(diagnostic_kind_text[diagnostic->kind]);
> @@ -1832,6 +1833,38 @@ error_recursion (diagnostic_context *context)
>  void
>  fancy_abort (const char *file, int line, const char *function)
>  {
> +  /* If fancy_abort is called before the diagnostic subsystem is initialized,
> + internal_error will crash internally in a way that prevents a
> + useful message reaching the user.
> + This can happen with libgccjit in the case of gcc_assert failures
> + that occur outside of the libgccjit mutex that guards the rest of
> + gcc's state, including global_dc (when global_dc may not be
> + initialized yet, or might be in use by another thread).
> + Handle such cases as gracefully as possible by falling back to a
> + minimal abort handler that only relies on i18n.  */
> +  if (global_dc->printer == NULL)
> +{
> +  /* Print the error message.  */
> +  fnotice (stderr, diagnostic_kind_text[DK_ICE]);
> +  fnotice (stderr, "in %s, at %s:%d", function, trim_filename (file), 
> line);
> +  fputc ('\n', stderr);
> +
> +  /* Attempt to print a backtrace.  */
> +  struct backtrace_state *state
> +   = backtrace_create_state (NULL, 0, bt_err_callback, NULL);
> +  int count = 0;
> +  if (state != NULL)
> +   backtrace_full (state, 2, bt_callback, bt_err_callback,
> +   (void *) &count);
> +
> +  /* We can't call warn_if_plugins or emergency_dump_function as these
> +rely on GCC state that might not be initialized, or might be in
> +use by another thread.  */
> +
> +  /* Abort the process.  */
> +  real_abort ();
> +}
> +
>internal_error ("in %s, at %s:%d", function, trim_filename (file), line);
>  }
>
> --
> 2.26.2
>


[COMMITTED] MAINTAINERS: Add myself for write after approval

2021-01-11 Thread Qian Jianhua
ChangeLog:

2021-01-12  Qian Jianhua  

* MAINTAINERS (Write After Approval): Add myself
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index c0aa23df57e..e88808f9fe2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -451,6 +451,7 @@ Daniel Jacobowitz   
 Andreas Jaeger 
 Harsha Jagasia 
 Fariborz Jahanian  
+Qian Jianhua
 Janis Johnson  
 Teresa Johnson 
 Kean Johnston  
-- 
2.18.1





Re: [PATCH v4 05/29] Import replacement 'clz' functions from CM0 library

2021-01-11 Thread Daniel Engel
On Mon, Jan 11, 2021, at 8:32 AM, Richard Earnshaw wrote:
> A general comment before we start:
> 
> CLZ was added to the Arm ISA in Armv5.  So all subsequent Arm versions
> (and all versions implementing thumb2) will have this instruction.  So
> the only cases where you'll need a fallback are armv6m (and derivatives)
> and pre-armv5 (Arm or thumb1).  So there's no need in your code to try
> to use a synthesized CLZ operation when compiling for thumb2.

If you are referring to the "library formerly known as CM0", none of
that code was written to call clz, either synthesized or instruction.
The instruction just wasn't available to me, and the stack overhead to
call the library was never worth it.  The clz file was in the CM0
library because higher level application code wanted it and we built
with -nostdlib.  There are several optimizations to be made with the clz
instruction before the v6m floating point is suitable for other
architectures, but I don't anticipate ever calling these functions.

If you're referring to __clzsi2() and __clzdi2() at the top of the file
guarded by __ARM_FEATURE_CLZ, that code path is directly descended
from lib1funcs.S.  I just merged into !__ARM_FEATURE_CLZ.  I think the
trivial functions still have to exist within libgcc, even if the
compiler doesn't call them.

> On 11/01/2021 11:10, g...@danielengel.com wrote:
> > From: Daniel Engel 
> > 
> > On architectures with no clz instruction, this version combines __clzdi2()
> > with __clzsi2() into a single object with an efficient tail call.  Also, 
> > this
> > version merges the formerly separate Thumb and ARM code implementations
> > into a unified instruction sequence.  This change significantly improves the
> > Thumb performance without affecting ARM performance.  Finally, this version 
> > adds
> > a new __OPTIMIZE_SIZE__ build option (using a loop).
> > 
> > On architectures with a clz instruction, functionality is unchanged.
> > 
> > gcc/libgcc/ChangeLog:
> > 2021-01-07 Daniel Engel 
> > 
> > * config/arm/bits/clz2.S: Size-optimized bitwise versions of __clzsi2()
> > and __clzdi2() (i.e. __ARM_FEATURE_CLZ not available).
> > * config/arm/lib1funcs.S: Moved CFI_FUNCTION macros, added 
> > __ARM_FEATURE_IT.
> > * config/arm/t-elf: Move _clzsi2 to new group of weak LIB1ASMFUNCS.
> > ---
> >  libgcc/config/arm/bits/clz2.S | 342 ++
> >  libgcc/config/arm/lib1funcs.S |  25 ++-
> >  libgcc/config/arm/t-elf   |   8 +-
> >  3 files changed, 248 insertions(+), 127 deletions(-)
> > 
> > diff --git a/libgcc/config/arm/bits/clz2.S b/libgcc/config/arm/bits/clz2.S
> > index 1c8f10a5b29..d0a1fbec4d0 100644
> > --- a/libgcc/config/arm/bits/clz2.S
> > +++ b/libgcc/config/arm/bits/clz2.S
> > @@ -1,124 +1,234 @@
> > +/* clz2.S: Cortex M0 optimized 'clz' functions
> > +
> > +   Copyright (C) 2018-2021 Free Software Foundation, Inc.
> > +   Contributed by Daniel Engel, Senva Inc (g...@danielengel.com)
> > +
> > +   This file is free software; you can redistribute it and/or modify it
> > +   under the terms of the GNU General Public License as published by the
> > +   Free Software Foundation; either version 3, or (at your option) any
> > +   later version.
> > +
> > +   This file is distributed in the hope that it will be useful, but
> > +   WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   General Public License for more details.
> > +
> > +   Under Section 7 of GPL version 3, you are granted additional
> > +   permissions described in the GCC Runtime Library Exception, version
> > +   3.1, as published by the Free Software Foundation.
> > +
> > +   You should have received a copy of the GNU General Public License and
> > +   a copy of the GCC Runtime Library Exception along with this program;
> > +   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
> > +   <http://www.gnu.org/licenses/>.  */
> > +
> > +
> > +#if defined(__ARM_FEATURE_CLZ) && __ARM_FEATURE_CLZ
> 
> Writing the test this way is pointless.  Either test for
> __ARM_FEATURE_CLZ being defined, or test for it being non-zero; but not
> both.  C Macros default to a value of zero if not defined.
> 
> In this case #ifdef is just fine - it won't be defined if the
> instruction doesn't exist.
> 
> Similar simplification should be used everywhere else you've used this
> type of construct.

I have been burned multiple times in the past by "#define SYMBOL 0".
Some people do that.  And then the simple #ifdef still gives "true" even
though the intent was to disable.  Is the extra robustness a problem?  

I use the simple form for the "L_" defines, since the compiler isn't
going to make this kind of mistake.
 
> > +
> > +#ifdef L_clzdi2
> > +
> > +// int __clzdi2(long long)
> > +// Counts leading zero bits in $r1:$r0.
> > +// Returns the result in $r0.
> > +FUNC_START_SECTION clzdi2 .text.sorted.libgcc.clz2.clzdi2
> > +CFI_START_

Re: [PATCH] c++: -Wmissing-field-initializers in unevaluated ctx [PR98620]

2021-01-11 Thread Jason Merrill via Gcc-patches

On 1/11/21 6:19 PM, Marek Polacek wrote:

This PR wants us not to warn about missing field initializers when
the code in question takes place in decltype and similar.


Hmm, the warning seems of questionable utility with templated code like 
this, but I guess this is a reasonable middle ground.  OK.




Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

gcc/cp/ChangeLog:

PR c++/98620
* typeck2.c (process_init_constructor_record): Don't emit
-Wmissing-field-initializers warnings in unevaluated contexts.

gcc/testsuite/ChangeLog:

PR c++/98620
* g++.dg/warn/Wmissing-field-initializers-2.C: New test.
---
  gcc/cp/typeck2.c  |  2 +
  .../warn/Wmissing-field-initializers-2.C  | 44 +++
  2 files changed, 46 insertions(+)
  create mode 100644 gcc/testsuite/g++.dg/warn/Wmissing-field-initializers-2.C

diff --git a/gcc/cp/typeck2.c b/gcc/cp/typeck2.c
index e50d5fe94cd..93744fdafde 100644
--- a/gcc/cp/typeck2.c
+++ b/gcc/cp/typeck2.c
@@ -1563,6 +1563,7 @@ process_init_constructor_record (tree type, tree init, 
int nested, int flags,
  
  	  /* Warn when some struct elements are implicitly initialized.  */

  if ((complain & tf_warning)
+ && !cp_unevaluated_operand
  && !EMPTY_CONSTRUCTOR_P (init))
warning (OPT_Wmissing_field_initializers,
 "missing initializer for member %qD", field);
@@ -1593,6 +1594,7 @@ process_init_constructor_record (tree type, tree init, 
int nested, int flags,
  /* Warn when some struct elements are implicitly initialized
 to zero.  */
  if ((complain & tf_warning)
+ && !cp_unevaluated_operand
  && !EMPTY_CONSTRUCTOR_P (init))
warning (OPT_Wmissing_field_initializers,
 "missing initializer for member %qD", field);
diff --git a/gcc/testsuite/g++.dg/warn/Wmissing-field-initializers-2.C 
b/gcc/testsuite/g++.dg/warn/Wmissing-field-initializers-2.C
new file mode 100644
index 000..31d4d897984
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/Wmissing-field-initializers-2.C
@@ -0,0 +1,44 @@
+// PR c++/98620
+// { dg-do compile { target c++11 } }
+
+namespace std {
+  template
+  T&& declval() noexcept;
+
+  template
+  struct bool_constant {
+static constexpr bool value = B;
+using type = bool_constant;
+  };
+  using true_type = bool_constant;
+  using false_type = bool_constant;
+};
+
+template 
+struct TmpArray
+{
+   T arr[1];
+};
+
+template 
+struct is_non_narrowing_conversion : std::false_type
+{};
+
+template 
+struct is_non_narrowing_conversion<
+Src, Dst,
+decltype(void(TmpArray{{ std::declval() }})) // { dg-bogus "missing 
initializer" }
+> : std::true_type
+{};
+
+struct mystruct
+{
+int a;
+void * b;
+};
+
+void test_nok()
+{
+  is_non_narrowing_conversion::type v;
+  (void) v;
+}

base-commit: a958b2fc6dab3d8b01b6ee32178e2fccd97f77f8





Re: [PATCH] c++: Fix ICE with CTAD in concept [PR98611]

2021-01-11 Thread Jason Merrill via Gcc-patches

On 1/11/21 8:08 PM, Patrick Palka wrote:

On Mon, 11 Jan 2021, Jason Merrill wrote:


On 1/9/21 5:23 PM, Patrick Palka wrote:

This patch teaches find_template_parameters to visit the template
represented by a CTAD placeholder, which is normally not visited by
for_each_template_parm.  This template may be a template template
parameter (as in the first testcase), or it may implicitly use the
template parameters of an enclosing class template (as in the second
testcase), and in either case we need to record the template parameters
used therein for later satisfaction.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk and perhaps the 10 branch?  Also tested on range-v3 and cmcstl2.

gcc/cp/ChangeLog:

PR c++/98611
* pt.c (any_template_parm_r) : Visit
the template of a CTAD placeholder.


Did you consider doing this in cp_walk_subtrees instead of here?


Briefly, but I couldn't convince myself which of the three visitors
(cp_walk_subtrees, for_each_template_parm_r or any_template_parm_r) is
the most appropriate place to do it in, so I defaulted to the most
specific routine of the three.

The following passes bootstrap and regtesting on x86_64-pc-linux-gnu.
Shall we go with this?


I think so; a class template placeholder is written explicitly in 
whatever we're walking over, so looking at it here makes sense to me. 
The patch is OK.



-- >8 --

gcc/cp/ChangeLog:

PR c++/98611
* tree.c (cp_walk_subtrees) : Visit
the template of a CTAD placeholder.

gcc/testsuite/ChangeLog:

PR c++/98611
* g++.dg/cpp2a/concepts-ctad1.C: New test.
* g++.dg/cpp2a/concepts-ctad2.C: New test.
---
  gcc/cp/tree.c   |  5 -
  gcc/testsuite/g++.dg/cpp2a/concepts-ctad1.C | 16 
  gcc/testsuite/g++.dg/cpp2a/concepts-ctad2.C | 13 +
  3 files changed, 33 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-ctad1.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-ctad2.C

diff --git a/gcc/cp/tree.c b/gcc/cp/tree.c
index c536eb581a7..d339036e88e 100644
--- a/gcc/cp/tree.c
+++ b/gcc/cp/tree.c
@@ -5173,12 +5173,15 @@ cp_walk_subtrees (tree *tp, int *walk_subtrees_p, 
walk_tree_fn func,
result = NULL_TREE;
switch (code)
  {
+case TEMPLATE_TYPE_PARM:
+  if (template_placeholder_p (*tp))
+   WALK_SUBTREE (CLASS_PLACEHOLDER_TEMPLATE (*tp));
+  /* Fall through.  */
  case DEFERRED_PARSE:
  case TEMPLATE_TEMPLATE_PARM:
  case BOUND_TEMPLATE_TEMPLATE_PARM:
  case UNBOUND_CLASS_TEMPLATE:
  case TEMPLATE_PARM_INDEX:
-case TEMPLATE_TYPE_PARM:
  case TYPEOF_TYPE:
  case UNDERLYING_TYPE:
/* None of these have subtrees other than those already walked
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-ctad1.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-ctad1.C
new file mode 100644
index 000..ec2e4b014d7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-ctad1.C
@@ -0,0 +1,16 @@
+// PR c++/98611
+// { dg-do compile { target c++20 } }
+
+template 
+concept IsSame = __is_same(T, U);
+
+template  class _Class>
+concept IsInstantiationOf = requires(T object) {
+ { _Class{object} } -> IsSame;
+};
+
+template  struct Degrees {};
+static_assert(IsInstantiationOf, Degrees>);
+
+template  struct NotDegrees {};
+static_assert(!IsInstantiationOf, NotDegrees>);
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-ctad2.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-ctad2.C
new file mode 100644
index 000..0d7f9790777
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-ctad2.C
@@ -0,0 +1,13 @@
+// PR c++/98611
+// { dg-do compile { target c++20 } }
+
+template 
+struct S {
+  template  struct Tmpl { Tmpl(T); };
+
+  template 
+requires requires (T object) { Tmpl{object}; }
+  static int f(T);
+};
+
+int a = S::f(0);





[committed] analyzer: fix ICE merging dereferencing unknown ptrs [PR98628]

2021-01-11 Thread David Malcolm via Gcc-patches
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to master as r11-6603-gab88f3607233376c3145c320e92e71943a495bb5

gcc/analyzer/ChangeLog:
PR analyzer/98628
* store.cc (binding_cluster::make_unknown_relative_to): Don't mark
dereferenced unknown pointers as having escaped.

gcc/testsuite/ChangeLog:
PR analyzer/98628
* gcc.dg/analyzer/pr98628.c: New test.
---
 gcc/analyzer/store.cc   |  7 +--
 gcc/testsuite/gcc.dg/analyzer/pr98628.c | 19 +++
 2 files changed, 24 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr98628.c

diff --git a/gcc/analyzer/store.cc b/gcc/analyzer/store.cc
index 23118d05685..bbd2e7c2d40 100644
--- a/gcc/analyzer/store.cc
+++ b/gcc/analyzer/store.cc
@@ -1323,8 +1323,11 @@ binding_cluster::make_unknown_relative_to (const 
binding_cluster *other,
{
  const region *base_reg
= region_sval->get_pointee ()->get_base_region ();
- binding_cluster *c = out_store->get_or_create_cluster (base_reg);
- c->mark_as_escaped ();
+ if (!base_reg->symbolic_for_unknown_ptr_p ())
+   {
+ binding_cluster *c = out_store->get_or_create_cluster (base_reg);
+ c->mark_as_escaped ();
+   }
}
 }
 }
diff --git a/gcc/testsuite/gcc.dg/analyzer/pr98628.c 
b/gcc/testsuite/gcc.dg/analyzer/pr98628.c
new file mode 100644
index 000..e2fa778472c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/pr98628.c
@@ -0,0 +1,19 @@
+/* { dg-additional-options "-O1" } */
+
+void foo(void *);
+struct chanset_t help_subst_chan;
+struct chanset_t *help_subst_chan_0_0;
+struct chanset_t {
+  struct chanset_t *next;
+  char dname[];
+};
+void help_subst() {
+  char *writeidx;
+  for (;; help_subst_chan = *help_subst_chan_0_0) {
+foo(help_subst_chan.next->dname);
+if (help_subst_chan_0_0) {
+  writeidx++;
+  *writeidx++ = ' ';
+}
+  }
+}
-- 
2.26.2



Re: [PATCH] c++: Fix ICE with CTAD in concept [PR98611]

2021-01-11 Thread Patrick Palka via Gcc-patches
On Mon, 11 Jan 2021, Jason Merrill wrote:

> On 1/9/21 5:23 PM, Patrick Palka wrote:
> > This patch teaches find_template_parameters to visit the template
> > represented by a CTAD placeholder, which is normally not visited by
> > for_each_template_parm.  This template may be a template template
> > parameter (as in the first testcase), or it may implicitly use the
> > template parameters of an enclosing class template (as in the second
> > testcase), and in either case we need to record the template parameters
> > used therein for later satisfaction.
> > 
> > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
> > trunk and perhaps the 10 branch?  Also tested on range-v3 and cmcstl2.
> > 
> > gcc/cp/ChangeLog:
> > 
> > PR c++/98611
> > * pt.c (any_template_parm_r) : Visit
> > the template of a CTAD placeholder.
> 
> Did you consider doing this in cp_walk_subtrees instead of here?

Briefly, but I couldn't convince myself which of the three visitors
(cp_walk_subtrees, for_each_template_parm_r or any_template_parm_r) is
the most appropriate place to do it in, so I defaulted to the most
specific routine of the three.

The following passes bootstrap and regtesting on x86_64-pc-linux-gnu.
Shall we go with this?

-- >8 --

gcc/cp/ChangeLog:

PR c++/98611
* tree.c (cp_walk_subtrees) : Visit
the template of a CTAD placeholder.

gcc/testsuite/ChangeLog:

PR c++/98611
* g++.dg/cpp2a/concepts-ctad1.C: New test.
* g++.dg/cpp2a/concepts-ctad2.C: New test.
---
 gcc/cp/tree.c   |  5 -
 gcc/testsuite/g++.dg/cpp2a/concepts-ctad1.C | 16 
 gcc/testsuite/g++.dg/cpp2a/concepts-ctad2.C | 13 +
 3 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-ctad1.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-ctad2.C

diff --git a/gcc/cp/tree.c b/gcc/cp/tree.c
index c536eb581a7..d339036e88e 100644
--- a/gcc/cp/tree.c
+++ b/gcc/cp/tree.c
@@ -5173,12 +5173,15 @@ cp_walk_subtrees (tree *tp, int *walk_subtrees_p, 
walk_tree_fn func,
   result = NULL_TREE;
   switch (code)
 {
+case TEMPLATE_TYPE_PARM:
+  if (template_placeholder_p (*tp))
+   WALK_SUBTREE (CLASS_PLACEHOLDER_TEMPLATE (*tp));
+  /* Fall through.  */
 case DEFERRED_PARSE:
 case TEMPLATE_TEMPLATE_PARM:
 case BOUND_TEMPLATE_TEMPLATE_PARM:
 case UNBOUND_CLASS_TEMPLATE:
 case TEMPLATE_PARM_INDEX:
-case TEMPLATE_TYPE_PARM:
 case TYPEOF_TYPE:
 case UNDERLYING_TYPE:
   /* None of these have subtrees other than those already walked
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-ctad1.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-ctad1.C
new file mode 100644
index 000..ec2e4b014d7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-ctad1.C
@@ -0,0 +1,16 @@
+// PR c++/98611
+// { dg-do compile { target c++20 } }
+
+template 
+concept IsSame = __is_same(T, U);
+
+template  class _Class>
+concept IsInstantiationOf = requires(T object) {
+ { _Class{object} } -> IsSame;
+};
+
+template  struct Degrees {};
+static_assert(IsInstantiationOf, Degrees>);
+
+template  struct NotDegrees {};
+static_assert(!IsInstantiationOf, NotDegrees>);
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-ctad2.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-ctad2.C
new file mode 100644
index 000..0d7f9790777
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-ctad2.C
@@ -0,0 +1,13 @@
+// PR c++/98611
+// { dg-do compile { target c++20 } }
+
+template 
+struct S {
+  template  struct Tmpl { Tmpl(T); };
+
+  template 
+requires requires (T object) { Tmpl{object}; }
+  static int f(T);
+};
+
+int a = S::f(0);
-- 
2.30.0



Re: [PATCH] binutils: Check if AR is usable for LTO build

2021-01-11 Thread Alan Modra via Gcc-patches
On Mon, Jan 11, 2021 at 02:52:43PM -0800, H.J. Lu wrote:
> On Mon, Jan 11, 2021 at 1:20 PM Alan Modra  wrote:
> >
> > On Mon, Jan 11, 2021 at 11:53:15AM -0800, H.J. Lu via Binutils wrote:
> > > Check if AR is usable for LTO build with --enable-pgo-build=lto:
> > >
> > > checking for -plugin option... ar: no operation specified
> > > Failed: ar --plugin 
> > > /usr/gcc-11.0.0-x32/libexec/gcc/x86_64-pc-linux-gnu/11.0.0/liblto_plugin.so
> > >  rc
> > > no
> > > configure: error: AR with --plugin and rc is required for LTO build
> > >
> > > instead of build failure later.
> > >
> > >   PR binutils/26766
> > >   * configure.ac:
> > >   * configure: Regenerated.
> >
> > See pr27173 too.  The problem isn't a matter of finding an "ar" that
> > supports --plugin, we have versions of GNU ar (2.30 to 2.32?) that
> > accept --plugin but then don't parse the "rc" or other command
> > options.  I don't think this patch will help.
> 
> PR 27173 patches are at
> 
> https://sourceware.org/pipermail/binutils/2021-January/114879.html

After that one is committed, this patch is OK too (with any needed
modifications).

-- 
Alan Modra
Australia Development Lab, IBM


Re: V2 [PATCH 1/2] GCC: Check if AR works with --plugin and rc

2021-01-11 Thread H.J. Lu via Gcc-patches
On Mon, Jan 11, 2021 at 4:22 PM Alan Modra  wrote:
>
> On Mon, Jan 11, 2021 at 04:07:22PM -0800, H.J. Lu wrote:
> > These are not fatal errors.  Here is the updated patch to use
> > AC_MSG_WARN instead.  OK for master?
>
> OK by me.  Please squash the two patches.
>

I will keep 2 separate since I am hoping the first one can go into GCC.

-- 
H.J.


Re: V2 [PATCH 1/2] GCC: Check if AR works with --plugin and rc

2021-01-11 Thread Alan Modra via Gcc-patches
On Mon, Jan 11, 2021 at 04:07:22PM -0800, H.J. Lu wrote:
> These are not fatal errors.  Here is the updated patch to use
> AC_MSG_WARN instead.  OK for master?

OK by me.  Please squash the two patches.

-- 
Alan Modra
Australia Development Lab, IBM


V2 [PATCH 1/2] GCC: Check if AR works with --plugin and rc

2021-01-11 Thread H.J. Lu via Gcc-patches
On Mon, Jan 11, 2021 at 3:27 PM Alan Modra  wrote:
>
> On Mon, Jan 11, 2021 at 08:57:05AM -0800, H.J. Lu via Binutils wrote:
> > diff --git a/config/gcc-plugin.m4 b/config/gcc-plugin.m4
> > index c5b72e9a13d..798a2054edd 100644
> > --- a/config/gcc-plugin.m4
> > +++ b/config/gcc-plugin.m4
> > @@ -145,6 +145,18 @@ for plugin in $plugin_names; do
> >  break
> >fi
> >  done
> > +dnl Check if ${AR} $plugin_option rc works.
> > +AC_CHECK_TOOL(AR, ar)
> > +if test "${AR}" = "" ; then
> > +  AC_MSG_ERROR([Required archive tool 'ar' not found on PATH.])
> > +fi
> > +touch conftest.c
> > +${AR} $plugin_option rc conftest.a conftest.c
> > +if test "$?" != 0; then
> > +  echo "Failed: ${AR} $plugin_option rc"
>
> Use AC_MSG_ERROR rather than echo.
>
> > +  plugin_option=
> > +fi
> > +rm -f conftest.*
> >  if test -n "$plugin_option"; then
> >$1="$plugin_option"
> >AC_MSG_RESULT($plugin_option)
>
> > diff --git a/libtool.m4 b/libtool.m4
> > index 3672e9516e2..150971974c1 100644
> > --- a/libtool.m4
> > +++ b/libtool.m4
> > @@ -1340,7 +1340,14 @@ AC_CHECK_TOOL(AR, ar, false)
> >  test -z "$AR" && AR=ar
> >  if test -n "$plugin_option"; then
> >if $AR --help 2>&1 | grep -q "\--plugin"; then
> > -AR="$AR $plugin_option"
> > +touch conftest.c
> > +$AR $plugin_option rc conftest.a conftest.c
> > +if test "$?" != 0; then
> > +  echo "Failed: $AR $plugin_option rc"
>
> AC_MSG_ERROR again.

These are not fatal errors.  Here is the updated patch to use
AC_MSG_WARN instead.  OK for master?

-- 
H.J.
From 90e4f853ef5b0291d7cc514fffc80794a91b7012 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Mon, 11 Jan 2021 08:22:35 -0800
Subject: [PATCH] GCC: Check if AR works with --plugin and rc

AR from older binutils doesn't work with --plugin and rc:

[hjl@gnu-cfl-2 bin]$ touch foo.c
[hjl@gnu-cfl-2 bin]$ ar --plugin /usr/libexec/gcc/x86_64-redhat-linux/10/liblto_plugin.so rc libfoo.a foo.c
[hjl@gnu-cfl-2 bin]$ ./ar --plugin /usr/libexec/gcc/x86_64-redhat-linux/10/liblto_plugin.so rc libfoo.a foo.c
./ar: no operation specified
[hjl@gnu-cfl-2 bin]$ ./ar --version
GNU ar (Linux/GNU Binutils) 2.29.51.0.1.20180112
Copyright (C) 2018 Free Software Foundation, Inc.
This program is free software; you may redistribute it under the terms of
the GNU General Public License version 3 or (at your option) any later version.
This program has absolutely no warranty.
[hjl@gnu-cfl-2 bin]$

Check if AR works with --plugin and rc before passing --plugin to AR and
RANLIB.

	PR ld/27173
	* configure: Regenerated.
	* libtool.m4 (_LT_CMD_OLD_ARCHIVE): Check if AR works with
	--plugin and rc before enabling --plugin.

config/

	PR ld/27173
	* gcc-plugin.m4 (GCC_PLUGIN_OPTION): Check if AR works with
	--plugin and rc before enabling --plugin.

libiberty/

	PR ld/27173
	* configure: Regenerated.

zlib/

	PR ld/27173
	* configure: Regenerated.
---
 config/gcc-plugin.m4 |  12 +
 configure| 103 +++
 libiberty/configure  | 103 +++
 libtool.m4   |   9 +++-
 zlib/configure   |  14 --
 5 files changed, 237 insertions(+), 4 deletions(-)

diff --git a/config/gcc-plugin.m4 b/config/gcc-plugin.m4
index c5b72e9a13d..ca98d674912 100644
--- a/config/gcc-plugin.m4
+++ b/config/gcc-plugin.m4
@@ -145,6 +145,18 @@ for plugin in $plugin_names; do
 break
   fi
 done
+dnl Check if ${AR} $plugin_option rc works.
+AC_CHECK_TOOL(AR, ar)
+if test "${AR}" = "" ; then
+  AC_MSG_ERROR([Required archive tool 'ar' not found on PATH.])
+fi
+touch conftest.c
+${AR} $plugin_option rc conftest.a conftest.c
+if test "$?" != 0; then
+  AC_MSG_WARN([Failed: $AR $plugin_option rc])
+  plugin_option=
+fi
+rm -f conftest.*
 if test -n "$plugin_option"; then
   $1="$plugin_option"
   AC_MSG_RESULT($plugin_option)
diff --git a/configure b/configure
index a75bc26978c..5437ef12c72 100755
--- a/configure
+++ b/configure
@@ -10120,6 +10120,109 @@ for plugin in $plugin_names; do
 break
   fi
 done
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}ar", so it can be a program name with args.
+set dummy ${ac_tool_prefix}ar; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_AR+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$AR"; then
+  ac_cv_prog_AR="$AR" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ac_cv_prog_AR="${ac_tool_prefix}ar"
+$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+AR=$ac_cv_prog_AR
+if test -n "$AR"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AR" >&5

Re: [PATCH 1/2] GCC: Check if AR works with --plugin and rc

2021-01-11 Thread Alan Modra via Gcc-patches
On Mon, Jan 11, 2021 at 08:57:05AM -0800, H.J. Lu via Binutils wrote:
> diff --git a/config/gcc-plugin.m4 b/config/gcc-plugin.m4
> index c5b72e9a13d..798a2054edd 100644
> --- a/config/gcc-plugin.m4
> +++ b/config/gcc-plugin.m4
> @@ -145,6 +145,18 @@ for plugin in $plugin_names; do
>  break
>fi
>  done
> +dnl Check if ${AR} $plugin_option rc works.
> +AC_CHECK_TOOL(AR, ar)
> +if test "${AR}" = "" ; then
> +  AC_MSG_ERROR([Required archive tool 'ar' not found on PATH.])
> +fi
> +touch conftest.c
> +${AR} $plugin_option rc conftest.a conftest.c
> +if test "$?" != 0; then
> +  echo "Failed: ${AR} $plugin_option rc"

Use AC_MSG_ERROR rather than echo.

> +  plugin_option=
> +fi
> +rm -f conftest.*
>  if test -n "$plugin_option"; then
>$1="$plugin_option"
>AC_MSG_RESULT($plugin_option)

> diff --git a/libtool.m4 b/libtool.m4
> index 3672e9516e2..150971974c1 100644
> --- a/libtool.m4
> +++ b/libtool.m4
> @@ -1340,7 +1340,14 @@ AC_CHECK_TOOL(AR, ar, false)
>  test -z "$AR" && AR=ar
>  if test -n "$plugin_option"; then
>if $AR --help 2>&1 | grep -q "\--plugin"; then
> -AR="$AR $plugin_option"
> +touch conftest.c
> +$AR $plugin_option rc conftest.a conftest.c
> +if test "$?" != 0; then
> +  echo "Failed: $AR $plugin_option rc"

AC_MSG_ERROR again.

> +else
> +  AR="$AR $plugin_option"
> +fi
> +rm -f conftest.*
>fi
>  fi
>  test -z "$AR_FLAGS" && AR_FLAGS=cru

-- 
Alan Modra
Australia Development Lab, IBM


[PATCH] c++: -Wmissing-field-initializers in unevaluated ctx [PR98620]

2021-01-11 Thread Marek Polacek via Gcc-patches
This PR wants us not to warn about missing field initializers when
the code in question takes place in decltype and similar.  Fixed
thus.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

gcc/cp/ChangeLog:

PR c++/98620
* typeck2.c (process_init_constructor_record): Don't emit
-Wmissing-field-initializers warnings in unevaluated contexts.

gcc/testsuite/ChangeLog:

PR c++/98620
* g++.dg/warn/Wmissing-field-initializers-2.C: New test.
---
 gcc/cp/typeck2.c  |  2 +
 .../warn/Wmissing-field-initializers-2.C  | 44 +++
 2 files changed, 46 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/warn/Wmissing-field-initializers-2.C

diff --git a/gcc/cp/typeck2.c b/gcc/cp/typeck2.c
index e50d5fe94cd..93744fdafde 100644
--- a/gcc/cp/typeck2.c
+++ b/gcc/cp/typeck2.c
@@ -1563,6 +1563,7 @@ process_init_constructor_record (tree type, tree init, 
int nested, int flags,
 
  /* Warn when some struct elements are implicitly initialized.  */
  if ((complain & tf_warning)
+ && !cp_unevaluated_operand
  && !EMPTY_CONSTRUCTOR_P (init))
warning (OPT_Wmissing_field_initializers,
 "missing initializer for member %qD", field);
@@ -1593,6 +1594,7 @@ process_init_constructor_record (tree type, tree init, 
int nested, int flags,
  /* Warn when some struct elements are implicitly initialized
 to zero.  */
  if ((complain & tf_warning)
+ && !cp_unevaluated_operand
  && !EMPTY_CONSTRUCTOR_P (init))
warning (OPT_Wmissing_field_initializers,
 "missing initializer for member %qD", field);
diff --git a/gcc/testsuite/g++.dg/warn/Wmissing-field-initializers-2.C 
b/gcc/testsuite/g++.dg/warn/Wmissing-field-initializers-2.C
new file mode 100644
index 000..31d4d897984
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/Wmissing-field-initializers-2.C
@@ -0,0 +1,44 @@
+// PR c++/98620
+// { dg-do compile { target c++11 } }
+
+namespace std {
+  template
+  T&& declval() noexcept;
+
+  template
+  struct bool_constant {
+static constexpr bool value = B;
+using type = bool_constant;
+  };
+  using true_type = bool_constant;
+  using false_type = bool_constant;
+};
+
+template 
+struct TmpArray
+{
+   T arr[1];
+};
+
+template 
+struct is_non_narrowing_conversion : std::false_type
+{};
+
+template 
+struct is_non_narrowing_conversion<
+Src, Dst,
+decltype(void(TmpArray{{ std::declval() }})) // { dg-bogus 
"missing initializer" }
+> : std::true_type
+{};
+
+struct mystruct
+{
+int a;
+void * b;
+};
+
+void test_nok()
+{
+  is_non_narrowing_conversion::type v;
+  (void) v;
+}

base-commit: a958b2fc6dab3d8b01b6ee32178e2fccd97f77f8
-- 
2.29.2



Re: [PATCH v4 01/29] Add and organize macros.

2021-01-11 Thread Daniel Engel


On Mon, Jan 11, 2021, at 7:21 AM, Richard Earnshaw wrote:
> Some initial comments.
> 
> On 11/01/2021 11:10, g...@danielengel.com wrote:
> > From: Daniel Engel 
> > 
> > These definitions facilitate subsequent patches in this series.
> > 
> > gcc/libgcc/ChangeLog:
> > 2021-01-07 Daniel Engel 
> > 
> > * config/arm/t-elf: Organize functions into logical groups.
> > * config/arm/lib1funcs.S: Add FUNC_START macro variations for
> > weak functions and manual control of the target section;
> > rename THUMB_FUNC_START as THUMB_FUNC_ENTRY for consistency;
> > removed unused macros THUMB_SYNTAX, ARM_SYM_START, SYM_END;
> > removed redundant syntax directives.
> 
> This needs to be re-formatted using the correct ChangeLog style, which
> is in most cases
> 
>   * <file> (<function>): <description of change>.
> 
> You can repeat for multiple functions in the same file, but leave off
> the "* " part as long as they are contiguous in the log.

Will do.  Sorry.

> > ---
> >  libgcc/config/arm/lib1funcs.S | 114 +++---
> >  libgcc/config/arm/t-elf   |  55 +---
> >  2 files changed, 110 insertions(+), 59 deletions(-)
> > 
> > diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
> > index c2fcfc503ec..b4541bae791 100644
> > --- a/libgcc/config/arm/lib1funcs.S
> > +++ b/libgcc/config/arm/lib1funcs.S
> > @@ -69,11 +69,13 @@ see the files COPYING3 and COPYING.RUNTIME 
> > respectively.  If not, see
> >  #define TYPE(x) .type SYM(x),function
> >  #define SIZE(x) .size SYM(x), . - SYM(x)
> >  #define LSYM(x) .x
> > +#define LLSYM(x) .L##x
> >  #else
> >  #define __PLT__
> >  #define TYPE(x)
> >  #define SIZE(x)
> >  #define LSYM(x) x
> > +#define LLSYM(x) x
> >  #endif
> 
> I can live with this.
> 
> >  
> >  /* Function end macros.  Variants for interworking.  */
> > @@ -247,6 +249,14 @@ LSYM(Lend_fde):
> >  
> >  #define COND(op1, op2, cond) op1 ## op2 ## cond
> >  
> > +#ifdef __ARM_FEATURE_IT
> > +  #define IT(ins,c) ins##c
> > +#else
> > +  // Assume default Thumb-1 flags-affecting suffix 's'.
> > +  // Almost all instructions require this in unified syntax.
> > +  #define IT(ins,c) ins##s
> 
> This simply doesn't make much sense, at least, not enough to make it
> generally available.  It seems it would be invariably wrong to replace a
> conditional instruction in arm/thumb2 code with a non-conditional flag
> setting instruction in thumb1.  So please don't do this as it's likely
> to be a source of bugs going forwards if folk don't understand exactly
> when it is safe.

I'm going to push back in favor of this approach.  I'm a huge believer in
DRY code, and duplicating sequences of 1-4 instructions in a dozen
different places feels unclean.  Duplicating instructions also makes
code comments cumbersome, particularly when doing something tricky in
the conditional block.

I do understand your concern about the __ARM_FEATURE_IT name stepping
into ARM's namespace.  I chose that as a low-mental-friction pattern and
I'd like to keep any replacement similar.  Is __HAVE_FEATURE_IT OK?

This macro currently saves ~10 #ifdef blocks, and I expect that
number to rise significantly if/when I have time to merge ieee754-sf.S.
(One example might be __mulsf3(), since it's relatively independent. It's
now 360 bytes in v7m, and 96 bytes on v6m.   I would just need to go
through the new version and add a few Thumb-2 optimizations, such as using
the hardware multiply instructions instead of __mulsidi3.)

The safety you want boils down to whether or not __HAVE_FEATURE_IT gets
set correctly.  The do_it() macro seems universally used within libgcc
to support both Thumb-2 and arm compilation of the same code.  I've
defined __HAVE_FEATURE_IT to have the same scope as do_it(), and the 
assembler checks that conditionals are consistent with the previous IT.

While using the IT() macro without do_it() could result in unintended "s"
suffix instructions being emitted for Thumb-1, compilation will fail 
when attempting to build any Thumb-2 multilib.  At that point, adding 
do_it() macro will lead to __HAVE_FEATURE_IT and everything should 
be self-evident. 

I want the macro name to be short, so that it fits within one indent. 
I briefly considered _(), but figured that would be too obtuse.  

I will add the following comment before the macro to clarify: 

/* The IT(c) macro streamlines the construction of short branchless
conditional sequences that support ARM, Thumb-2, and Thumb-1.
It is meant as an extension to the .do_it macro defined above.
Code not written to support Thumb-1 need not use IT(c).

   Where the IT instruction is supported by ARM and Thumb-2, the
given instruction compiles with the conditional suffix 'c'.

   Since Thumb-1 and v6m do not support IT, the given instruction
compiles with the standard unified syntax suffix "s".  This is
somewhat simplistic and does not support 'cmp', or 'tst' since
they do not have the suffix "s".  It also fails for 'movs' and

Re: [PATCH 0/2] Check if AR works with --plugin and rc

2021-01-11 Thread H.J. Lu via Gcc-patches
On Mon, Jan 11, 2021 at 3:05 PM Alan Modra  wrote:
>
> On Mon, Jan 11, 2021 at 08:57:04AM -0800, H.J. Lu via Binutils wrote:
> > Check if AR works with --plugin and rc before passing --plugin to AR and
> > RANLIB.
>
> Thanks for looking at this, but next time please assign the bug to
> yourself.  I fixed the bug too this morning, before seeing your
> email.

Will do.

-- 
H.J.


Re: [PATCH v4 02/29] Refactor 'clz' functions into a new file.

2021-01-11 Thread Richard Earnshaw via Gcc-patches
On 11/01/2021 15:58, Daniel Engel wrote:
> 
> On Mon, Jan 11, 2021, at 7:39 AM, Richard Earnshaw wrote:
>> On 11/01/2021 15:26, Richard Earnshaw wrote:
>>> On 11/01/2021 11:10, g...@danielengel.com wrote:
 From: Daniel Engel 

 gcc/libgcc/ChangeLog:
 2021-01-07 Daniel Engel 

* config/arm/lib1funcs.S: Move __clzsi2() and __clzdi2() to
* config/arm/bits/clz2.S: New file.
>>>
>>> No, please don't push these down into a subdirectory.  They do not
>>> represent a clear subfunctional distinction, so creating a load of disk
>>> hierarchy is just confusing.  Just put the code in config/arm/clz.S
>>>
>>> Otherwise this is just a re-org, so it's OK.
>>
>> Oops, missed that as a new file, this needs to copy over the original
>> copyright message.
>>
>> Same with the other re-orgs that split code up.
> 
> This is not a hard change, just noisy, so I'm checking ... the estimated
> lifetime of this particular content is approximately 15 minutes.  There
> is a copyright message in 05/29, and similar for the other re-orgs.
> 

Understood, but IANAL, so I'm erring on the side of caution.

R.

>> R.
>>
>>>
>>> R.
>>>
 ---
  libgcc/config/arm/bits/clz2.S | 124 ++
  libgcc/config/arm/lib1funcs.S | 123 +
  2 files changed, 125 insertions(+), 122 deletions(-)
  create mode 100644 libgcc/config/arm/bits/clz2.S

 diff --git a/libgcc/config/arm/bits/clz2.S b/libgcc/config/arm/bits/clz2.S
 new file mode 100644
 index 000..1c8f10a5b29
 --- /dev/null
 +++ b/libgcc/config/arm/bits/clz2.S
 @@ -0,0 +1,124 @@
 +
 +#ifdef L_clzsi2
 +#ifdef NOT_ISA_TARGET_32BIT
 +FUNC_START clzsi2
 +  movsr1, #28
 +  movsr3, #1
 +  lslsr3, r3, #16
 +  cmp r0, r3 /* 0x1 */
 +  bcc 2f
 +  lsrsr0, r0, #16
 +  subsr1, r1, #16
 +2:lsrsr3, r3, #8
 +  cmp r0, r3 /* #0x100 */
 +  bcc 2f
 +  lsrsr0, r0, #8
 +  subsr1, r1, #8
 +2:lsrsr3, r3, #4
 +  cmp r0, r3 /* #0x10 */
 +  bcc 2f
 +  lsrsr0, r0, #4
 +  subsr1, r1, #4
 +2:adr r2, 1f
 +  ldrbr0, [r2, r0]
 +  addsr0, r0, r1
 +  bx lr
 +.align 2
 +1:
 +.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
 +  FUNC_END clzsi2
 +#else
 +ARM_FUNC_START clzsi2
 +# if defined (__ARM_FEATURE_CLZ)
 +  clz r0, r0
 +  RET
 +# else
 +  mov r1, #28
 +  cmp r0, #0x1
 +  do_it   cs, t
 +  movcs   r0, r0, lsr #16
 +  subcs   r1, r1, #16
 +  cmp r0, #0x100
 +  do_it   cs, t
 +  movcs   r0, r0, lsr #8
 +  subcs   r1, r1, #8
 +  cmp r0, #0x10
 +  do_it   cs, t
 +  movcs   r0, r0, lsr #4
 +  subcs   r1, r1, #4
 +  adr r2, 1f
 +  ldrbr0, [r2, r0]
 +  add r0, r0, r1
 +  RET
 +.align 2
 +1:
 +.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
 +# endif /* !defined (__ARM_FEATURE_CLZ) */
 +  FUNC_END clzsi2
 +#endif
 +#endif /* L_clzsi2 */
 +
 +#ifdef L_clzdi2
 +#if !defined (__ARM_FEATURE_CLZ)
 +
 +# ifdef NOT_ISA_TARGET_32BIT
 +FUNC_START clzdi2
 +  push{r4, lr}
 +  cmp xxh, #0
 +  bne 1f
 +#  ifdef __ARMEB__
 +  movsr0, xxl
 +  bl  __clzsi2
 +  addsr0, r0, #32
 +  b 2f
 +1:
 +  bl  __clzsi2
 +#  else
 +  bl  __clzsi2
 +  addsr0, r0, #32
 +  b 2f
 +1:
 +  movsr0, xxh
 +  bl  __clzsi2
 +#  endif
 +2:
 +  pop {r4, pc}
 +# else /* NOT_ISA_TARGET_32BIT */
 +ARM_FUNC_START clzdi2
 +  do_push {r4, lr}
 +  cmp xxh, #0
 +  bne 1f
 +#  ifdef __ARMEB__
 +  mov r0, xxl
 +  bl  __clzsi2
 +  add r0, r0, #32
 +  b 2f
 +1:
 +  bl  __clzsi2
 +#  else
 +  bl  __clzsi2
 +  add r0, r0, #32
 +  b 2f
 +1:
 +  mov r0, xxh
 +  bl  __clzsi2
 +#  endif
 +2:
 +  RETLDM  r4
 +  FUNC_END clzdi2
 +# endif /* NOT_ISA_TARGET_32BIT */
 +
 +#else /* defined (__ARM_FEATURE_CLZ) */
 +
 +ARM_FUNC_START clzdi2
 +  cmp xxh, #0
 +  do_it   eq, et
 +  clzeq   r0, xxl
 +  clzne   r0, xxh
 +  addeq   r0, r0, #32
 +  RET
 +  FUNC_END clzdi2
 +
 +#endif
 +#endif /* L_clzdi2 */
 +
 diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
 index b4541bae791..f5aa5505e9d 100644
 --- a/libgcc/config/arm/lib1funcs.S
 +++ b/libgcc/config/arm/lib1funcs.S
 @@ -1722,128 +1722,7 @@ LSYM(Lover12):
  
  #endif /* __symbian__ */
  
 -#ifdef L_clzsi2
 -#ifdef NOT_ISA_TARGET_32BIT
 -FUNC_START clzsi2
 -  movsr1, #28
 -  movsr3, #1
 -  lslsr

Re: [PATCH 0/2] Check if AR works with --plugin and rc

2021-01-11 Thread Alan Modra via Gcc-patches
On Mon, Jan 11, 2021 at 08:57:04AM -0800, H.J. Lu via Binutils wrote:
> Check if AR works with --plugin and rc before passing --plugin to AR and
> RANLIB.

Thanks for looking at this, but next time please assign the bug to
yourself.  I fixed the bug too this morning, before seeing your
email.

-- 
Alan Modra
Australia Development Lab, IBM


Re: [PATCH] binuitils: Check if AR is usable for LTO build

2021-01-11 Thread H.J. Lu via Gcc-patches
On Mon, Jan 11, 2021 at 1:20 PM Alan Modra  wrote:
>
> On Mon, Jan 11, 2021 at 11:53:15AM -0800, H.J. Lu via Binutils wrote:
> > Check if AR is usable for LTO build with --enable-pgo-build=lto:
> >
> > checking for -plugin option... ar: no operation specified
> > Failed: ar --plugin 
> > /usr/gcc-11.0.0-x32/libexec/gcc/x86_64-pc-linux-gnu/11.0.0/liblto_plugin.so 
> > rc
> > no
> > configure: error: AR with --plugin and rc is required for LTO build
> >
> > instead of build failure later.
> >
> >   PR binutils/26766
> >   * configure.ac:
> >   * configure: Regenerated.
>
> See pr27173 too.  The problem isn't a matter of finding an "ar" that
> supports --plugin, we have versions of GNU ar (2.30 to 2.32?) that
> accept --plugin but then don't parse the "rc" or other command
> options.  I don't think this patch will help.

PR 27173 patches are at

https://sourceware.org/pipermail/binutils/2021-January/114879.html

-- 
H.J.


[wwwdocs] Update C++ DR table with new DRs

2021-01-11 Thread Marek Polacek via Gcc-patches
I pushed this patch to update the DR table with a few new DRs.  Also update
some of the older ones to reflect that they're included in C++20.

Marek

commit 6be6626d59e1c492d5e05606e5a6902feb9e5bac
Author: Marek Polacek 
Date:   Mon Jan 11 17:18:45 2021 -0500

C++ DRs: Add new DRs, update older ones.

diff --git a/htdocs/projects/cxx-dr-status.html 
b/htdocs/projects/cxx-dr-status.html
index 72221490..b1b4f40f 100644
--- a/htdocs/projects/cxx-dr-status.html
+++ b/htdocs/projects/cxx-dr-status.html
@@ -15,7 +15,7 @@
 
   This table tracks the implementation status of C++ defect reports in GCC.
   It is based on C++ Standard Core Language Issue Table of Contents, Revision
-  100 (http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_toc.html";>here).
+  102 (http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_toc.html";>here).
 
   
 
@@ -11377,7 +11377,7 @@
 
 
   https://wg21.link/cwg1621";>1621
-  DRWP
+  C++20
   Member initializers in anonymous unions
   ?
   
@@ -14401,7 +14401,7 @@
 
 
   https://wg21.link/cwg2053";>2053
-  DR
+  C++20
   auto in non-generic lambdas
   ?
   
@@ -14912,7 +14912,7 @@
 
 
   https://wg21.link/cwg2126";>2126
-  DRWP
+  C++20
   Lifetime-extended temporaries in constant expressions
   ?
   
@@ -15990,7 +15990,7 @@
 
 
   https://wg21.link/cwg2280";>2280
-  DRWP
+  C++20
   Matching a usual deallocation function with placement new
   ?
   
@@ -16004,7 +16004,7 @@
 
 
   https://wg21.link/cwg2282";>2282
-  DRWP
+  C++20
   Consistency with mismatched aligned/non-over-aligned 
allocation/deallocation functions
   ?
   
@@ -16291,7 +16291,7 @@
 
 
   https://wg21.link/cwg2323";>2323
-  accepted
+  C++20
   Expunge POD
   ?
   
@@ -16459,7 +16459,7 @@
 
 
   https://wg21.link/cwg2347";>2347
-  DRWP
+  C++20
   Passing short scoped enumerations to ellipsis
   ?
   
@@ -16648,8 +16648,8 @@
 
 
   https://wg21.link/cwg2374";>2374
-  DRWP
-  Overly permissive specification of enum 
direct-list-initialization
+  C++20
+  Overly permissive specification of enum 
direct-list-initialization
   ?
   
 
@@ -16676,9 +16676,9 @@
 
 
   https://wg21.link/cwg2378";>2378
-  accepted
-  Inconsistent grammar for reference init-capture of pack
-  ?
+  C++20
+  Inconsistent grammar for reference init-capture of pack
+  10
   https://gcc.gnu.org/PR91847";>PR91847
 
 
@@ -16926,11 +16926,11 @@
   10
   
 
-
+
   https://wg21.link/cwg2414";>2414
-  drafting
-   Unclear results if both member and friend 
operator<=> are declared
-  -
+  C++20
+  Unclear results if both member and friend 
operator<=> are declared
+  ?
   
 
 
@@ -16942,7 +16942,7 @@
 
 
   https://wg21.link/cwg2416";>2416
-  DRWP
+  C++20
   Explicit specializations vs constexpr and 
consteval
   ?
   
@@ -16963,7 +16963,7 @@
 
 
   https://wg21.link/cwg2419";>2419
-  DRWP
+  C++20
   Loss of generality treating pointers to objects as one-element 
arrays
   ?
   
@@ -16984,7 +16984,7 @@
 
 
   https://wg21.link/cwg2422";>2422
-  DRWP
+  C++20
   Incorrect grammar for deduction-guide
   ?
   
@@ -16998,8 +16998,8 @@
 
 
   https://wg21.link/cwg2424";>2424
-  DRWP
-  constexpr initialization requirements for variant 
members 
+  C++20
+  constexpr initialization requirements for variant 
members
   ?
   
 
@@ -17012,14 +17012,14 @@
 
 
   https://wg21.link/cwg2426";>2426
-  DRWP
+  C++20
   Reference to destructor that cannot be invoked
   ?
   
 
 
   https://wg21.link/cwg2427";>2427
-  DRWP
+  C++20
   Deprecation of volatile operands and unevaluated contexts
   ?
   
@@ -17033,35 +17033,35 @@
 
 
   https://wg21.link/cwg2429";>2429
-  DRWP
-  Initialization of thread_local variables referenced by 
lambdas 
+  C++20
+  Initialization of thread_local variables referenced by 
lambdas
   ?
   
 
 
   https://wg21.link/cwg2430";>2430
-  DRWP
-  Completeness of return and parameter types of member functions 
+  C++20
+  Completeness of return and parameter types of member functions
   ?
   
 
 
   https://wg21.link/cwg2431";>2431
-  DRWP
-  Full-expressions and temporaries bound to references 
+  C++20
+  Full-expressions and temporaries bound to references
   ?
   
 
 
   https://wg21.link/cwg2432";>2432
-  DRWP
+  C++20
   Return types for defaulted <=>
   ?
   
 
 
   https://wg21.link/cwg2433";>2433
-  DRWP
+  C++20
   

Re: [PATCH] libstdc++: implement locale support for AIX

2021-01-11 Thread David Edelsohn via Gcc-patches
On Mon, Jan 11, 2021 at 10:56 AM CHIGOT, CLEMENT
 wrote:
>
> >> Hi David, Clement,
> >>
> >>> The patch is local to libstdc++ AIX support, so I believe that I can 
> >>> approve it.
> >>
> >>have you considered merging the dragonfly and aix trees?  I'm asking
> >>because it seems prudent to try and avoid creating more and more
> >>almost-but-not-quite-similar configurations (Solaris might be able to
> >>use the same code, at least in 11.4 which has XPG7 support).
> >
> >Agreed.
> >
> >See also 
> >https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgcc.gnu.org%2Fbugzilla%2Fshow_bug.cgi%3Fid%3D57585&data=04%7C01%7Cclement.chigot%40atos.net%7Cb9820136976149e643b408d8b64756c2%7C33440fc6b7c7412cbb730e70b0198d5a%7C0%7C0%7C637459764803442555%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=qinNdbwsbyUdvijD76PanMf0Y1iSfn1FhRAt2T2dl%2B0%3D&reserved=0
>
> I haven't thought about that. It should be possible.
> The main problem with AIX is the few missing locale functions
> (strtof_l, localeconv_l, etc.). I've defined some in c_locale.h; others
> are simply wrapped by uselocale in the code itself.
> The question is: if we merge dragonfly and aix and allow other OSes
> to use this new locale support, what should go in c_locale.h?
> Do we want it to define all missing functions, or would we rather
> have some #ifdef wrapping "uselocale" when a *_l function
> is missing?
> Either way, I fear there will be a lot of #ifdefs.

The OS-specific definitions could be placed in
config/os/XXX/os_defines.h.  And the DragonFly locale files could
include <bits/os_defines.h> to obtain the definitions.

I'm not certain where to place the uselocale changes.  Either #ifdefs
or move part of the files to config/os/XXX/...

Thanks, David


Re: [PATCH] c++: ICE with constrained placeholder return type [PR98346]

2021-01-11 Thread Jason Merrill via Gcc-patches

On 1/11/21 4:40 PM, Jason Merrill wrote:

On 1/7/21 4:06 PM, Patrick Palka wrote:

This is essentially a followup to r11-3714 -- we're ICEing from another
"unguarded" call to build_concept_check, this time in do_auto_deduction,
due to the presence of templated trees when !processing_template_decl.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk and perhaps the 10 branch?

gcc/cp/ChangeLog:

PR c++/98346
* pt.c (do_auto_deduction): Temporarily increment
processing_template_decl before calling build_concept_check.

gcc/testsuite/ChangeLog:

PR c++/98346
* g++.dg/cpp2a/concepts-placeholder3.C: New test.
---
  gcc/cp/pt.c   |  2 ++
  .../g++.dg/cpp2a/concepts-placeholder3.C  | 15 +++
  2 files changed, 17 insertions(+)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-placeholder3.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index beabcc4b027..111a694e0c5 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -29464,7 +29464,9 @@ do_auto_deduction (tree type, tree init, tree 
auto_node,

    cargs = targs;
  /* Rebuild the check using the deduced arguments.  */
+    ++processing_template_decl;
  check = build_concept_check (cdecl, cargs, tf_none);
+    --processing_template_decl;


This shouldn't be necessary; if processing_template_decl is 0, we should 
have non-dependent args.


I think your patch only works for this testcase because the concept is 
trivial and doesn't actually try to do anything with the arguments.


Handling of PLACEHOLDER_TYPE_CONSTRAINTS is overly complex, partly 
because the 'auto' is represented as an argument in its own constraints.


A constrained auto variable declaration has the same problem.


Applying the patch below turns up similar problems in a couple of 
existing testcases.
commit 3825157b4d54c7f0f3e16f08b3dec5c271b01921
Author: Jason Merrill 
Date:   Mon Jan 11 16:45:16 2021 -0500

assert

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 9049d087859..1d87c7e48a3 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3069,6 +3069,9 @@ satisfy_constraint_expression (tree t, tree args, sat_info info)
   else
 norm = normalize_constraint_expression (t, info.noisy ());
 
+  /* Satisfaction can only be determined with real args.  */
+  gcc_checking_assert (!uses_template_parms (args));
+
   /* Perform satisfaction.  */
   return satisfy_constraint (norm, args, info);
 }


[PATCH] Handle fancy_abort before diagnostic initialization [PR98586]

2021-01-11 Thread David Malcolm via Gcc-patches
If fancy_abort is called before the diagnostic subsystem is initialized,
internal_error will crash internally in a way that prevents a useful
message reaching the user.

This can happen with libgccjit in the case of gcc_assert failures
that occur outside of the libgccjit mutex that guards the rest of
gcc's state, including global_dc (when global_dc may not be
initialized yet, or might be in use by another thread).

I tried a few approaches to fixing this as noted in PR jit/98586
e.g. using a temporary diagnostic_context and initializing it for
the call to internal_error, however the more code that runs, the
more chance there is for other errors to occur.

The best fix appears to be to simply fall back to a minimal abort
implementation that only relies on i18n, as implemented by this
patch.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.

Is there a better way to fix this?  If not I plan to push this
to master in a few days.

gcc/ChangeLog:
PR jit/98586
* diagnostic.c (diagnostic_kind_text): Break out this array
from...
(diagnostic_build_prefix): ...here.
(fancy_abort): Detect when diagnostic_initialize has not yet been
called and fall back to a minimal implementation of printing the
ICE, rather than segfaulting in internal_error.
---
 gcc/diagnostic.c | 45 +++--
 1 file changed, 39 insertions(+), 6 deletions(-)

diff --git a/gcc/diagnostic.c b/gcc/diagnostic.c
index 4250bf96c8b..3be7748eb39 100644
--- a/gcc/diagnostic.c
+++ b/gcc/diagnostic.c
@@ -431,6 +431,13 @@ diagnostic_get_location_text (diagnostic_context *context,
   line_col, locus_ce);
 }
 
+static const char *const diagnostic_kind_text[] = {
+#define DEFINE_DIAGNOSTIC_KIND(K, T, C) (T),
+#include "diagnostic.def"
+#undef DEFINE_DIAGNOSTIC_KIND
+  "must-not-happen"
+};
+
 /* Return a malloc'd string describing a location and the severity of the
diagnostic, e.g. "foo.c:42:10: error: ".  The caller is responsible for
freeing the memory.  */
@@ -438,12 +445,6 @@ char *
 diagnostic_build_prefix (diagnostic_context *context,
 const diagnostic_info *diagnostic)
 {
-  static const char *const diagnostic_kind_text[] = {
-#define DEFINE_DIAGNOSTIC_KIND(K, T, C) (T),
-#include "diagnostic.def"
-#undef DEFINE_DIAGNOSTIC_KIND
-"must-not-happen"
-  };
   gcc_assert (diagnostic->kind < DK_LAST_DIAGNOSTIC_KIND);
 
   const char *text = _(diagnostic_kind_text[diagnostic->kind]);
@@ -1832,6 +1833,38 @@ error_recursion (diagnostic_context *context)
 void
 fancy_abort (const char *file, int line, const char *function)
 {
+  /* If fancy_abort is called before the diagnostic subsystem is initialized,
+ internal_error will crash internally in a way that prevents a
+ useful message reaching the user.
+ This can happen with libgccjit in the case of gcc_assert failures
+ that occur outside of the libgccjit mutex that guards the rest of
+ gcc's state, including global_dc (when global_dc may not be
+ initialized yet, or might be in use by another thread).
+ Handle such cases as gracefully as possible by falling back to a
+ minimal abort handler that only relies on i18n.  */
+  if (global_dc->printer == NULL)
+{
+  /* Print the error message.  */
+  fnotice (stderr, diagnostic_kind_text[DK_ICE]);
+  fnotice (stderr, "in %s, at %s:%d", function, trim_filename (file), 
line);
+  fputc ('\n', stderr);
+
+  /* Attempt to print a backtrace.  */
+  struct backtrace_state *state
+   = backtrace_create_state (NULL, 0, bt_err_callback, NULL);
+  int count = 0;
+  if (state != NULL)
+   backtrace_full (state, 2, bt_callback, bt_err_callback,
+   (void *) &count);
+
+  /* We can't call warn_if_plugins or emergency_dump_function as these
+rely on GCC state that might not be initialized, or might be in
+use by another thread.  */
+
+  /* Abort the process.  */
+  real_abort ();
+}
+
   internal_error ("in %s, at %s:%d", function, trim_filename (file), line);
 }
 
-- 
2.26.2



Re: [PATCH 0/8] [RS6000] rs6000_rtx_costs V2

2021-01-11 Thread Alan Modra via Gcc-patches
On Sat, Dec 05, 2020 at 07:42:07PM +1030, Alan Modra wrote:
> Hi Segher,
> I've been holding off pinging these knowing you had a lot of other
> review work, but maybe that's settling down now?  You already OK'd
> 1/8, 2/8 and 6/8.

Ping.

> [PATCH 3/8] [RS6000] rs6000_rtx_costs tidy AND
> https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555754.html
> 
> [PATCH 4/8] [RS6000] rs6000_rtx_costs tidy break/return
> https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555755.html
> 
> [PATCH 5/8] [RS6000] rs6000_rtx_costs cost IOR
> https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555756.html
> 
> [PATCH 7/8] [RS6000] rs6000_rtx_costs reduce cost for SETs
> https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555758.html
> 
> [PATCH 8/8] [RS6000] rs6000_rtx_costs for !speed
> https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555759.html
> 
> [RS6000] rotate and mask constants
> https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555760.html
> 
> [RS6000] Adjust testcases for power10 instructions V3
> https://gcc.gnu.org/pipermail/gcc-patches/2020-October/557587.html

-- 
Alan Modra
Australia Development Lab, IBM


Re: [PATCH] c++: ICE with constrained placeholder return type [PR98346]

2021-01-11 Thread Jason Merrill via Gcc-patches

On 1/7/21 4:06 PM, Patrick Palka wrote:

This is essentially a followup to r11-3714 -- we're ICEing from another
"unguarded" call to build_concept_check, this time in do_auto_deduction,
due to the presence of templated trees when !processing_template_decl.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk and perhaps the 10 branch?

gcc/cp/ChangeLog:

PR c++/98346
* pt.c (do_auto_deduction): Temporarily increment
processing_template_decl before calling build_concept_check.

gcc/testsuite/ChangeLog:

PR c++/98346
* g++.dg/cpp2a/concepts-placeholder3.C: New test.
---
  gcc/cp/pt.c   |  2 ++
  .../g++.dg/cpp2a/concepts-placeholder3.C  | 15 +++
  2 files changed, 17 insertions(+)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-placeholder3.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index beabcc4b027..111a694e0c5 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -29464,7 +29464,9 @@ do_auto_deduction (tree type, tree init, tree auto_node,
cargs = targs;
  
  	/* Rebuild the check using the deduced arguments.  */

+   ++processing_template_decl;
check = build_concept_check (cdecl, cargs, tf_none);
+   --processing_template_decl;


This shouldn't be necessary; if processing_template_decl is 0, we should 
have non-dependent args.


I think your patch only works for this testcase because the concept is 
trivial and doesn't actually try to do anything with the arguments.


Handling of PLACEHOLDER_TYPE_CONSTRAINTS is overly complex, partly 
because the 'auto' is represented as an argument in its own constraints.


A constrained auto variable declaration has the same problem.


if (!constraints_satisfied_p (check))
{
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-placeholder3.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-placeholder3.C
new file mode 100644
index 000..a5d0b1e1d0f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-placeholder3.C
@@ -0,0 +1,15 @@
+// PR c++/98346
+// { dg-do compile { target c++20 } }
+
+template 
+concept always_satisfied = true;
+
+using arg_alias = int;
+
+template 
+using result_of = decltype(F{}(arg_alias{}));
+
+template 
+always_satisfied> auto foo(F) {}
+
+void bar() { foo(0); }





Re: [PATCH] binuitils: Check if AR is usable for LTO build

2021-01-11 Thread Alan Modra via Gcc-patches
On Mon, Jan 11, 2021 at 11:53:15AM -0800, H.J. Lu via Binutils wrote:
> Check if AR is usable for LTO build with --enable-pgo-build=lto:
> 
> checking for -plugin option... ar: no operation specified
> Failed: ar --plugin 
> /usr/gcc-11.0.0-x32/libexec/gcc/x86_64-pc-linux-gnu/11.0.0/liblto_plugin.so rc
> no
> configure: error: AR with --plugin and rc is required for LTO build
> 
> instead of build failure later.
> 
>   PR binutils/26766
>   * configure.ac:
>   * configure: Regenerated.

See pr27173 too.  The problem isn't a matter of finding an "ar" that
supports --plugin, we have versions of GNU ar (2.30 to 2.32?) that
accept --plugin but then don't parse the "rc" or other command
options.  I don't think this patch will help.

> ---
>  configure| 4 
>  configure.ac | 4 
>  2 files changed, 8 insertions(+)
> 
> diff --git a/configure b/configure
> index c44184f72ff..84285addafe 100755
> --- a/configure
> +++ b/configure
> @@ -10240,6 +10240,10 @@ if test -n "$PLUGIN_OPTION"; then
>if $RANLIB --help 2>&1 | grep -q "\--plugin"; then
>  RANLIB_PLUGIN_OPTION="$PLUGIN_OPTION"
>fi
> +else
> +  if test "$enable_pgo_build" != "no"; then
> +as_fn_error $? "AR with --plugin and rc is required for LTO build" 
> "$LINENO" 5
> +  fi
>  fi
>  
>  
> diff --git a/configure.ac b/configure.ac
> index 9dd51c36e5a..d39019d7093 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -3452,6 +3452,10 @@ if test -n "$PLUGIN_OPTION"; then
>if $RANLIB --help 2>&1 | grep -q "\--plugin"; then
>  RANLIB_PLUGIN_OPTION="$PLUGIN_OPTION"
>fi
> +else
> +  if test "$enable_pgo_build" != "no"; then
> +AC_MSG_ERROR([AR with --plugin and rc is required for LTO build])
> +  fi
>  fi
>  AC_SUBST(AR_PLUGIN_OPTION)
>  AC_SUBST(RANLIB_PLUGIN_OPTION)
> -- 
> 2.29.2

-- 
Alan Modra
Australia Development Lab, IBM


Re: [PATCH v4] rs6000, vector integer multiply/divide/modulo instructions

2021-01-11 Thread will schmidt via Gcc-patches
On Mon, 2020-12-07 at 16:31 -0800, Carl Love wrote:
> Will:
> 
> I have addressed your comments with regard to the Change Log entries.  
> 
> The extra define vec_div was removed.
> 
> Added the missing entries for DIVU_V2DI  DIVS_V2DI in rs6000-call.c.
> 
> The extra MULLD_V2DI case statement entry was removed.
> 
> Added comment in rs6000.md about size for vector types per discussion
> with Pat.
> 
>   Carl
> 
> 
> GCC maintainers:
> 
> The following patch adds new builtins for the vector integer multiply,
> divide and modulo operations.  The builtins are: vec_mulh(),
> vec_dive(), vec_mod() for signed and unsigned integers and long
> longintegers. The existing support for the vec_div()and vec_mul()
> builtins emulate the vector operations with multiple scalar
> instructions.  This patch adds support for these builtins using the new
> vector instructions for Power 10.

Missing a couple spaces. 
"long integers"
 and 
"vec_div() and".


> 
> The patch was compiled and tested on:
> 
>   powerpc64le-unknown-linux-gnu (Power 9 LE)
>   powerpc64le-unknown-linux-gnu (Power 10 LE)
> 
> with no regressions. Additionally the new test case was compiled and
> executed by hand on Mambo to verify the test case passes.

May also be worth trying on Power8/BE, just for the variety.

> 
> Please let me know if this patch is acceptable for mainline.  Thanks.
> 
> Carl Love
> 
> -
> 
> From 15f9c090106c62af83cc405414466ad03d1a4c55 Mon Sep 17 00:00:00 2001
> From: Carl Love 
> Date: Fri, 4 Sep 2020 19:24:22 -0500
> Subject: [PATCH] rs6000, vector integer multiply/divide/modulo instructions
> 
> 2020-12-07  Carl Love  
> 
> gcc/
>   * config/rs6000/altivec.h (vec_mulh, vec_dive, vec_mod): Newdefines.

Embedded tab there.

>   * config/rs6000/altivec.md (VIlong): Move define to file vsx.md.

>   * config/rs6000/rs6000-builtin.def (DIVES_V4SI, DIVES_V2DI,
>   DIVEU_V4SI, DIVEU_V2DI, DIVS_V4SI, DIVS_V2DI, DIVU_V4SI,
>   DIVU_V2DI, MODS_V2DI, MODS_V4SI, MODU_V2DI, MODU_V4SI,
>   MULHS_V2DI, MULHS_V4SI, MULHU_V2DI, MULHU_V4SI, MULLD_V2DI):
>   Add builtin define.
>   (MULH, DIVE, MOD):  Add new BU_P10_OVERLOAD_2 definitions.



>   * config/rs6000/rs6000-call.c (altivec_overloaded_builtins): Add
>   VSX_BUILTIN_VEC_DIV, P10_BUILTIN_VEC_VDIVE,
>   P10_BUILTIN_VEC_VDIVE, P10_BUILTIN_VEC_VMOD, P10_BUILTIN_VEC_VMULH
>   overloaded definitions.

P10_BUILTIN_VEC_VDIVE is mentioned here twice.
I don't see it in the
patch body at all. 
I don't see P10_BUILTIN_VEC_VMOD either.
Also don't
see P10_BUILTIN_VEC_VMULH.


>   (builtin_function_type) [P10V_BUILTIN_DIVEU_V4SI,
>   P10V_BUILTIN_DIVEU_V2DI, P10V_BUILTIN_DIVU_V4SI,
>   P10V_BUILTIN_DIVU_V2DI, P10V_BUILTIN_MODU_V2DI,
>   P10V_BUILTIN_MODU_V4SI, P10V_BUILTIN_MULHU_V2DI,
>   P10V_BUILTIN_MULHU_V4SI, P10V_BUILTIN_MULLD_V2DI]: Add case
>   statements for builtins.

I don't see the P10V_BUILTIN_MULLD_V2DI case statement entry in the
patch below.   A previous review commented that there might have been a
missing altivec_overloaded_builtins entry for the MULLD_V2DI entry. 
Codegen for unsigned mull against v2di was correct?


>   * config/rs6000/rs6000.md (bits): Add new attribute sizes.

I'd be more verbose here to provide something searchable.
i.e. "Add V4SI,V2DI entries..."

>   * config/rs6000/vsx.md (VIlong): New define_mode_iterator.

Not new.  'Moved here from altivec.md' or something similar.


>   (UNSPEC_VDIVES, UNSPEC_VDIVEU): New unspec definitions.
>   (vsx_mul_v2di): Add if TARGET_POWER10 statement.
>   (vsx_udiv_v2di): Add if TARGET_POWER10 statement.
>   (dives_, diveu_, div3, uvdiv3,
>   mods_, modu_, mulhs_, mulhu_, mulv2di3):
>   Add define_insn, mode is VIlong.
>   doc/extend.texi (vec_mulh, vec_mul, vec_div, vec_dive, vec_mod): Add
>   builtin descriptions.
> 
> gcc/testsuite/
>   * gcc.target/powerpc/builtins-1-p10-runnable.c: New test file.
> ---
>  gcc/config/rs6000/altivec.h   |   4 +
>  gcc/config/rs6000/altivec.md  |   2 -
>  gcc/config/rs6000/rs6000-builtin.def  |  22 +
>  gcc/config/rs6000/rs6000-call.c   |  53 +++
>  gcc/config/rs6000/rs6000.md   |   4 +-
>  gcc/config/rs6000/vsx.md  | 212 +++---
>  gcc/doc/extend.texi   | 120 ++
>  .../powerpc/builtins-1-p10-runnable.c | 398 ++
>  8 files changed, 762 insertions(+), 53 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-1-p10-runnable.c
> 
> diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
> index e1884f51bd8..b678e5cf28d 100644
> --- a/gcc/config/rs6000/altivec.h
> +++ b/gcc/config/rs6000/altivec.h
> @@ -750,6 +750,10 @@ __altivec_scalar_pred(vec_any_nle,
>  #defi

Re: [PATCH] c++: Fix ICE with CTAD in concept [PR98611]

2021-01-11 Thread Jason Merrill via Gcc-patches

On 1/9/21 5:23 PM, Patrick Palka wrote:

This patch teaches find_template_parameters to visit the template
represented by a CTAD placeholder, which is normally not visited by
for_each_template_parm.  This template may be a template template
parameter (as in the first testcase), or it may implicitly use the
template parameters of an enclosing class template (as in the second
testcase), and in either case we need to record the template parameters
used therein for later satisfaction.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk and perhaps the 10 branch?  Also tested on range-v3 and cmcstl2.

gcc/cp/ChangeLog:

PR c++/98611
* pt.c (any_template_parm_r) : Visit
the template of a CTAD placeholder.


Did you consider doing this in cp_walk_subtrees instead of here?


gcc/testsuite/ChangeLog:

PR c++/98611
* g++.dg/cpp2a/concepts-ctad1.C: New test.
* g++.dg/cpp2a/concepts-ctad2.C: New test.
---
  gcc/cp/pt.c |  4 
  gcc/testsuite/g++.dg/cpp2a/concepts-ctad1.C | 16 
  gcc/testsuite/g++.dg/cpp2a/concepts-ctad2.C | 14 ++
  3 files changed, 34 insertions(+)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-ctad1.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-ctad2.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 51540ca35a5..d3bb6231926 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -10694,6 +10694,10 @@ any_template_parm_r (tree t, void *data)
if (is_auto (t))
if (tree constr = PLACEHOLDER_TYPE_CONSTRAINTS (t))
  WALK_SUBTREE (constr);
+  /* A use of a CTAD placeholder is also a use of the template it
+represents.  */
+  if (template_placeholder_p (t))
+ WALK_SUBTREE (CLASS_PLACEHOLDER_TEMPLATE (t));
break;
  
  case TEMPLATE_ID_EXPR:

diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-ctad1.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-ctad1.C
new file mode 100644
index 000..ec2e4b014d7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-ctad1.C
@@ -0,0 +1,16 @@
+// PR c++/98611
+// { dg-do compile { target c++20 } }
+
+template 
+concept IsSame = __is_same(T, U);
+
+template  class _Class>
+concept IsInstantiationOf = requires(T object) {
+ { _Class{object} } -> IsSame;
+};
+
+template  struct Degrees {};
+static_assert(IsInstantiationOf, Degrees>);
+
+template  struct NotDegrees {};
+static_assert(!IsInstantiationOf, NotDegrees>);
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-ctad2.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-ctad2.C
new file mode 100644
index 000..de960487713
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-ctad2.C
@@ -0,0 +1,14 @@
+// PR c++/98611
+// { dg-do compile { target c++20 } }
+
+template 
+struct S
+{
+  template  struct Tmpl { Tmpl(T); };
+
+  template 
+requires requires (T object) { Tmpl{object}; }
+  static int f(T);
+};
+
+int a = S::f(0);





Re: [PATCH] c++: private inheritance access diagnostics fix [PR17314]

2021-01-11 Thread Jason Merrill via Gcc-patches

On 1/8/21 7:38 PM, Anthony Sharp wrote:

Hi Jason,

Thank you!


To start with, do you have a copyright assignment on file or in the
works already?


Good point. I incorrectly assumed it would only be a minor
contribution copyright-wise. Mr Edelsohn gave me a template which I've
now filled out and sent to ass...@gnu.org. I'm assuming I just need to
wait for them to send me the form. I'll update this thread when that's
sorted. In the meantime I've hopefully fixed some of the issues.


Great.


Second, your patch was mangled by word wrap so that it can't be applied
without manual repair.  If you can't prevent word wrap in your mail
client, please send it as an attachment rather than inline.


Oh yes I see where it's gotten mangled now. I'm attaching it as a
.patch file (I assume that's okay).


Also, there are a few whitespace issues in the patch; please run
contrib/check_GNU_style.sh on the patch before submitting.


Should be all fixed now (there is one style issue left but it's a
false positive). Visual Studio Code was lying to me about what the
file looks like so if there are any more formatting issues please let
me know.


If you use contrib/gcc-git-customization.sh and then git
gcc-commit-mklog you don't need to touch ChangeLog files at all, just
adjust the generated ChangeLog entries in the git commit message.  I
personally tend to commit first with a placeholder message and then use
git gcc-commit-mklog --amend to generate the ChangeLog entries.


Wouldn't that require read-write access? (Just from looking here
https://gcc.gnu.org/gitwrite.html.)


You don't need write access to the main repository to use these commands 
on your local copy.  One nice thing about git compared to svn is that 
you don't need to touch the server for anything but push and pull.


Incidentally, how are you producing your patch?  Maybe try git 
format-patch instead.



Probably.  Can you use sort/uniq/diff on the .sum testsuite output to
determine which passes are missing in the patched sources?


According to contrib/dg-cmp-results.sh ...

I get a bunch of these weird NA->PASSes (and vice-versa), for example:

PASS->NA: g++.dg/modules/alias-1_a.H module-cmi
(gcm.cache/home/anthony/Desktop/GCC/builds_and_source/source_clean/gcc/testsuite/g++.dg/modules/alias-1_a.H.gcm)
NA->PASS: g++.dg/modules/alias-1_a.H module-cmi
(gcm.cache/home/anthony/Desktop/GCC/builds_and_source/source_pr17314/gcc/testsuite/g++.dg/modules/alias-1_a.H.gcm)
PASS->NA: g++.dg/modules/alias-1_a.H module-cmi
(gcm.cache/home/anthony/Desktop/GCC/builds_and_source/source_clean/gcc/testsuite/g++.dg/modules/alias-1_a.H.gcm)
NA->PASS: g++.dg/modules/alias-1_a.H module-cmi
(gcm.cache/home/anthony/Desktop/GCC/builds_and_source/source_pr17314/gcc/testsuite/g++.dg/modules/alias-1_a.H.gcm)

They're weird because I haven't actually touched those files (so I'm
assuming this is normal). There are about ~400 of those and they're
all .gcm files. They seem to balance out.


The modules code and tests are very new and volatile, I wouldn't worry 
about them.



dr142.C reports:

NA->PASS: g++.dg/tc1/dr142.C  -std=c++14  (test for warnings, line 11)
PASS->NA: g++.dg/tc1/dr142.C  -std=c++14  (test for warnings, line 5)
PASS->NA: g++.dg/tc1/dr142.C  -std=c++14  (test for warnings, line 7)
PASS->NA: g++.dg/tc1/dr142.C  -std=c++14  (test for warnings, line 8)
NA->PASS: g++.dg/tc1/dr142.C  -std=c++17  (test for warnings, line 11)
PASS->NA: g++.dg/tc1/dr142.C  -std=c++17  (test for warnings, line 5)
PASS->NA: g++.dg/tc1/dr142.C  -std=c++17  (test for warnings, line 7)
PASS->NA: g++.dg/tc1/dr142.C  -std=c++17  (test for warnings, line 8)
NA->PASS: g++.dg/tc1/dr142.C  -std=c++2a  (test for warnings, line 11)
PASS->NA: g++.dg/tc1/dr142.C  -std=c++2a  (test for warnings, line 5)
PASS->NA: g++.dg/tc1/dr142.C  -std=c++2a  (test for warnings, line 7)
PASS->NA: g++.dg/tc1/dr142.C  -std=c++2a  (test for warnings, line 8)
NA->PASS: g++.dg/tc1/dr142.C  -std=c++98  (test for warnings, line 11)
PASS->NA: g++.dg/tc1/dr142.C  -std=c++98  (test for warnings, line 5)
PASS->NA: g++.dg/tc1/dr142.C  -std=c++98  (test for warnings, line 7)
PASS->NA: g++.dg/tc1/dr142.C  -std=c++98  (test for warnings, line 8)


These changes are because your patch changes that test to expect 
warnings in different places.



In other words, there are 12 PASS->NAs and 4 NA->PASSes in this file,
meaning a net change of -8 (which explains why there are eight fewer).
My other changes also report PASS->NAs and vice-versa, but for those
the number of new NAs equals the number of new PASSes, so they don't
cause a change in quantity.

Thanks for being patient with me. I'll let you know when I've
completed the forms.

Also if I need to adjust the .patch to deal with the changelogs issue
please let me know.

Kind regards,
Anthony





[PATCH] binutils: Check if AR is usable for LTO build

2021-01-11 Thread H.J. Lu via Gcc-patches
Check if AR is usable for LTO build with --enable-pgo-build=lto:

checking for -plugin option... ar: no operation specified
Failed: ar --plugin 
/usr/gcc-11.0.0-x32/libexec/gcc/x86_64-pc-linux-gnu/11.0.0/liblto_plugin.so rc
no
configure: error: AR with --plugin and rc is required for LTO build

instead of build failure later.

PR binutils/26766
* configure.ac:
* configure: Regenerated.
---
 configure| 4 
 configure.ac | 4 
 2 files changed, 8 insertions(+)

diff --git a/configure b/configure
index c44184f72ff..84285addafe 100755
--- a/configure
+++ b/configure
@@ -10240,6 +10240,10 @@ if test -n "$PLUGIN_OPTION"; then
   if $RANLIB --help 2>&1 | grep -q "\--plugin"; then
 RANLIB_PLUGIN_OPTION="$PLUGIN_OPTION"
   fi
+else
+  if test "$enable_pgo_build" != "no"; then
+as_fn_error $? "AR with --plugin and rc is required for LTO build" 
"$LINENO" 5
+  fi
 fi
 
 
diff --git a/configure.ac b/configure.ac
index 9dd51c36e5a..d39019d7093 100644
--- a/configure.ac
+++ b/configure.ac
@@ -3452,6 +3452,10 @@ if test -n "$PLUGIN_OPTION"; then
   if $RANLIB --help 2>&1 | grep -q "\--plugin"; then
 RANLIB_PLUGIN_OPTION="$PLUGIN_OPTION"
   fi
+else
+  if test "$enable_pgo_build" != "no"; then
+AC_MSG_ERROR([AR with --plugin and rc is required for LTO build])
+  fi
 fi
 AC_SUBST(AR_PLUGIN_OPTION)
 AC_SUBST(RANLIB_PLUGIN_OPTION)
-- 
2.29.2



Re: [PATCH] match.pd: Add ~(X - Y) -> ~X + Y simplification [PR96685]

2021-01-11 Thread Jeff Law via Gcc-patches



On 1/9/21 5:43 PM, Maciej W. Rozycki wrote:
> On Mon, 21 Dec 2020, Jakub Jelinek wrote:
>
 This patch adds the ~(X - Y) -> ~X + Y simplification requested
 in the PR (plus also ~(X + C) -> ~X + (-C) for constants C that can
 be safely negated).
>>>  This regresses VAX code produced by the cmpelim-eq-notsi.c test case (and 
>>> its similar counterparts) with the `vax-netbsdelf' target.
>> The point of the match.pd changes is to canonicalize GIMPLE on some form
>> when there are several from GIMPLE POV equivalent or better forms of writing
>> the same thing.  The advantage of having one canonical way is that ICF,
>> SCCVN etc. optimizations can then understand the different forms are
>> equivalent.
>  Fair enough, though in cases like this I think it is unclear which of the 
> two forms is going to be ultimately better, especially as it may depend on 
> the exact form of the operands used, e.g. values of any immediates, so I 
> think a way to make the reverse transformation (whether to undo one made 
> here or genuinely) needs to be available at a later compilation stage.  
> One size doesn't fit all.
>
>  With this in mind...
So in this case the number of operations is the same before/after and
parallelism is the same before/after, register lifetimes, etc.   I doubt
either form is particularly better suited for CSE or gives better VRP
data, etc.   The fact that we can't always do ~(X + C) -> ~X + -C
probably argues against that form ever so slightly.


>
>> If another form is then better for a particular machine, it should be done
>> either during expansion (being able to produce both RTLs and computing their
>> costs), or during combine with either combine splitters or
>> define_insn_and_split in the backend, or, if it can't be done in RTL, during
>> the isel pass.
>  Hmm, maybe it has been discussed before, so please forgive me if I write 
> something silly, but it seems to me like this should be done in a generic 
> way like match.pd so that all the targets do not have to track the changes 
> made there and then perhaps repeat the same or similar code each.  So I 
> think it would make sense to make a change like this include that reverse 
> transformation as well, so that ultimately both forms are tried with RTL, 
> as there is no clear advantage to either here.
The idea we've kicked around in the past was to use the same syntax as
match.pd, but have it be target dependent to reform expressions in ways
that are beneficial to the target and have it run at the end of the
gimple/ssa pipeline.  Nobody's implemented this though.

jeff



Re: [PATCH] VAX/testsuite: Remove notsi comparison elimination regressions

2021-01-11 Thread Jeff Law via Gcc-patches



On 1/10/21 7:45 AM, Maciej W. Rozycki wrote:
> On Fri, 8 Jan 2021, Jeff Law wrote:
>
>>> gcc/testsuite/
>>> * gcc.target/vax/cmpelim-eq-notsi.c: Use subtraction from a 
>>> constant then rather than addition.
>>> * gcc.target/vax/cmpelim-le-notsi.c: Likewise.
>>> * gcc.target/vax/cmpelim-lt-notsi.c: Likewise.
>> OK
>  Thank you for your review.  I have applied this change now and the 
> remaining ones you have approved.  I'll be watching out for any further 
> concerns, but otherwise I consider VAX backend development complete for 
> this release cycle.
Sounds good.

>
>  Also I have now scheduled full regression testing of the `vax-netbsdelf' 
> target with the timeout extended to 7200 seconds.  Hopefully this will let 
> all cases complete that do not infinitely loop.  I can post results to 
> gcc-testresults if that would be desired (is there a dedicated format for 
> that mailing list?), and overall they need to be triaged before anything 
> can be decided about what to do next.
>
>  I have seen some failures coming from individual test cases' assumption 
> of the floating-point format being IEEE 754, so at least these can be 
> easily excluded, or variants for the alternative format provided, as 
> applicable.
I think most are posting the stdout from the check run.   So we don't
generally get all the pass/xfail messages, but we do get fail/xpass
messages.  They don't need to be triaged or anything.

jeff



Re: [PATCH] tree-optimization/98221 - fix wrong unpack operation used for big-endian

2021-01-11 Thread Jeff Law via Gcc-patches



On 1/11/21 3:02 AM, Andreas Krebbel via Gcc-patches wrote:
> The vec-abi-varargs-1.c testcase on IBM Z currently fails.
>
> While adding an SI mode vector to a DI mode vector the first is unpacked 
> using:
>
>   _28 = BIT_INSERT_EXPR <{ 0, 0, 0, 0 }, _2, 0>;
>   _34 = [vec_unpack_lo_expr] _28;
>
> However, on big endian targets lo refers to the right hand side of the vector 
> - in this case the zeroes.
>
> Bootstrap & regtest running on x86_64-unknown-linux-gnu.
>
> 2021-01-11  Andreas Krebbel  
>
>   * tree-ssa-forwprop.c (simplify_vector_constructor): For
>   big-endian, use UNPACK[_FLOAT]_HI.
OK
jeff



Re: libstdc++ PR 57272 Fancy pointer support in Hashtable

2021-01-11 Thread François Dumont via Gcc-patches

Hi

    I had another look at this attempt to properly support alloc fancy 
pointers.


    I considered all your remarks apart from the big one below about all 
this being a waste of time :-)


    I do not see why we should use the alloc fancy pointer type in our 
_Hashtable implementation details. It is not noticeable from a user 
standpoint unless he wants to track all dereferences or '->' 
operator usages which would be quite odd.


    For now I just consider that we should store the fancy pointer 
coming from the allocator::allocate calls as-is and return it to the 
allocator when needed without the pointer_traits::pointer_to as we used 
to do. This should preserve any additional data the allocator might 
associate to the raw pointer in the allocator.


    Even if the Standard is saying we should extend the fancy pointer 
usage this patch is still a good 1st step which is unavoidable to 
complete the potential final picture. We could still provide this for 
now and see if users have complaints about it.


    This patch is implementing a small refinement by using fancy 
pointer move semantics in a couple of situations. I see that node_handle 
is not doing this but I consider it as a potential node handle enhancement.


    I am completing execution of tests but unordered ones are OK for 
both normal and debug modes.


libstdc++: Store allocator::pointer in hashtable implementation

    In _Hashtable implementation store the allocator::pointer returned 
by the allocate
    call as-is and return it on the deallocate when necessary. This is 
true for both

    allocate nodes and buckets.

    Note that internally, as an implementation detail, we are still 
using raw pointers

    in iterators and buckets.

    libstdc++-v3/ChangeLog:

    * include/bits/hashtable_policy.h
    (__alloc_val_ptr<>): New template alias.
    (_ReuseOrAllocNode<>::__node_type): Remove.
    (_ReuseOrAllocNode<>::__node_ptr): New.
(_ReuseOrAllocNode<>::operator()(_Arg&&)): Return latter.
    (_ReuseOrAllocNode(__node_ptr, __hashtable_alloc&)): Adapt 
to use latter.

    (_ReuseOrAllocNode(_Hash_node_base*, __hashtable_alloc&)): New.
    (_AllocNode<>::__node_type): Remove.
    (_AllocNode<>::__node_ptr): New.
(_AllocNode<>::operator()<>(_Arg&&)): Return latter.
    (_Hash_pnode_base<>): New.
    (_Hash_node<>::__node_base): New.
    (_Hash_node<>::__node_ptr): New.
    (_Hash_node<>::__node_type): New.
    (_Hash_node<>::__node_value_cache_type): New.
    (_Hash_node<>::_M_next_ptr()): New.
    (_Hash_pnode): New.
    (__get_node_type<>): New, template alias to _Hash_node<> if 
allocator pointer

    type is a raw pointer, _Hash_pnode<> otherwise.
    (_Hashtable_iterator_base): New.
    (_Node_iterator_base<>): Inherits from latter.
    (_Hashtable_iterator__constant_iterators>):

    New.
    (_Hashtable_const_iterator__constant_iterators>):

    New.
    (_Insert_base<>::__alloc_ptr): New.
    (_Insert_base<>::__hashtable_alloc): Remove.
    (_Insert_base<>::__node_type): New.
    (_Insert_base<>::iterator): Define conditionally to 
_Node_iterator<>
    or _Hashtable_iterator<> depending on __alloc_ptr being a 
raw pointer.

    (_Insert_base<>::const_iterator): Define conditionally to
    _Node_const_iterator<> or _Hashtable_const_iterator<> 
depending on

    __alloc_ptr being a raw pointer.
    (_Hashtable_local_iter_base<>): New.
    (_Hashtable_local_iterator<>): New.
    (_Hashtable_const_local_iterator<>): New.
    (__local_iterator<>): New template alias.
    (__const_local_iterator<>): New template alias.
    (_Hashtable_base<>::_M_equals(const _Key&, __hash_code,
    const _Hash_node_cache_value<>&): New.
    (_Hashtable_base<>::_M_node_equals(const 
_Hash_node_cache_value<>&,

    const _Hash_node_cache_value<>&)): New.
    (_Hashtable_alloc<>::__value_alloc_traits): Remove.
    (_Hashtable_alloc<>::__node_base_ptr): Remove.
    * include/bits/hashtable.h (_Hashtable<>): Adapt.
    * 
testsuite/23_containers/unordered_map/allocator/ext_ptr.cc: New test.
    * 
testsuite/23_containers/unordered_multimap/allocator/ext_ptr.cc:

    New test.
    * 
testsuite/23_containers/unordered_multiset/allocator/ext_ptr.cc:

    New test.
    * 
testsuite/23_containers/unordered_set/allocator/ext_ptr.cc: Adapt.


Ok to commit ? (even if in a few months)

François


On 02/11/20 3:11 pm, Jonathan Wakely wrote:

On 01/11/20 22:48 +0100, François Dumont via Libstdc++ wrote:

Here is another attempt.

This time I am storing the node using allocator pointer just in the 
singly linked list of nodes. Buckets are still __node_base* so that 
the

[pushed] aarch64: Add support for unpacked SVE ASRD

2021-01-11 Thread Richard Sandiford via Gcc-patches
This patch adds support for both conditional and unconditional unpacked
ASRD.  This meant adding a new define_insn for the unconditional form,
instead of reusing the conditional instructions.  It also meant
extending the current conditional patterns to support merging with
any independent value, not just zero.

Tested on aarch64-linux-gnu and aarch64_be-elf.  Pushed to trunk.

Richard


gcc/
* config/aarch64/aarch64-sve.md (sdiv_pow23): Extend from
SVE_FULL_I to SVE_I.  Generate an UNSPEC_PRED_X.
(*sdiv_pow23): New pattern.
(@cond_): Extend from SVE_FULL_I to SVE_I.
Wrap the ASRD in an UNSPEC_PRED_X.
(*cond__2): Likewise.  Replace the UNSPEC_PRED_X
predicate with a constant PTRUE, if it isn't already.
(*cond__z): Replace with...
(*cond__any): ...this new pattern.

gcc/testsuite/
* gcc.target/aarch64/sve/asrdiv_4.c: New test.
* gcc.target/aarch64/sve/cond_asrd_1.c: Likewise.
* gcc.target/aarch64/sve/cond_asrd_1_run.c: Likewise.
* gcc.target/aarch64/sve/cond_asrd_2.c: Likewise.
* gcc.target/aarch64/sve/cond_asrd_2_run.c: Likewise.
* gcc.target/aarch64/sve/cond_asrd_3.c: Likewise.
* gcc.target/aarch64/sve/cond_asrd_3_run.c: Likewise.
---
 gcc/config/aarch64/aarch64-sve.md | 119 --
 .../gcc.target/aarch64/sve/asrdiv_4.c |  29 +
 .../gcc.target/aarch64/sve/cond_asrd_1.c  |  32 +
 .../gcc.target/aarch64/sve/cond_asrd_1_run.c  |  26 
 .../gcc.target/aarch64/sve/cond_asrd_2.c  |  35 ++
 .../gcc.target/aarch64/sve/cond_asrd_2_run.c  |  26 
 .../gcc.target/aarch64/sve/cond_asrd_3.c  |  35 ++
 .../gcc.target/aarch64/sve/cond_asrd_3_run.c  |  26 
 8 files changed, 290 insertions(+), 38 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/asrdiv_4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_asrd_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_asrd_1_run.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_asrd_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_asrd_2_run.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_asrd_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_asrd_3_run.c

diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index b8259f24b3d..a6f8450f951 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -4724,68 +4724,111 @@ (define_insn "*cond__z"
 ;; - URSHR (SVE2)
 ;; -
 
-;; Unpredicated .
+;; Unpredicated ASRD.
 (define_expand "sdiv_pow23"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand")
-   (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand")
+   (unspec:SVE_I
  [(match_dup 3)
-  (unspec:SVE_FULL_I
-[(match_operand:SVE_FULL_I 1 "register_operand")
+  (unspec:SVE_I
+[(match_operand:SVE_I 1 "register_operand")
  (match_operand 2 "aarch64_simd_rshift_imm")]
-UNSPEC_ASRD)
-  (match_dup 1)]
-UNSPEC_SEL))]
+UNSPEC_ASRD)]
+UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
 operands[3] = aarch64_ptrue_reg (mode);
   }
 )
 
-;; Predicated right shift with merging.
+;; Predicated ASRD.
+(define_insn "*sdiv_pow23"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+   (unspec:SVE_I
+ [(match_operand: 1 "register_operand" "Upl, Upl")
+  (unspec:SVE_I
+[(match_operand:SVE_I 2 "register_operand" "0, w")
+ (match_operand:SVE_I 3 "aarch64_simd_rshift_imm")]
+UNSPEC_ASRD)]
+ UNSPEC_PRED_X))]
+  "TARGET_SVE"
+  "@
+   asrd\t%0., %1/m, %0., #%3
+   movprfx\t%0, %2\;asrd\t%0., %1/m, %0., #%3"
+  [(set_attr "movprfx" "*,yes")])
+
+;; Predicated shift with merging.
 (define_expand "@cond_"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand")
-   (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand")
+   (unspec:SVE_I
  [(match_operand: 1 "register_operand")
-  (unspec:SVE_FULL_I
-[(match_operand:SVE_FULL_I 2 "register_operand")
- (match_operand:SVE_FULL_I 3 "aarch64_simd_shift_imm")]
-SVE_INT_SHIFT_IMM)
-  (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
+  (unspec:SVE_I
+[(match_dup 5)
+ (unspec:SVE_I
+   [(match_operand:SVE_I 2 "register_operand")
+(match_operand:SVE_I 3 "aarch64_simd_shift_imm")]
+   SVE_INT_SHIFT_IMM)]
+UNSPEC_PRED_X)
+  (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
  UNSPEC_SEL))]
   "TARGET_SVE"
+  {
+operands[5] = aarch64_ptrue_reg (mode);
+  }
 )
 
-;; Predicated right shift, merging with the first input.
-(define_insn "*cond__2"
-  [(set

[pushed] aarch64: Add support for unpacked SVE conditional BIC

2021-01-11 Thread Richard Sandiford via Gcc-patches
This patch adds support for unpacked conditional BIC.  The type suffix
could be taken from the element size or the container size, so the
patch continues to use the element size.  This is consistent with
the existing support for unconditional BIC.

Tested on aarch64-linux-gnu and aarch64_be-elf.  Pushed to trunk.

Richard


gcc/
* config/aarch64/aarch64-sve.md (*cond_bic_2): Extend from
SVE_FULL_I to SVE_I.
(*cond_bic_any): Likewise.

gcc/testsuite/
* g++.target/aarch64/sve/cond_bic_1.C: New test.
* g++.target/aarch64/sve/cond_bic_2.C: Likewise.
* g++.target/aarch64/sve/cond_bic_3.C: Likewise.
* g++.target/aarch64/sve/cond_bic_4.C: Likewise.
---
 gcc/config/aarch64/aarch64-sve.md | 26 ++--
 .../g++.target/aarch64/sve/cond_bic_1.C   | 40 +++
 .../g++.target/aarch64/sve/cond_bic_2.C   | 31 ++
 .../g++.target/aarch64/sve/cond_bic_3.C   | 36 +
 .../g++.target/aarch64/sve/cond_bic_4.C   | 36 +
 5 files changed, 156 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_bic_1.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_bic_2.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_bic_3.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_bic_4.C

diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index 2e6128e6032..b8259f24b3d 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -4475,13 +4475,13 @@ (define_expand "@cond_bic"
 
 ;; Predicated integer BIC, merging with the first input.
 (define_insn "*cond_bic_2"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
-   (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+   (unspec:SVE_I
  [(match_operand: 1 "register_operand" "Upl, Upl")
-  (and:SVE_FULL_I
-(not:SVE_FULL_I
-  (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
-(match_operand:SVE_FULL_I 2 "register_operand" "0, w"))
+  (and:SVE_I
+(not:SVE_I
+  (match_operand:SVE_I 3 "register_operand" "w, w"))
+(match_operand:SVE_I 2 "register_operand" "0, w"))
   (match_dup 2)]
  UNSPEC_SEL))]
   "TARGET_SVE"
@@ -4493,14 +4493,14 @@ (define_insn "*cond_bic_2"
 
 ;; Predicated integer BIC, merging with an independent value.
 (define_insn_and_rewrite "*cond_bic_any"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, ?&w")
-   (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, ?&w")
+   (unspec:SVE_I
  [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl")
-  (and:SVE_FULL_I
-(not:SVE_FULL_I
-  (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, w"))
-(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w"))
-  (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 
w")]
+  (and:SVE_I
+(not:SVE_I
+  (match_operand:SVE_I 3 "register_operand" "w, w, w, w"))
+(match_operand:SVE_I 2 "register_operand" "0, w, w, w"))
+  (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
  UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
   "@
diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_bic_1.C 
b/gcc/testsuite/g++.target/aarch64/sve/cond_bic_1.C
new file mode 100644
index 000..9f7cd75fd08
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/cond_bic_1.C
@@ -0,0 +1,40 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */
+
+#include 
+
+#define TEST_OP(TYPE) \
+  TYPE \
+  test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c) \
+  { \
+return c == 0 ? a & ~b : a; \
+  }
+
+#define TEST_TYPE(TYPE, SIZE) \
+  typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
+  TEST_OP (TYPE##SIZE)
+
+TEST_TYPE (uint8_t, 32)
+
+TEST_TYPE (uint8_t, 64)
+TEST_TYPE (uint16_t, 64)
+
+TEST_TYPE (uint8_t, 128)
+TEST_TYPE (uint16_t, 128)
+TEST_TYPE (uint32_t, 128)
+
+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, 
\[x0\]\n[^L]*\tbic\t\1\.b, p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */
+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, 
\[x0\]\n[^L]*\tbic\t\1\.b, p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */
+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, 
\[x0\]\n[^L]*\tbic\t\1\.b, p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */
+/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, 
\[x0\]\n[^L]*\tbic\t\1\.h, p[0-7]/m, \1\.h, z[0-9]+\.h\n} } } */
+/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, 
\[x0\]\n[^L]*\tbic\t\1\.h, p[0-7]/m, \1\.h, z[0-9]+\.h\n} } } */
+/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7

[pushed] aarch64: Add support for unpacked SVE MULH

2021-01-11 Thread Richard Sandiford via Gcc-patches
This patch extends the SMULH and UMULH support to unpacked vectors.
The type suffix must be taken from the element size rather than the
container size.

The main use of these patterns is to support division and modulus
by a constant.  The conditional forms would be hard to trigger from
non-ACLE code, and ACLE code needs fully-packed vectors only.

Tested on aarch64-linux-gnu and aarch64_be-elf.  Pushed to trunk.

Richard


gcc/
* config/aarch64/aarch64-sve.md (mul3_highpart)
(@aarch64_pred_): Extend from SVE_FULL_I
to SVE_I.

gcc/testsuite/
* gcc.target/aarch64/sve/mul_highpart_3.c: New test.
---
 gcc/config/aarch64/aarch64-sve.md | 20 +--
 .../gcc.target/aarch64/sve/mul_highpart_3.c   | 34 +++
 2 files changed, 44 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_3.c

diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index 8083749a07e..2e6128e6032 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -4192,12 +4192,12 @@ (define_insn "@aarch64_sve_"
 
 ;; Unpredicated highpart multiplication.
 (define_expand "mul3_highpart"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand")
-   (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand")
+   (unspec:SVE_I
  [(match_dup 3)
-  (unspec:SVE_FULL_I
-[(match_operand:SVE_FULL_I 1 "register_operand")
- (match_operand:SVE_FULL_I 2 "register_operand")]
+  (unspec:SVE_I
+[(match_operand:SVE_I 1 "register_operand")
+ (match_operand:SVE_I 2 "register_operand")]
 MUL_HIGHPART)]
  UNSPEC_PRED_X))]
   "TARGET_SVE"
@@ -4208,12 +4208,12 @@ (define_expand "mul3_highpart"
 
 ;; Predicated highpart multiplication.
 (define_insn "@aarch64_pred_"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
-   (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+   (unspec:SVE_I
  [(match_operand: 1 "register_operand" "Upl, Upl")
-  (unspec:SVE_FULL_I
-[(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
- (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
+  (unspec:SVE_I
+[(match_operand:SVE_I 2 "register_operand" "%0, w")
+ (match_operand:SVE_I 3 "register_operand" "w, w")]
 MUL_HIGHPART)]
  UNSPEC_PRED_X))]
   "TARGET_SVE"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_3.c 
b/gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_3.c
new file mode 100644
index 000..3aa6575e4ab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_3.c
@@ -0,0 +1,34 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */
+
+#include 
+
+#define TEST_OP(TYPE) \
+  TYPE test##_##TYPE##_reg (TYPE a, TYPE b) { return a % 17; }
+
+#define TEST_TYPE(TYPE, SIZE) \
+  typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
+  TEST_OP (TYPE##SIZE)
+
+TEST_TYPE (int8_t, 32)
+TEST_TYPE (uint8_t, 32)
+
+TEST_TYPE (int8_t, 64)
+TEST_TYPE (uint8_t, 64)
+TEST_TYPE (int16_t, 64)
+TEST_TYPE (uint16_t, 64)
+
+TEST_TYPE (int8_t, 128)
+TEST_TYPE (uint8_t, 128)
+TEST_TYPE (int16_t, 128)
+TEST_TYPE (uint16_t, 128)
+TEST_TYPE (int32_t, 128)
+TEST_TYPE (uint32_t, 128)
+
+/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.b, p[0-7]/m, 
z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.b, p[0-7]/m, 
z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */


[pushed] aarch64: Add support for unpacked SVE ABD

2021-01-11 Thread Richard Sandiford via Gcc-patches
This patch adds support for unpacked SVE SABD and UABD.
It also rewrites the patterns so that they match as combine
patterns without the need for REG_EQUAL notes.  Finally,
there was no pattern for merging with the second input,
which can be handled by reversing the operands.

The type suffix needs to be taken from the element size rather
than the container size.

Tested on aarch64-linux-gnu and aarch64_be-elf.  Pushed to trunk.

Richard


gcc/
* config/aarch64/aarch64-sve.md (abd_3): Extend from
SVE_FULL_I to SVE_I.
(*aarch64_cond_abd_2): Likewise.
(*aarch64_cond_abd_any): Likewise.
(@aarch64_pred_abd): Likewise.  Use UNSPEC_PRED_X
for the max and min but not for the minus.
(*aarch64_cond_abd_3): New pattern.

gcc/testsuite/
* g++.target/aarch64/sve/abd_1.C: New test.
* g++.target/aarch64/sve/cond_abd_1.C: Likewise.
* g++.target/aarch64/sve/cond_abd_2.C: Likewise.
* g++.target/aarch64/sve/cond_abd_3.C: Likewise.
* g++.target/aarch64/sve/cond_abd_4.C: Likewise.
---
 gcc/config/aarch64/aarch64-sve.md | 100 --
 gcc/testsuite/g++.target/aarch64/sve/abd_1.C  |  38 +++
 .../g++.target/aarch64/sve/cond_abd_1.C   |  60 +++
 .../g++.target/aarch64/sve/cond_abd_2.C   |  60 +++
 .../g++.target/aarch64/sve/cond_abd_3.C   |  49 +
 .../g++.target/aarch64/sve/cond_abd_4.C   |  43 
 6 files changed, 317 insertions(+), 33 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/abd_1.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_abd_1.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_abd_2.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_abd_3.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_abd_4.C

diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index 27b7fd0fccd..8083749a07e 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3973,10 +3973,10 @@ (define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
 
 ;; Unpredicated integer absolute difference.
 (define_expand "abd_3"
-  [(use (match_operand:SVE_FULL_I 0 "register_operand"))
-   (USMAX:SVE_FULL_I
- (match_operand:SVE_FULL_I 1 "register_operand")
- (match_operand:SVE_FULL_I 2 "register_operand"))]
+  [(use (match_operand:SVE_I 0 "register_operand"))
+   (USMAX:SVE_I
+ (match_operand:SVE_I 1 "register_operand")
+ (match_operand:SVE_I 2 "register_operand"))]
   "TARGET_SVE"
   {
 rtx pred = aarch64_ptrue_reg (mode);
@@ -3988,17 +3988,20 @@ (define_expand "abd_3"
 
 ;; Predicated integer absolute difference.
 (define_insn "@aarch64_pred_abd"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
-   (unspec:SVE_FULL_I
- [(match_operand: 1 "register_operand" "Upl, Upl")
-  (minus:SVE_FULL_I
-(USMAX:SVE_FULL_I
-  (match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
-  (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
-(:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+   (minus:SVE_I
+ (unspec:SVE_I
+   [(match_operand: 1 "register_operand" "Upl, Upl")
+(USMAX:SVE_I
+  (match_operand:SVE_I 2 "register_operand" "%0, w")
+  (match_operand:SVE_I 3 "register_operand" "w, w"))]
+   UNSPEC_PRED_X)
+ (unspec:SVE_I
+   [(match_dup 1)
+(:SVE_I
   (match_dup 2)
-  (match_dup 3)))]
- UNSPEC_PRED_X))]
+  (match_dup 3))]
+   UNSPEC_PRED_X)))]
   "TARGET_SVE"
   "@
abd\t%0., %1/m, %0., %3.
@@ -4033,19 +4036,19 @@ (define_expand "@aarch64_cond_abd"
 
 ;; Predicated integer absolute difference, merging with the first input.
 (define_insn_and_rewrite "*aarch64_cond_abd_2"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
-   (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+   (unspec:SVE_I
  [(match_operand: 1 "register_operand" "Upl, Upl")
-  (minus:SVE_FULL_I
-(unspec:SVE_FULL_I
+  (minus:SVE_I
+(unspec:SVE_I
   [(match_operand 4)
-   (USMAX:SVE_FULL_I
- (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
- (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
+   (USMAX:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "0, w")
+ (match_operand:SVE_I 3 "register_operand" "w, w"))]
   UNSPEC_PRED_X)
-(unspec:SVE_FULL_I
+(unspec:SVE_I
   [(match_operand 5)
-   (:SVE_FULL_I
+   (:SVE_I
  (match_dup 2)
  (match_dup 3))]
   UNSPEC_PRED_X))
@@ -4062,25 +4065,56 @@ (define_insn_an

[pushed] aarch64: Add support for unpacked SVE ADR

2021-01-11 Thread Richard Sandiford via Gcc-patches
This patch extends the ADR patterns to handle unpacked vectors.
They would work with both elements and containers, but since
the instructions only support .s and .d, we get more coverage
by using containers.

Tested on aarch64-linux-gnu and aarch64_be-elf.  Pushed to trunk.

Richard


gcc/
* config/aarch64/iterators.md (SVE_24I): New iterator.
* config/aarch64/aarch64-sve.md (*aarch64_adr_shift): Extend from
SVE_FULL_SDI to SVE_24I.  Use containers rather than elements.

gcc/testsuite/
* gcc.target/aarch64/sve/adr_6.c: New test.
---
 gcc/config/aarch64/aarch64-sve.md| 16 
 gcc/config/aarch64/iterators.md  |  4 ++
 gcc/testsuite/gcc.target/aarch64/sve/adr_6.c | 43 
 3 files changed, 55 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/adr_6.c

diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index 547f34134a1..27b7fd0fccd 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3901,17 +3901,17 @@ (define_expand "@aarch64_adr_shift"
 )
 
 (define_insn_and_rewrite "*aarch64_adr_shift"
-  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
-   (plus:SVE_FULL_SDI
- (unspec:SVE_FULL_SDI
+  [(set (match_operand:SVE_24I 0 "register_operand" "=w")
+   (plus:SVE_24I
+ (unspec:SVE_24I
[(match_operand 4)
-(ashift:SVE_FULL_SDI
-  (match_operand:SVE_FULL_SDI 2 "register_operand" "w")
-  (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
+(ashift:SVE_24I
+  (match_operand:SVE_24I 2 "register_operand" "w")
+  (match_operand:SVE_24I 3 "const_1_to_3_operand"))]
UNSPEC_PRED_X)
- (match_operand:SVE_FULL_SDI 1 "register_operand" "w")))]
+ (match_operand:SVE_24I 1 "register_operand" "w")))]
   "TARGET_SVE"
-  "adr\t%0., [%1., %2., lsl %3]"
+  "adr\t%0., [%1., %2., lsl %3]"
   "&& !CONSTANT_P (operands[4])"
   {
 operands[4] = CONSTM1_RTX (mode);
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 5a82d9395f9..54a99d441b8 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -422,6 +422,10 @@ (define_mode_iterator SVE_24 [VNx2QI VNx2HI VNx2HF VNx2BF 
VNx2SI VNx2SF
  VNx2DI VNx2DF
  VNx4QI VNx4HI VNx4HF VNx4BF VNx4SI VNx4SF])
 
+;; SVE integer modes with 2 or 4 elements.
+(define_mode_iterator SVE_24I [VNx2QI VNx2HI VNx2SI VNx2DI
+  VNx4QI VNx4HI VNx4SI])
+
 ;; SVE modes with 2 elements.
 (define_mode_iterator SVE_2 [VNx2QI VNx2HI VNx2HF VNx2BF
 VNx2SI VNx2SF VNx2DI VNx2DF])
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/adr_6.c 
b/gcc/testsuite/gcc.target/aarch64/sve/adr_6.c
new file mode 100644
index 000..1f927493a8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_6.c
@@ -0,0 +1,43 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */
+
+#include 
+
+#define TEST_OP_IMM(TYPE, AMT) \
+  TYPE test##_##TYPE##_##AMT (TYPE a, TYPE b) { return a + b * AMT; }
+
+#define TEST_OP(TYPE) \
+  TEST_OP_IMM (TYPE, 2) \
+  TEST_OP_IMM (TYPE, 4) \
+  TEST_OP_IMM (TYPE, 8)
+
+#define TEST_TYPE(TYPE, SIZE) \
+  typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
+  TEST_OP (TYPE##SIZE)
+
+TEST_TYPE (int8_t, 32)
+TEST_TYPE (uint8_t, 32)
+
+TEST_TYPE (int8_t, 64)
+TEST_TYPE (uint8_t, 64)
+TEST_TYPE (int16_t, 64)
+TEST_TYPE (uint16_t, 64)
+
+/* These two can't use ADR.  */
+TEST_TYPE (int8_t, 128)
+TEST_TYPE (uint8_t, 128)
+TEST_TYPE (int16_t, 128)
+TEST_TYPE (uint16_t, 128)
+TEST_TYPE (int32_t, 128)
+TEST_TYPE (uint32_t, 128)
+
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b,} 6 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b,} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tadr\tz[0-9]+\.s, \[z[0-9]+\.s, 
z[0-9]+\.s, lsl #?1\]\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tadr\tz[0-9]+\.s, \[z[0-9]+\.s, 
z[0-9]+\.s, lsl #?2\]\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tadr\tz[0-9]+\.s, \[z[0-9]+\.s, 
z[0-9]+\.s, lsl #?3\]\n} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tadr\tz[0-9]+\.d, \[z[0-9]+\.d, 
z[0-9]+\.d, lsl #?1\]\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tadr\tz[0-9]+\.d, \[z[0-9]+\.d, 
z[0-9]+\.d, lsl #?2\]\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tadr\tz[0-9]+\.d, \[z[0-9]+\.d, 
z[0-9]+\.d, lsl #?3\]\n} 6 } } */


[pushed] aarch64: Add general unpacked SVE conditional binary arithmetic

2021-01-11 Thread Richard Sandiford via Gcc-patches
This patch adds support for conditional binary ADD, SUB, MUL, SMAX,
UMAX, SMIN, UMIN, LSL, LSR, ASR, AND, ORR and EOR.  It's not really
possible to split it up further given how the patterns are written.

Min, max and right-shift need the element size rather than the container
size.  The others would work with both, although MUL should be more
efficient when applied to elements instead of containers.

Tested on aarch64-linux-gnu and aarch64_be-elf.  Pushed to trunk.

Richard


gcc/
* config/aarch64/aarch64-sve.md (@cond_)
(*cond__2): Extend from SVE_FULL_I
to SVE_I.
(*cond__3): Likewise.
(*cond__any): Likewise.
(*cond__2_const): Likewise.
(*cond__any_const): Likewise.

gcc/testsuite/
* g++.target/aarch64/sve/cond_arith_1.C: New test.
* g++.target/aarch64/sve/cond_arith_2.C: Likewise.
* g++.target/aarch64/sve/cond_arith_3.C: Likewise.
* g++.target/aarch64/sve/cond_arith_4.C: Likewise.
* g++.target/aarch64/sve/cond_shift_1.C: New test.
* g++.target/aarch64/sve/cond_shift_2.C: Likewise.
* g++.target/aarch64/sve/cond_shift_3.C: Likewise.
* g++.target/aarch64/sve/cond_shift_4.C: Likewise.
---
 gcc/config/aarch64/aarch64-sve.md | 66 ++---
 .../g++.target/aarch64/sve/cond_arith_1.C | 89 ++
 .../g++.target/aarch64/sve/cond_arith_2.C | 89 ++
 .../g++.target/aarch64/sve/cond_arith_3.C | 91 ++
 .../g++.target/aarch64/sve/cond_arith_4.C | 91 ++
 .../g++.target/aarch64/sve/cond_shift_1.C | 92 ++
 .../g++.target/aarch64/sve/cond_shift_2.C | 91 ++
 .../g++.target/aarch64/sve/cond_shift_3.C | 94 +++
 .../g++.target/aarch64/sve/cond_shift_4.C | 94 +++
 9 files changed, 764 insertions(+), 33 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_arith_1.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_arith_2.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_arith_3.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_arith_4.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_shift_1.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_shift_2.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_shift_3.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_shift_4.C

diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index 697a55e1cad..547f34134a1 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3687,25 +3687,25 @@ (define_insn "*post_ra_3"
 
 ;; Predicated integer operations with merging.
 (define_expand "@cond_"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand")
-   (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand")
+   (unspec:SVE_I
  [(match_operand: 1 "register_operand")
-  (SVE_INT_BINARY:SVE_FULL_I
-(match_operand:SVE_FULL_I 2 "register_operand")
-(match_operand:SVE_FULL_I 3 ""))
-  (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
+  (SVE_INT_BINARY:SVE_I
+(match_operand:SVE_I 2 "register_operand")
+(match_operand:SVE_I 3 ""))
+  (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
  UNSPEC_SEL))]
   "TARGET_SVE"
 )
 
 ;; Predicated integer operations, merging with the first input.
 (define_insn "*cond__2"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
-   (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+   (unspec:SVE_I
  [(match_operand: 1 "register_operand" "Upl, Upl")
-  (SVE_INT_BINARY:SVE_FULL_I
-(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
-(match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
+  (SVE_INT_BINARY:SVE_I
+(match_operand:SVE_I 2 "register_operand" "0, w")
+(match_operand:SVE_I 3 "register_operand" "w, w"))
   (match_dup 2)]
  UNSPEC_SEL))]
   "TARGET_SVE"
@@ -3717,12 +3717,12 @@ (define_insn "*cond__2"
 
 ;; Predicated integer operations, merging with the second input.
 (define_insn "*cond__3"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
-   (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+   (unspec:SVE_I
  [(match_operand: 1 "register_operand" "Upl, Upl")
-  (SVE_INT_BINARY:SVE_FULL_I
-(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
-(match_operand:SVE_FULL_I 3 "register_operand" "0, w"))
+  (SVE_INT_BINARY:SVE_I
+(match_operand:SVE_I 2 "register_operand" "w, w")
+(match_operand:SVE_I 3 "register_operand" "0, w"))
   (match_dup 3)]
  UNSPEC_SEL))]
   "TARGET_SVE"
@@ -3734,13 +3

[pushed] aarch64: Add support for unpacked SVE mult, max and min

2021-01-11 Thread Richard Sandiford via Gcc-patches
This patch makes the SVE_INT_BINARY_IMM patterns support
unpacked arithmetic, covering MUL, SMAX, SMIN, UMAX and UMIN.
For min and max, the type suffix must be taken from the element
size rather than the container size.

The XFAILs are due to PR98602.

Tested on aarch64-linux-gnu and aarch64_be-elf.  Pushed to trunk.

Richard


gcc/
* config/aarch64/aarch64-sve.md (3)
(@aarch64_pred_)
(*post_ra_3): Extend from SVE_FULL_I
to SVE_I.

gcc/testsuite/
PR testsuite/98602
* g++.target/aarch64/sve/max_1.C: New test.
* g++.target/aarch64/sve/min_1.C: Likewise.
* gcc.target/aarch64/sve/mul_2.c: Likewise.
---
 gcc/config/aarch64/aarch64-sve.md| 30 
 gcc/testsuite/g++.target/aarch64/sve/max_1.C | 73 
 gcc/testsuite/g++.target/aarch64/sve/min_1.C | 73 
 gcc/testsuite/gcc.target/aarch64/sve/mul_2.c | 52 ++
 4 files changed, 213 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/max_1.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/min_1.C
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/mul_2.c

diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index a58324da869..697a55e1cad 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3628,12 +3628,12 @@ (define_insn "*one_cmpl3"
 
 ;; Unpredicated integer binary operations that have an immediate form.
 (define_expand "3"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand")
-   (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand")
+   (unspec:SVE_I
  [(match_dup 3)
-  (SVE_INT_BINARY_IMM:SVE_FULL_I
-(match_operand:SVE_FULL_I 1 "register_operand")
-(match_operand:SVE_FULL_I 2 "aarch64_sve__operand"))]
+  (SVE_INT_BINARY_IMM:SVE_I
+(match_operand:SVE_I 1 "register_operand")
+(match_operand:SVE_I 2 "aarch64_sve__operand"))]
  UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
@@ -3647,12 +3647,12 @@ (define_expand "3"
 ;; and would make the instruction seem less uniform to the register
 ;; allocator.
 (define_insn_and_split "@aarch64_pred_"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w")
-   (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w, ?&w")
+   (unspec:SVE_I
  [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl")
-  (SVE_INT_BINARY_IMM:SVE_FULL_I
-(match_operand:SVE_FULL_I 2 "register_operand" "%0, 0, w, w")
-(match_operand:SVE_FULL_I 3 "aarch64_sve__operand" 
", w, , w"))]
+  (SVE_INT_BINARY_IMM:SVE_I
+(match_operand:SVE_I 2 "register_operand" "%0, 0, w, w")
+(match_operand:SVE_I 3 "aarch64_sve__operand" 
", w, , w"))]
  UNSPEC_PRED_X))]
   "TARGET_SVE"
   "@
@@ -3665,7 +3665,7 @@ (define_insn_and_split "@aarch64_pred_"
   "&& reload_completed
&& !register_operand (operands[3], mode)"
   [(set (match_dup 0)
-   (SVE_INT_BINARY_IMM:SVE_FULL_I (match_dup 2) (match_dup 3)))]
+   (SVE_INT_BINARY_IMM:SVE_I (match_dup 2) (match_dup 3)))]
   ""
   [(set_attr "movprfx" "*,*,yes,yes")]
 )
@@ -3674,10 +3674,10 @@ (define_insn_and_split "@aarch64_pred_"
 ;; These are generated by splitting a predicated instruction whose
 ;; predicate is unused.
 (define_insn "*post_ra_3"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
-   (SVE_INT_BINARY_IMM:SVE_FULL_I
- (match_operand:SVE_FULL_I 1 "register_operand" "0, w")
- (match_operand:SVE_FULL_I 2 "aarch64_sve__immediate")))]
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+   (SVE_INT_BINARY_IMM:SVE_I
+ (match_operand:SVE_I 1 "register_operand" "0, w")
+ (match_operand:SVE_I 2 "aarch64_sve__immediate")))]
   "TARGET_SVE && reload_completed"
   "@
\t%0., %0., #%2
diff --git a/gcc/testsuite/g++.target/aarch64/sve/max_1.C 
b/gcc/testsuite/g++.target/aarch64/sve/max_1.C
new file mode 100644
index 000..caf9d7cd9bb
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/max_1.C
@@ -0,0 +1,73 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */
+
+#include 
+
+#define TEST_OP_IMM(TYPE, OP, NAME, AMT) \
+  TYPE test##_##TYPE##_##NAME (TYPE a) { return a > AMT ? a : AMT; }
+
+#define TEST_OP(TYPE, MINV, MAXV) \
+  TYPE test##_##TYPE##_reg (TYPE a, TYPE b) { return a > b ? a : b; } \
+  TEST_OP_IMM (TYPE, OP, a, MINV) \
+  TEST_OP_IMM (TYPE, OP, b, 50) \
+  TEST_OP_IMM (TYPE, OP, c, MAXV)
+
+#define TEST_TYPE(TYPE, SIZE, MINV, MAXV) \
+  typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
+  TEST_OP (TYPE##SIZE, MINV, MAXV)
+
+TEST_TYPE (int8_t, 32, -100, 100)
+TEST_TYPE (uint8_t, 32, 2, 250)
+
+TEST_TYPE (int8_t, 64, -110, 110)
+TEST_TYPE (uint8_t, 64, 3, 253)
+TEST_TYPE (int

[pushed] aarch64: Add support for unpacked SVE shifts

2021-01-11 Thread Richard Sandiford via Gcc-patches
This patch adds support for unpacked SVE LSL, ASR and LSR.
For right shifts, the type suffix needs to be taken from the
element size rather than the container size.

Tested on aarch64-linux-gnu and aarch64_be-elf.  Pushed to trunk.

Richard


gcc/
* config/aarch64/aarch64-sve.md (3)
(v3, @aarch64_pred_)
(*post_ra_v3): Extend from SVE_FULL_I to SVE_I.

gcc/testsuite/
* gcc.target/aarch64/sve/shift_2.c: New test.
---
 gcc/config/aarch64/aarch64-sve.md | 36 -
 .../gcc.target/aarch64/sve/shift_2.c  | 81 +++
 2 files changed, 99 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/shift_2.c

diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index 2f5a5e3c914..a58324da869 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -4500,9 +4500,9 @@ (define_insn_and_rewrite "*cond_bic_any"
 ;; Unpredicated shift by a scalar, which expands into one of the vector
 ;; shifts below.
 (define_expand "3"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand")
-   (ASHIFT:SVE_FULL_I
- (match_operand:SVE_FULL_I 1 "register_operand")
+  [(set (match_operand:SVE_I 0 "register_operand")
+   (ASHIFT:SVE_I
+ (match_operand:SVE_I 1 "register_operand")
  (match_operand: 2 "general_operand")))]
   "TARGET_SVE"
   {
@@ -4527,12 +4527,12 @@ (define_expand "3"
 
 ;; Unpredicated shift by a vector.
 (define_expand "v3"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand")
-   (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand")
+   (unspec:SVE_I
  [(match_dup 3)
-  (ASHIFT:SVE_FULL_I
-(match_operand:SVE_FULL_I 1 "register_operand")
-(match_operand:SVE_FULL_I 2 "aarch64_sve_shift_operand"))]
+  (ASHIFT:SVE_I
+(match_operand:SVE_I 1 "register_operand")
+(match_operand:SVE_I 2 "aarch64_sve_shift_operand"))]
  UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
@@ -4545,12 +4545,12 @@ (define_expand "v3"
 ;; likely to gain much and would make the instruction seem less uniform
 ;; to the register allocator.
 (define_insn_and_split "@aarch64_pred_"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w")
-   (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w")
+   (unspec:SVE_I
  [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl")
-  (ASHIFT:SVE_FULL_I
-(match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w")
-(match_operand:SVE_FULL_I 3 "aarch64_sve_shift_operand" 
"D, w, 0, w"))]
+  (ASHIFT:SVE_I
+(match_operand:SVE_I 2 "register_operand" "w, 0, w, w")
+(match_operand:SVE_I 3 "aarch64_sve_shift_operand" "D, w, 
0, w"))]
  UNSPEC_PRED_X))]
   "TARGET_SVE"
   "@
@@ -4560,7 +4560,7 @@ (define_insn_and_split "@aarch64_pred_"
movprfx\t%0, %2\;\t%0., %1/m, %0., %3."
   "&& reload_completed
&& !register_operand (operands[3], mode)"
-  [(set (match_dup 0) (ASHIFT:SVE_FULL_I (match_dup 2) (match_dup 3)))]
+  [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
   ""
   [(set_attr "movprfx" "*,*,*,yes")]
 )
@@ -4569,10 +4569,10 @@ (define_insn_and_split "@aarch64_pred_"
 ;; These are generated by splitting a predicated instruction whose
 ;; predicate is unused.
 (define_insn "*post_ra_v3"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
-   (ASHIFT:SVE_FULL_I
- (match_operand:SVE_FULL_I 1 "register_operand" "w")
- (match_operand:SVE_FULL_I 2 "aarch64_simd_shift_imm")))]
+  [(set (match_operand:SVE_I 0 "register_operand" "=w")
+   (ASHIFT:SVE_I
+ (match_operand:SVE_I 1 "register_operand" "w")
+ (match_operand:SVE_I 2 "aarch64_simd_shift_imm")))]
   "TARGET_SVE && reload_completed"
   "\t%0., %1., #%2"
 )
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/shift_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/shift_2.c
new file mode 100644
index 000..b7462c47db9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/shift_2.c
@@ -0,0 +1,81 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */
+
+#include 
+
+#define TEST_SHIFT_IMM(TYPE, NAME, OP, AMT) \
+  TYPE NAME##_##TYPE##_##AMT (TYPE a) { return a OP AMT; }
+
+#define TEST_SHIFT(TYPE, NAME, OP, LIMIT) \
+  TYPE NAME##_##TYPE##_reg (TYPE a, TYPE b) { return a OP b; } \
+  TEST_SHIFT_IMM (TYPE, NAME, OP, 1) \
+  TEST_SHIFT_IMM (TYPE, NAME, OP, 5) \
+  TEST_SHIFT_IMM (TYPE, NAME, OP, LIMIT)
+
+#define TEST_TYPE(TYPE, SIZE, LIMIT) \
+  typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
+  TEST_SHIFT (TYPE##SIZE, shl, <<, LIMIT) \
+  TEST_SHIFT (TYPE##SIZE, shr, >>, LIMIT) \
+
+TEST_TYPE (int8_t, 32, 7)
+TEST_TYPE (uint8_t, 32, 7)
+
+TEST_TYPE (int8_t, 64, 7)
+TEST_TYPE (uint8_t, 64, 7)
+TEST_TYPE (in

Re: [PATCH] Properly release symtab::m_clones.

2021-01-11 Thread Martin Liška

On 1/11/21 5:55 PM, Jan Hubicka wrote:

The patch is about not using delete on memory that
is allocated by GGC.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

PR jit/98615
* symtab-clones.h (clone_info::release): Release
symtab::m_clones with ggc_delete as it's a GGC memory.


OK, so this does not cause regular crashes because it is run only with
JIT? Sorry for that.


Yep :)



I was thinking of making the finalizers to be run also with checking
enabled - that should catch such errors sooner.


Be my guest. May I install the suggested patch after testing?

Thanks,
Martin



Honza

---
  gcc/symtab-clones.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/symtab-clones.h b/gcc/symtab-clones.h
index cdb19cb6c8c..5695a434f6a 100644
--- a/gcc/symtab-clones.h
+++ b/gcc/symtab-clones.h
@@ -83,7 +83,7 @@ inline void
  clone_info::release ()
  {
if (symtab->m_clones)
-delete (symtab->m_clones);
+ggc_delete (symtab->m_clones);
symtab->m_clones = NULL;
  }
--
2.29.2



>From 8e9e59c2636f1802951a983c603af15fd46c5106 Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Mon, 11 Jan 2021 18:12:54 +0100
Subject: [PATCH] Call toplev::finalize in CHECKING_P mode.

gcc/ChangeLog:

	PR jit/98615
	* main.c (main): Call toplev::finalize in CHECKING_P mode.
---
 gcc/main.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/main.c b/gcc/main.c
index ab0244b1851..093e63954f8 100644
--- a/gcc/main.c
+++ b/gcc/main.c
@@ -36,5 +36,10 @@ main (int argc, char **argv)
   toplev toplev (NULL, /* external_timer */
 		 true /* init_signals */);
 
-  return toplev.main (argc, argv);
+  int r = toplev.main (argc, argv);
+#if CHECKING_P
+  toplev.finalize ();
+#endif
+
+  return r;
 }
-- 
2.29.2



Re: [patch] fix -Wformat-diag warnings in rs6000-call.c

2021-01-11 Thread Martin Sebor via Gcc-patches

On 1/11/21 9:30 AM, Matthias Klose wrote:

On 1/10/21 10:18 PM, Martin Sebor wrote:

On 1/10/21 3:29 AM, Matthias Klose wrote:

is the newline intended? It's followed by a debug_rtx call.


To avoid the warning there shouldn't be any trailing punctuation
or whitespace in the message.  The GCC quoting directives should
be preferred over the literal characters (as per GCC Coding
Conventions).  %qc and %qs are preferable to %<%c%>.

Symbols/identifiers should be formatted using the appropriate
directives or quoted in %< %>.  Underscores in words like
emit_insn are taken as indicators that the word is an identifier
and to trigger warnings.


is this?
https://gcc.gnu.org/codingconventions.html#Diagnostics

I think that's a bit terse, and grepping sources for %< shows many more
occurrences than %qX.


Agreed, the section could stand to be expanded.  I don't know about
the metrics.  There are lots of % but only a few %<%[cs]%>:

$ grep " %<%[cs]%> " /src/gcc/master/gcc/po/gcc.pot
"cannot apply %<%s%> to %qD, which has also been marked with an OpenMP "
"incompatible %qs clause when applying %<%s%> to %qD, which has already 
been "

"missing %qs clause when applying %<%s%> to %qD, which has already been "
msgid "unbalanced punctuation character %<%c%> in format"
msgid "unterminated quote character %<%c%> in format"
msgid "bad value %<%s%> for %<-mtls-size=%> switch"
msgid "unexpected %<%s%> after %<%s%>"
msgid "invalid argument %<%s%> for %<-mharden-sls=%>"
msgid "invalid argument %<%s%> for %<-mbranch-protection=%>"
msgid "Unexpected %<%c%> for nonderived-type variable %qs at %C"

These should still be changed to %qc and %qs at some point.




../../src/gcc/rtl.c:860:42: warning: unquoted sequence of 2 consecutive
punctuation characters '',' in format [-Wformat-diag]
    860 | ("RTL check: expected elt %d type '%c', have '%c' (rtx %s) in %s, 
at
%s:%d",

`%c', or some %q quoting?


The purpose of the -Wformat-diag warnings is to improve the consistency
of user-visible messages and make them easier to translate.  There was
a discussion some time back about whether internal errors should fall
into this category.  I'm not sure if it reached a conclusion one way
or the other but in similar situations elsewhere in GCC we have
suppressed the warning via #pragma GCC diagnostic.  If it takes too
much effort to clean them up it might make sense to do the same here
(the downside is that it doesn't help translators).  Otherwise,
the messages are not really phrased in a way that's comprehensible
either to users or to translators (acronyms like elt or rtx aren't universally
understood).


[...]


again, `' quotes, or some %q option?



The latter: %qs with an argument is best in general (it can reduce
translation effort between repeated messages parameterized on
the quoted string).


if the URL above is the correct place for the conventions, then maybe make it
more explicit there about the preferred choice.


Yes, I can make that change.  My hope is that the warning would
make the preferred choice clear in the followup note so if/where
it doesn't please let me know (or open a bug) and I'll adjust it.
(Otherwise, if there's no warning, there's no preference :)

Martin


[gcc-10 backport] libiberty: Support the new ("v0") mangling scheme in rust-demangle

2021-01-11 Thread Amanieu d'Antras via Gcc-patches
Could this commit be backported to the gcc-10 branch? We would like to
switch the default name mangling scheme in Rust but are currently
blocked on upstream tool support.

commit 84096498a7bd788599d4a7ca63543fc7c297645e
libiberty: Support the new ("v0") mangling scheme in rust-demangle


[PATCH 0/2] Check if AR works with --plugin and rc

2021-01-11 Thread H.J. Lu via Gcc-patches
AR from older binutils doesn't work with --plugin and rc:

[hjl@gnu-cfl-2 bin]$ touch foo.c
[hjl@gnu-cfl-2 bin]$ ar --plugin 
/usr/libexec/gcc/x86_64-redhat-linux/10/liblto_plugin.so rc libfoo.a foo.c
[hjl@gnu-cfl-2 bin]$ ./ar --plugin 
/usr/libexec/gcc/x86_64-redhat-linux/10/liblto_plugin.so rc libfoo.a foo.c
./ar: no operation specified
[hjl@gnu-cfl-2 bin]$ ./ar --version
GNU ar (Linux/GNU Binutils) 2.29.51.0.1.20180112
Copyright (C) 2018 Free Software Foundation, Inc.
This program is free software; you may redistribute it under the terms of
the GNU General Public License version 3 or (at your option) any later version.
This program has absolutely no warranty.
[hjl@gnu-cfl-2 bin]$

Check if AR works with --plugin and rc before passing --plugin to AR and
RANLIB.

Tested:

1. Binutils build
2. ./src-release.sh gdb
3. ./src-release.sh binutils 

with binutils 2.30.

H.J. Lu (2):
  GCC: Check if AR works with --plugin and rc
  Binutils: Check if AR works with --plugin and rc

 bfd/configure|  13 --
 binutils/configure   |  13 --
 config/gcc-plugin.m4 |  12 +
 configure| 102 +++
 gas/configure|  13 --
 gprof/configure  |  13 --
 ld/configure |  13 --
 libctf/configure |  13 --
 libiberty/configure  | 102 +++
 libtool.m4   |   9 +++-
 opcodes/configure|  13 --
 zlib/configure   |  13 --
 12 files changed, 304 insertions(+), 25 deletions(-)

-- 
2.29.2



[PATCH 2/2] Binutils: Check if AR works with --plugin and rc

2021-01-11 Thread H.J. Lu via Gcc-patches
Check if AR works with --plugin and rc before passing --plugin to AR and
RANLIB.

bfd/

PR ld/27173

binutils/

PR ld/27173
* configure: Regenerated.

gas/

PR ld/27173
* configure: Regenerated.

gprof/

PR ld/27173
* configure: Regenerated.

ld/

PR ld/27173
* configure: Regenerated.

libctf/

PR ld/27173
* configure: Regenerated.

opcodes/

PR ld/27173
* configure: Regenerated.
---
 bfd/configure  | 13 ++---
 binutils/configure | 13 ++---
 gas/configure  | 13 ++---
 gprof/configure| 13 ++---
 ld/configure   | 13 ++---
 libctf/configure   | 13 ++---
 opcodes/configure  | 13 ++---
 7 files changed, 70 insertions(+), 21 deletions(-)

diff --git a/bfd/configure b/bfd/configure
index 4b8dede2150..6142cb26f50 100755
--- a/bfd/configure
+++ b/bfd/configure
@@ -6932,7 +6932,14 @@ fi
 test -z "$AR" && AR=ar
 if test -n "$plugin_option"; then
   if $AR --help 2>&1 | grep -q "\--plugin"; then
-AR="$AR $plugin_option"
+touch conftest.c
+$AR $plugin_option rc conftest.a conftest.c
+if test "$?" != 0; then
+  echo "Failed: $AR $plugin_option rc"
+else
+  AR="$AR $plugin_option"
+fi
+rm -f conftest.*
   fi
 fi
 test -z "$AR_FLAGS" && AR_FLAGS=cru
@@ -11742,7 +11749,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11745 "configure"
+#line 11752 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11848,7 +11855,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11851 "configure"
+#line 11858 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/binutils/configure b/binutils/configure
index f1b80648d3f..249aa16c6fc 100755
--- a/binutils/configure
+++ b/binutils/configure
@@ -6728,7 +6728,14 @@ fi
 test -z "$AR" && AR=ar
 if test -n "$plugin_option"; then
   if $AR --help 2>&1 | grep -q "\--plugin"; then
-AR="$AR $plugin_option"
+touch conftest.c
+$AR $plugin_option rc conftest.a conftest.c
+if test "$?" != 0; then
+  echo "Failed: $AR $plugin_option rc"
+else
+  AR="$AR $plugin_option"
+fi
+rm -f conftest.*
   fi
 fi
 test -z "$AR_FLAGS" && AR_FLAGS=cru
@@ -11569,7 +11576,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11572 "configure"
+#line 11579 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11675,7 +11682,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11678 "configure"
+#line 11685 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/gas/configure b/gas/configure
index 43ac48491db..57aef39de28 100755
--- a/gas/configure
+++ b/gas/configure
@@ -6516,7 +6516,14 @@ fi
 test -z "$AR" && AR=ar
 if test -n "$plugin_option"; then
   if $AR --help 2>&1 | grep -q "\--plugin"; then
-AR="$AR $plugin_option"
+touch conftest.c
+$AR $plugin_option rc conftest.a conftest.c
+if test "$?" != 0; then
+  echo "Failed: $AR $plugin_option rc"
+else
+  AR="$AR $plugin_option"
+fi
+rm -f conftest.*
   fi
 fi
 test -z "$AR_FLAGS" && AR_FLAGS=cru
@@ -11357,7 +11364,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11360 "configure"
+#line 11367 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11463,7 +11470,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11466 "configure"
+#line 11473 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/gprof/configure b/gprof/configure
index ce3a1d5bd11..30dc9257368 100755
--- a/gprof/configure
+++ b/gprof/configure
@@ -6363,7 +6363,14 @@ fi
 test -z "$AR" && AR=ar
 if test -n "$plugin_option"; then
   if $AR --help 2>&1 | grep -q "\--plugin"; then
-AR="$AR $plugin_option"
+touch conftest.c
+$AR $plugin_option rc conftest.a conftest.c
+if test "$?" != 0; then
+  echo "Failed: $AR $plugin_option rc"
+else
+  AR="$AR $plugin_option"
+fi
+rm -f conftest.*
   fi
 fi
 test -z "$AR_FLAGS" && AR_FLAGS=cru
@@ -11204,7 +11211,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11207 "configure"
+#line 11214 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11310,7 +11317,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11313 "configure"
+#line 11320 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/ld/configure b/ld/configu

[PATCH 1/2] GCC: Check if AR works with --plugin and rc

2021-01-11 Thread H.J. Lu via Gcc-patches
AR from older binutils doesn't work with --plugin and rc:

[hjl@gnu-cfl-2 bin]$ touch foo.c
[hjl@gnu-cfl-2 bin]$ ar --plugin 
/usr/libexec/gcc/x86_64-redhat-linux/10/liblto_plugin.so rc libfoo.a foo.c
[hjl@gnu-cfl-2 bin]$ ./ar --plugin 
/usr/libexec/gcc/x86_64-redhat-linux/10/liblto_plugin.so rc libfoo.a foo.c
./ar: no operation specified
[hjl@gnu-cfl-2 bin]$ ./ar --version
GNU ar (Linux/GNU Binutils) 2.29.51.0.1.20180112
Copyright (C) 2018 Free Software Foundation, Inc.
This program is free software; you may redistribute it under the terms of
the GNU General Public License version 3 or (at your option) any later version.
This program has absolutely no warranty.
[hjl@gnu-cfl-2 bin]$

Check if AR works with --plugin and rc before passing --plugin to AR and
RANLIB.

PR ld/27173
* configure: Regenerated.

config/

PR ld/27173
* gcc-plugin.m4 (GCC_PLUGIN_OPTION): Check if AR works with
--plugin and rc before enabling --plugin.

libiberty/

PR ld/27173
* configure: Regenerated.

zlib/

PR ld/27173
* configure: Regenerated.
---
 config/gcc-plugin.m4 |  12 +
 configure| 102 +++
 libiberty/configure  | 102 +++
 libtool.m4   |   9 +++-
 zlib/configure   |  13 --
 5 files changed, 234 insertions(+), 4 deletions(-)

diff --git a/config/gcc-plugin.m4 b/config/gcc-plugin.m4
index c5b72e9a13d..798a2054edd 100644
--- a/config/gcc-plugin.m4
+++ b/config/gcc-plugin.m4
@@ -145,6 +145,18 @@ for plugin in $plugin_names; do
 break
   fi
 done
+dnl Check if ${AR} $plugin_option rc works.
+AC_CHECK_TOOL(AR, ar)
+if test "${AR}" = "" ; then
+  AC_MSG_ERROR([Required archive tool 'ar' not found on PATH.])
+fi
+touch conftest.c
+${AR} $plugin_option rc conftest.a conftest.c
+if test "$?" != 0; then
+  echo "Failed: ${AR} $plugin_option rc"
+  plugin_option=
+fi
+rm -f conftest.*
 if test -n "$plugin_option"; then
   $1="$plugin_option"
   AC_MSG_RESULT($plugin_option)
diff --git a/configure b/configure
index a75bc26978c..c44184f72ff 100755
--- a/configure
+++ b/configure
@@ -10120,6 +10120,108 @@ for plugin in $plugin_names; do
 break
   fi
 done
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}ar", so it can be a program 
name with args.
+set dummy ${ac_tool_prefix}ar; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_AR+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$AR"; then
+  ac_cv_prog_AR="$AR" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ac_cv_prog_AR="${ac_tool_prefix}ar"
+$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" 
>&5
+break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+AR=$ac_cv_prog_AR
+if test -n "$AR"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AR" >&5
+$as_echo "$AR" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_AR"; then
+  ac_ct_AR=$AR
+  # Extract the first word of "ar", so it can be a program name with args.
+set dummy ar; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_AR+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_AR"; then
+  ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ac_cv_prog_ac_ct_AR="ar"
+$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" 
>&5
+break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_AR=$ac_cv_prog_ac_ct_AR
+if test -n "$ac_ct_AR"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5
+$as_echo "$ac_ct_AR" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_AR" = x; then
+AR=""
+  else
+case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" 
>&2;}
+ac_tool_warned=yes ;;
+esac
+AR=$ac_ct_AR
+  fi
+else
+  AR="$ac_cv_prog_AR"
+fi
+
+if test "${AR}" = "" ; then
+  as_fn_error $? "Required archive tool 'ar' not found on PATH." "$LINENO" 5
+fi
+touch conftest.c
+${AR} $plugin_op

Re: [PATCH] Properly release symtab::m_clones.

2021-01-11 Thread Jan Hubicka
> The patch avoids using delete on memory that
> is allocated by GGC.
> 
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> 
> Ready to be installed?
> Thanks,
> Martin
> 
> gcc/ChangeLog:
> 
>   PR jit/98615
>   * symtab-clones.h (clone_info::release): Release
>   symtab::m_clones with ggc_delete as it's a GGC memory.

OK, so this does not cause regular crashes because it is run only with
JIT? Sorry for that.

I was thinking of making the finalizers run also with checking
enabled - that should catch such errors sooner.

Honza
> ---
>  gcc/symtab-clones.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/gcc/symtab-clones.h b/gcc/symtab-clones.h
> index cdb19cb6c8c..5695a434f6a 100644
> --- a/gcc/symtab-clones.h
> +++ b/gcc/symtab-clones.h
> @@ -83,7 +83,7 @@ inline void
>  clone_info::release ()
>  {
>if (symtab->m_clones)
> -delete (symtab->m_clones);
> +ggc_delete (symtab->m_clones);
>symtab->m_clones = NULL;
>  }
> -- 
> 2.29.2
> 


[PATCH] Properly release symtab::m_clones.

2021-01-11 Thread Martin Liška

The patch avoids using delete on memory that
is allocated by GGC.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

PR jit/98615
* symtab-clones.h (clone_info::release): Release
symtab::m_clones with ggc_delete as it's a GGC memory.
---
 gcc/symtab-clones.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/symtab-clones.h b/gcc/symtab-clones.h
index cdb19cb6c8c..5695a434f6a 100644
--- a/gcc/symtab-clones.h
+++ b/gcc/symtab-clones.h
@@ -83,7 +83,7 @@ inline void
 clone_info::release ()
 {
   if (symtab->m_clones)
-delete (symtab->m_clones);
+ggc_delete (symtab->m_clones);
   symtab->m_clones = NULL;
 }
 
--

2.29.2



Re: [PATCH 1/3] GCC: Pass --plugin to AR and RANLIB

2021-01-11 Thread Hans-Peter Nilsson via Gcc-patches
> From: Luis Machado via Gcc-patches 
> Date: Mon, 11 Jan 2021 15:58:43 +0100

> This seems to have broken the builds on AArch64-Linux Ubuntu 18.04.
> 
> make[2]: Entering directory 'binutils-gdb-master-bionic/libiberty'
> rm -f ./libiberty.a pic/./libiberty.a noasan/./libiberty.a
> ar --plugin /usr/lib/gcc/aarch64-linux-gnu/7/liblto_plugin.so rc 
> ./libiberty.a \
>./regex.o ./cplus-dem.o ./cp-demangle.o ./md5.o ./sha1.o ./alloca.o 
> ./argv.o ./bsearch_r.o ./choose-temp.o ./concat.o ./cp-demint.o 
> ./crc32.o ./d-demangle.o ./dwarfnames.o ./dyn-string.o ./fdmatch.o 
> ./fibheap.o ./filedescriptor.o ./filename_cmp.o ./floatformat.o 
> ./fnmatch.o ./fopen_unlocked.o ./getopt.o ./getopt1.o ./getpwd.o 
> ./getruntime.o ./hashtab.o ./hex.o ./lbasename.o ./lrealpath.o 
> ./make-relative-prefix.o ./make-temp-file.o ./objalloc.o ./obstack.o 
> ./partition.o ./pexecute.o ./physmem.o ./pex-common.o ./pex-one.o 
> ./pex-unix.o ./vprintf-support.o ./rust-demangle.o ./safe-ctype.o 
> ./simple-object.o ./simple-object-coff.o ./simple-object-elf.o 
> ./simple-object-mach-o.o ./simple-object-xcoff.o ./sort.o ./spaces.o 
> ./splay-tree.o ./stack-limit.o ./strerror.o ./strsignal.o 
> ./timeval-utils.o ./unlink-if-ordinary.o ./xasprintf.o ./xatexit.o 
> ./xexit.o ./xmalloc.o ./xmemdup.o ./xstrdup.o ./xstrerror.o ./xstrndup.o 
> ./xvasprintf.o  ./setproctitle.o
> ar: no operation specified
> Makefile:252: recipe for target 'libiberty.a' failed
> make[2]: *** [libiberty.a] Error 1

Also seen in my binutils autotester testing
a9fd212a2416..af019bfde9b1, for "all" cross-targets
(mmix-knuth-mmixware, cris-axis-linux-gnu, cris-axis-elf) *and*
for native, stock Debian 9 x86_64.

The native build attempt was configured with
/binutils-gdb-git/configure --enable-plugins \
 --disable-gdb --disable-gdbserver --disable-libdecnumber --disable-readline 
--disable-sim \
 --with-sysroot=/ --with-system-zlib --prefix=/usr/local 
--with-local-prefix=/usr/local
and the build failed with "make", as quoted.

brgds, H-P


Re: [PATCH] libphobos: Allow building libphobos using Solaris/x86 assembler

2021-01-11 Thread Iain Buclaw via Gcc-patches
Excerpts from Iain Buclaw's message of January 7, 2021 6:48 pm:
> Excerpts from Rainer Orth's message of January 7, 2021 5:17 pm:
>> Hi Iain,
>> 
>>>> The Solaris assemblers don't support UTF-8 identifiers.  Unless gdc can
>>>> encode them in some way for toolchains like this (no idea if this is
>>>> worth the effort), it may be possible to guard the tests with the ucn
>>>> effective-target keyword.
>>>> 
>>>> Apart from that, it seems strange that the failing tests should only
>>>> show up as UNSUPPORTED.  I'd have expected the compilation to FAIL, but
>>>> IIRC the gdc testsuite has to ignore all output, so the test for excess
>>>> errors which would usually catch this is disabled effectively.
>>>
>>> Indeed, the testsuite is far too verbose.  Although many tests have a
>>> TEST_OUTPUT directive, converting them to a Dejagnu style is probably
>>> too much effort for the gain.
>>>
>>> Those tests can just be explicitly disabled, I'll look into that.
>> 
>> Great, thanks.
>> 

Having a look, actually I can just use the presence of TEST_OUTPUT to be
a gate for whether to prune all output or not.

Maybe this can be improved later to extract the contents of TEST_OUTPUT,
but for now, it has caught a few hidden bugs in the tests that I have
handled in the patch below (no changelog entry yet).

Iain.

---

diff --git a/gcc/testsuite/lib/gdc-utils.exp b/gcc/testsuite/lib/gdc-utils.exp
index 6d4a15e9a67..3d9fd401ee8 100644
--- a/gcc/testsuite/lib/gdc-utils.exp
+++ b/gcc/testsuite/lib/gdc-utils.exp
@@ -173,6 +173,27 @@ proc gdc-copy-extra { base extra } {
 return $extra
 }
 
+#
+# Some tests in the DMD testsuite have specific target requirements,
+# handle them explicitly here.
+#
+
+proc gdc-extra-test-options { fdout test } {
+switch $test {
+   "runnable/mangle.d" -
+   "runnable/testmodule.d" -
+   "runnable/ufcs.d" {
+   # Tests that require effective-target-ucn
+   puts $fdout "// { dg-skip-if \"\" { ! ucn } }"
+   }
+
+   "runnable/test42.d" {
+   # Tests that overflow line limits of older assemblers.
+   puts $fdout "// { dg-xfail-if \"Lines exceed 10240 characters\" { 
*-*-solaris2.* && { ! gas } } }"
+   }
+}
+}
+
 #
 # Translate DMD test directives to dejagnu equivalent.
 #
@@ -209,8 +230,10 @@ proc gdc-convert-test { base test } {
 
 set extra_sources ""
 set extra_files ""
+set ddoc_options ""
 set needs_phobos 0
 set saw_test_flags 0
+set saw_test_output 0
 
 upvar 1 compilable_do_what compilable_do_what
 set compilable_output_file_ext ""
@@ -243,6 +266,14 @@ proc gdc-convert-test { base test } {
# LINK sets dg-do-what-default "link"
set compilable_do_what "link"
 
+   } elseif [regexp -- {TEST_OUTPUT} $copy_line] {
+   # TEST_OUTPUT contents are ignored, but it might be possible to
+   # convert it into a series of either dg-prune or dg-errors.
+   # Currently, only saw_test_output is set so that dg-prune is
+   # added before running the test.
+   regsub -- {TEST_OUTPUT.*$} $copy_line "" out_line
+   set saw_test_output 1
+
} elseif [regexp -- {POST_SCRIPT} $copy_line] {
# POST_SCRIPT is not handled
regsub -- {POST_SCRIPT.*$} $copy_line "" out_line
@@ -276,7 +307,15 @@ proc gdc-convert-test { base test } {
} elseif [regexp -- {EXTRA_SOURCES\s*:\s*(.*)} $copy_line match 
sources] {
# EXTRA_SOURCES are appended to extra_sources list
foreach srcfile $sources {
-   lappend extra_sources $srcfile
+   # Ddoc files are not handled by the compiler directly, they are
+   # instead passed in using -fdoc-inc=
+   if [regexp -- {\.ddoc$} $srcfile extmatch] {
+   lappend extra_files $srcfile
+   lappend ddoc_options "-fdoc-inc=$type/$srcfile"
+
+   } else {
+   lappend extra_sources $srcfile
+   }
}
regsub -- {EXTRA_SOURCES.*$} $copy_line "" out_line
 
@@ -311,7 +350,7 @@ proc gdc-convert-test { base test } {
} elseif [regexp -- {COMPILABLE_MATH_TEST} $copy_line match sources] {
# COMPILABLE_MATH_TEST annotates tests that import the std.math
# module.  Which will need skipping if not available on the target.
-   regsub -- {RUNNABLE_PHOBOS_TEST.*$} $copy_line "" out_line
+   regsub -- {COMPILABLE_MATH_TEST.*$} $copy_line "" out_line
set needs_phobos 1
}
 
@@ -334,11 +373,17 @@ proc gdc-convert-test { base test } {
puts $fdout "// { dg-additional-files \"$extra_files\" }"
 }
 
+if { [llength $ddoc_options] > 0 } {
+   puts $fdout "// { dg-additional-options \"$ddoc_options\" }"
+}
+
 # Add specific options for test type
 
 # DMD's testsuite is extremely verbose, compiler messages from constructs
 # such as pragma(msg, ...) would otherw

Re: [PATCH v3] libgcc: Thumb-1 Floating-Point Library for Cortex M0

2021-01-11 Thread Christophe Lyon via Gcc-patches
On Mon, 11 Jan 2021 at 17:18, Daniel Engel  wrote:
>
> On Mon, Jan 11, 2021, at 8:07 AM, Christophe Lyon wrote:
> > On Sat, 9 Jan 2021 at 14:09, Christophe Lyon  
> > wrote:
> > >
> > > On Sat, 9 Jan 2021 at 13:27, Daniel Engel  wrote:
> > > >
> > > > On Thu, Jan 7, 2021, at 4:56 AM, Richard Earnshaw wrote:
> > > > > On 07/01/2021 00:59, Daniel Engel wrote:
> > > > > > --snip--
> > > > > >
> > > > > > On Wed, Jan 6, 2021, at 9:05 AM, Richard Earnshaw wrote:
> > > > > > --snip--
> > > > > >
> > > > > >> - finally, your popcount implementations have data in the code 
> > > > > >> segment.
> > > > > >>  That's going to cause problems when we have compilation options 
> > > > > >> such as
> > > > > >> -mpure-code.
> > > > > >
> > > > > > I am just following the precedent of existing lib1funcs (e.g. 
> > > > > > __clz2si).
> > > > > > If this matters, you'll need to point in the right direction for the
> > > > > > fix.  I'm not sure it does matter, since these functions are PIC 
> > > > > > anyway.
> > > > >
> > > > > That might be a bug in the clz implementations - Christophe: Any 
> > > > > thoughts?
> > > >
> > > > __clzsi2() has test coverage in 
> > > > "gcc.c-torture/execute/builtin-bitops-1.c"
> > > Thanks, I'll have a closer look at why I didn't see problems.
> > >
> >
> > So, that's because the code goes to the .text section (as opposed to
> > .text.noread)
> > and does not have the PURECODE flag. The compiler takes care of this
> > when generating code with -mpure-code.
> > And the simulator does not complain because it only checks loads from
> > the segment with the PURECODE flag set.
> >
> This is far out of my depth, but can something like:
>
> ifeq (,$(findstring __symbian__,$(shell $(gcc_compile_bare) -dM -E - </dev/null)))
> 
> be adapted to:
>
> a) detect the state of the -mpure-code switch, and
> b) pass that flag to the preprocessor?
>
> If so, I can probably fix both the target section and the data usage.
> Just have to add a few instructions to finish unrolling the loop.

I must confess I never checked libgcc's Makefile deeply before,
but it looks like you can probably detect whether -mpure-code is
part of $CFLAGS.

However, it might be better to write pure-code-safe code
unconditionally because the toolchain will probably not
be rebuilt with -mpure-code as discussed before.
Or that could mean adding a -mpure-code multilib.

>
> > > > The 'clzs' and 'ctz' functions should never have problems.   -mpure-code
> > > > appears to be valid only when the 'movt' instruction is available, which
> > > > means that the 'clz' instruction will also be available, so no array 
> > > > loads.
> > > No, -mpure-code is also supported with v6m.
> > >
> > > > Is the -mpure-code state detectable as a preprocessor flag?  While
> > > No.
> > >
> > > > 'movw'/'movt' appears to be the canonical solution, I'm not sure it
> > > > should be the default just because a processor supports Thumb-2.
> > > >
> > > > Do users wanting to use -mpure-code recompile the toolchain to avoid
> > > > constant data in compiled C functions?  I don't think this is the
> > > > default for the typical toolchain scripts.
> > > No, users of -mpure-code do not recompile the toolchain.
> > >
> > > --snip --
>
> >


Re: [PATCH v4 06/29] Import replacement 'ctz' functions from CM0 library

2021-01-11 Thread Richard Earnshaw via Gcc-patches
On 11/01/2021 11:10, g...@danielengel.com wrote:
> From: Daniel Engel 
> 
> This version combines __ctzdi2() with __ctzsi2() into a single object with
> an efficient tail call.  The former implementation of __ctzdi2() was in C.
> 
> On architectures without a clz instruction, this version merges the formerly
> separate Thumb and ARM code sequences into a unified instruction sequence.
> This change significantly improves the Thumb performance without affecting ARM
> performance.  Finally, this version adds a new __OPTIMIZE_SIZE__ build option.
> 
> On architectures with a clz instruction, __ctzsi2() now returns 32 instead
> of -1 when the argument is 0.  This costs an extra 2 instructions, branchless.
> Although the output of this function is technically undefined when the 
> argument
> is 0, this makes the behavior consistent with __clzsi2().
> 
> Likewise, __ctzdi2() now returns '64' on a zero argument instead of '31'.
> 

I think almost all of the comments on the CLZ support apply equally here
as well.

R.

> gcc/libgcc/ChangeLog:
> 2021-01-07 Daniel Engel 
> 
>   * config/arm/bits/ctz2.S: Size-optimized __ctzsi2(), new function 
> __ctzdi2();
>   added logic to return '32' for x=0 when using hardware clz instruction.
>   * config/arm/t-elf: Add _ctzdi2, move _clzsi2 to weak LIB1ASMFUNCS 
> group.
> ---
>  libgcc/config/arm/bits/ctz2.S | 287 ++
>  libgcc/config/arm/t-elf   |   3 +-
>  2 files changed, 228 insertions(+), 62 deletions(-)
> 
> diff --git a/libgcc/config/arm/bits/ctz2.S b/libgcc/config/arm/bits/ctz2.S
> index f0422d1fbba..4241fdad283 100644
> --- a/libgcc/config/arm/bits/ctz2.S
> +++ b/libgcc/config/arm/bits/ctz2.S
> @@ -1,65 +1,230 @@
> +/* ctz2.S: ARM optimized 'ctz' functions
>  
> -#ifdef L_ctzsi2
> -#ifdef NOT_ISA_TARGET_32BIT
> -FUNC_START ctzsi2
> - negsr1, r0
> - andsr0, r0, r1
> - movsr1, #28
> - movsr3, #1
> - lslsr3, r3, #16
> - cmp r0, r3 /* 0x1 */
> - bcc 2f
> - lsrsr0, r0, #16
> - subsr1, r1, #16
> -2:   lsrsr3, r3, #8
> - cmp r0, r3 /* #0x100 */
> - bcc 2f
> - lsrsr0, r0, #8
> - subsr1, r1, #8
> -2:   lsrsr3, r3, #4
> - cmp r0, r3 /* #0x10 */
> - bcc 2f
> - lsrsr0, r0, #4
> - subsr1, r1, #4
> -2:   adr r2, 1f
> - ldrbr0, [r2, r0]
> - subsr0, r0, r1
> - bx lr
> -.align 2
> -1:
> -.byte27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
> - FUNC_END ctzsi2
> +   Copyright (C) 2020-2021 Free Software Foundation, Inc.
> +   Contributed by Daniel Engel (g...@danielengel.com)
> +
> +   This file is free software; you can redistribute it and/or modify it
> +   under the terms of the GNU General Public License as published by the
> +   Free Software Foundation; either version 3, or (at your option) any
> +   later version.
> +
> +   This file is distributed in the hope that it will be useful, but
> +   WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   General Public License for more details.
> +
> +   Under Section 7 of GPL version 3, you are granted additional
> +   permissions described in the GCC Runtime Library Exception, version
> +   3.1, as published by the Free Software Foundation.
> +
> +   You should have received a copy of the GNU General Public License and
> +   a copy of the GCC Runtime Library Exception along with this program;
> +   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +
> +// When the hardware 'clz' instruction is available, an efficient version
> +//  of __ctzsi2(x) can be created by calculating '31 - __clzsi2(lsb(x))',
> +//  where lsb(x) is 'x' with only the least-significant '1' bit set.
> +// The following offset applies to all of the functions in this file.
> +#if defined(__ARM_FEATURE_CLZ) && __ARM_FEATURE_CLZ
> +  #define CTZ_RESULT_OFFSET 1
>  #else
> -ARM_FUNC_START ctzsi2
> - rsb r1, r0, #0
> - and r0, r0, r1
> -# if defined (__ARM_FEATURE_CLZ)
> - clz r0, r0
> - rsb r0, r0, #31
> - RET
> -# else
> - mov r1, #28
> - cmp r0, #0x1
> - do_it   cs, t
> - movcs   r0, r0, lsr #16
> - subcs   r1, r1, #16
> - cmp r0, #0x100
> - do_it   cs, t
> - movcs   r0, r0, lsr #8
> - subcs   r1, r1, #8
> - cmp r0, #0x10
> - do_it   cs, t
> - movcs   r0, r0, lsr #4
> - subcs   r1, r1, #4
> - adr r2, 1f
> - ldrbr0, [r2, r0]
> - sub r0, r0, r1
> - RET
> -.align 2
> -1:
> -.byte27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
> -# endif /* !defined (__ARM_FEATURE_CLZ) */
> - FUNC_END ctzsi2
> +  #define CTZ_RESULT_OFFSET 0
>  #endif
> -#endif /* L_clzsi2 */
> +
> +
> +#ifdef L_ctzdi2
> +
> +// int __ctzdi2(long long)
> +// C

Re: [PATCH v4 05/29] Import replacement 'clz' functions from CM0 library

2021-01-11 Thread Richard Earnshaw via Gcc-patches
A general comment before we start:

CLZ was added to the Arm ISA in Armv5.  So all subsequent Arm versions
(and all versions implementing thumb2) will have this instruction.  So
the only cases where you'll need a fallback are armv6m (and derivatives)
and pre-armv5 (Arm or thumb1).  So there's no need in your code to try
to use a synthesized CLZ operation when compiling for thumb2.


On 11/01/2021 11:10, g...@danielengel.com wrote:
> From: Daniel Engel 
> 
> On architectures with no clz instruction, this version combines __clzdi2()
> with __clzsi2() into a single object with an efficient tail call.  Also, this
> version merges the formerly separate Thumb and ARM code implementations
> into a unified instruction sequence.  This change significantly improves the
> Thumb performance without affecting ARM performance.  Finally, this version adds
> a new __OPTIMIZE_SIZE__ build option (using a loop).
> 
> On architectures with a clz instruction, functionality is unchanged.
> 
> gcc/libgcc/ChangeLog:
> 2021-01-07 Daniel Engel 
> 
>   * config/arm/bits/clz2.S: Size-optimized bitwise versions of __clzsi2()
>   and __clzdi2() (i.e. __ARM_FEATURE_CLZ not available).
>   * config/arm/lib1funcs.S: Moved CFI_FUNCTION macros, added 
> __ARM_FEATURE_IT.
>   * config/arm/t-elf: Move _clzsi2 to new group of weak LIB1ASMFUNCS.
> ---
>  libgcc/config/arm/bits/clz2.S | 342 ++
>  libgcc/config/arm/lib1funcs.S |  25 ++-
>  libgcc/config/arm/t-elf   |   8 +-
>  3 files changed, 248 insertions(+), 127 deletions(-)
> 
> diff --git a/libgcc/config/arm/bits/clz2.S b/libgcc/config/arm/bits/clz2.S
> index 1c8f10a5b29..d0a1fbec4d0 100644
> --- a/libgcc/config/arm/bits/clz2.S
> +++ b/libgcc/config/arm/bits/clz2.S
> @@ -1,124 +1,234 @@
> +/* clz2.S: Cortex M0 optimized 'clz' functions
> +
> +   Copyright (C) 2018-2021 Free Software Foundation, Inc.
> +   Contributed by Daniel Engel, Senva Inc (g...@danielengel.com)
> +
> +   This file is free software; you can redistribute it and/or modify it
> +   under the terms of the GNU General Public License as published by the
> +   Free Software Foundation; either version 3, or (at your option) any
> +   later version.
> +
> +   This file is distributed in the hope that it will be useful, but
> +   WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   General Public License for more details.
> +
> +   Under Section 7 of GPL version 3, you are granted additional
> +   permissions described in the GCC Runtime Library Exception, version
> +   3.1, as published by the Free Software Foundation.
> +
> +   You should have received a copy of the GNU General Public License and
> +   a copy of the GCC Runtime Library Exception along with this program;
> +   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +
> +#if defined(__ARM_FEATURE_CLZ) && __ARM_FEATURE_CLZ

Writing the test this way is pointless.  Either test for
__ARM_FEATURE_CLZ being defined, or test for it being non-zero; but not
both.  C Macros default to a value of zero if not defined.

In this case #ifdef is just fine - it won't be defined if the
instruction doesn't exist.

Similar simplification should be used everywhere else you've used this
type of construct.

> +
> +#ifdef L_clzdi2
> +
> +// int __clzdi2(long long)
> +// Counts leading zero bits in $r1:$r0.
> +// Returns the result in $r0.
> +FUNC_START_SECTION clzdi2 .text.sorted.libgcc.clz2.clzdi2
> +CFI_START_FUNCTION
> +
> +// Moved here from lib1funcs.S
> +cmp xxh,#0
> +do_it   eq, et
> +clzeq   r0, xxl
> +clzne   r0, xxh
> +addeq   r0, #32
> +RET
> +
> +CFI_END_FUNCTION
> +FUNC_END clzdi2
> +
> +#endif /* L_clzdi2 */
> +
>  
>  #ifdef L_clzsi2
> -#ifdef NOT_ISA_TARGET_32BIT
> -FUNC_START clzsi2
> - movsr1, #28
> - movsr3, #1
> - lslsr3, r3, #16
> - cmp r0, r3 /* 0x1 */
> - bcc 2f
> - lsrsr0, r0, #16
> - subsr1, r1, #16
> -2:   lsrsr3, r3, #8
> - cmp r0, r3 /* #0x100 */
> - bcc 2f
> - lsrsr0, r0, #8
> - subsr1, r1, #8
> -2:   lsrsr3, r3, #4
> - cmp r0, r3 /* #0x10 */
> - bcc 2f
> - lsrsr0, r0, #4
> - subsr1, r1, #4
> -2:   adr r2, 1f
> - ldrbr0, [r2, r0]
> - addsr0, r0, r1
> - bx lr
> -.align 2
> -1:
> -.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
> - FUNC_END clzsi2
> -#else
> -ARM_FUNC_START clzsi2
> -# if defined (__ARM_FEATURE_CLZ)
> - clz r0, r0
> - RET
> -# else
> - mov r1, #28
> - cmp r0, #0x1
> - do_it   cs, t
> - movcs   r0, r0, lsr #16
> - subcs   r1, r1, #16
> - cmp r0, #0x100
> - do_it   cs, t
> - movcs   r0, r0, lsr #8
> - subcs   r1, r1, #8
> - cmp 

Re: [patch] fix -Wformat-diag warnings in rs6000-call.c

2021-01-11 Thread Matthias Klose
On 1/10/21 10:18 PM, Martin Sebor wrote:
> On 1/10/21 3:29 AM, Matthias Klose wrote:
>> is the newline intended? It's followed by a debug_rtx call.
> 
> To avoid the warning there shouldn't be any trailing punctuation
> or whitespace in the message.  The GCC quoting directives should
> be preferred over the literal characters (as per GCC Coding
> Conventions).  %qc and %qs are preferable to %<%c%>.
> 
> Symbols/identifiers should be formatted using the appropriate
> directives or quoted in %< %>.  Underscores in words like
> emit_insn are taken as indicators that the word is an identifier
> and to trigger warnings.

is this?
https://gcc.gnu.org/codingconventions.html#Diagnostics

I think that's a bit terse, and grepping sources for %< shows many more
occurrences than %qX.

>> ../../src/gcc/rtl.c:860:42: warning: unquoted sequence of 2 consecutive
>> punctuation characters '',' in format [-Wformat-diag]
>>    860 | ("RTL check: expected elt %d type '%c', have '%c' (rtx %s) in 
>> %s, at
>> %s:%d",
>>
>> `%c', or some %q quoting?
> 
> The purpose of the -Wformat-diag warnings is to improve the consistency
> of user-visible messages and make them easier to translate.  There was
> a discussion some time back about whether internal errors should fall
> into this category.  I'm not sure if it reached a conclusion one way
> or the other but in similar situations elsewhere in GCC we have
> suppressed the warning via #pragma GCC diagnostic.  If it takes too
> much effort to clean them up it might make sense to do the same here
> (the downside is that it doesn't help translators).  Otherwise,
> the messages are not really phrased in a way that's comprehensible
> either to users or to translators (acronyms like elt or rtx aren't universally
> understood).

[...]

>> again, `' quotes, or some %q option?
>>
> 
> The latter: %qs with an argument is best in general (it can reduce
> translation effort between repeated messages parameterized on
> the quoted string).

if the URL above is the correct place for the conventions, then maybe make it
more explicit there about the preferred choice.

Matthias


Re: [PATCH v3] libgcc: Thumb-1 Floating-Point Library for Cortex M0

2021-01-11 Thread Daniel Engel
On Mon, Jan 11, 2021, at 8:07 AM, Christophe Lyon wrote:
> On Sat, 9 Jan 2021 at 14:09, Christophe Lyon  
> wrote:
> >
> > On Sat, 9 Jan 2021 at 13:27, Daniel Engel  wrote:
> > >
> > > On Thu, Jan 7, 2021, at 4:56 AM, Richard Earnshaw wrote:
> > > > On 07/01/2021 00:59, Daniel Engel wrote:
> > > > > --snip--
> > > > >
> > > > > On Wed, Jan 6, 2021, at 9:05 AM, Richard Earnshaw wrote:
> > > > > --snip--
> > > > >
> > > > >> - finally, your popcount implementations have data in the code 
> > > > >> segment.
> > > > >>  That's going to cause problems when we have compilation options 
> > > > >> such as
> > > > >> -mpure-code.
> > > > >
> > > > > I am just following the precedent of existing lib1funcs (e.g. 
> > > > > > __clzsi2).
> > > > > If this matters, you'll need to point in the right direction for the
> > > > > fix.  I'm not sure it does matter, since these functions are PIC 
> > > > > anyway.
> > > >
> > > > That might be a bug in the clz implementations - Christophe: Any 
> > > > thoughts?
> > >
> > > __clzsi2() has test coverage in "gcc.c-torture/execute/builtin-bitops-1.c"
> > Thanks, I'll have a closer look at why I didn't see problems.
> >
> 
> So, that's because the code goes to the .text section (as opposed to
> .text.noread)
> and does not have the PURECODE flag. The compiler takes care of this
> when generating code with -mpure-code.
> And the simulator does not complain because it only checks loads from
> the segment with the PURECODE flag set.
> 
This is far out of my depth, but can something like: 

ifeq (,$(findstring __symbian__,$(shell $(gcc_compile_bare) -dM -E - 
 > > The 'clzs' and 'ctz' functions should never have problems.   -mpure-code
> > > appears to be valid only when the 'movt' instruction is available, which
> > > means that the 'clz' instruction will also be available, so no array 
> > > loads.
> > No, -mpure-code is also supported with v6m.
> >
> > > Is the -mpure-code state detectable as a preprocessor flag?  While
> > No.
> >
> > > 'movw'/'movt' appears to be the canonical solution, I'm not sure it
> > > should be the default just because a processor supports Thumb-2.
> > >
> > > Do users wanting to use -mpure-code recompile the toolchain to avoid
> > > constant data in compiled C functions?  I don't think this is the
> > > default for the typical toolchain scripts.
> > No, users of -mpure-code do not recompile the toolchain.
> >
> > --snip --

>


Re: [PATCH v3] libgcc: Thumb-1 Floating-Point Library for Cortex M0

2021-01-11 Thread Christophe Lyon via Gcc-patches
On Sat, 9 Jan 2021 at 14:09, Christophe Lyon  wrote:
>
> On Sat, 9 Jan 2021 at 13:27, Daniel Engel  wrote:
> >
> > On Thu, Jan 7, 2021, at 4:56 AM, Richard Earnshaw wrote:
> > > On 07/01/2021 00:59, Daniel Engel wrote:
> > > > --snip--
> > > >
> > > > On Wed, Jan 6, 2021, at 9:05 AM, Richard Earnshaw wrote:
> > > >
> > > >>
> > > >> Thanks for working on this, Daniel.
> > > >>
> > > >> This is clearly stage1 material, so we've got time for a couple of
> > > >> iterations to sort things out.
> > > >
> > > > I appreciate your feedback.  I had been hoping that with no regressions
> > > > this might still be eligible for stage2.  Christophe never indicated
> > > > either way. but the fact that he was looking at it seemed positive.
> > > > I thought I would be a couple of weeks faster with this last
> > > > iteration, but holidays got in the way.
> > >
> > > GCC doesn't have a stage 2 any more (historical wart).  We were in
> > > (late) stage3 when this was first posted, and because of the significant
> > > impact this might have on not just CM0 but other targets as well, I
> > > don't think it's something we should try to squeeze in at the last
> > > minute.  We're now in stage 4, so that is doubly the case.
> >
> > Of course I meant stage3.  Oops.  I actually thought stage 3 would
> > continue through next week based on the average historical dates.
>
> I expected stage4 to start on Jan 1st :-)
>
> > It would have been nice to get this feedback when I emailed you a
> > preview version of this patch (2020-Nov-11).  Christophe's logs have
> > been very helpful on the technical integration, but it's proving a chore
> > to go back and re-create some of the intermediate chunks.
> >
> > Regardless, I still have free time for at least a little while longer to
> > work on this, so I'll push forward with any further feedback you are
> > willing to give me.  I have failed to free up any time during the last 2
> > years to actually work on this during stage1, and I have no guarantee
> > this coming year will be different.
> >
> > >
> > > Christophe is a very valuable member of our community, but he's not a
> > > port maintainer and thus cannot really rule on what can go into the
> > > tools, or when.
> > >
> > > >
> > > > I actually think your comments below could all be addressable within a
> > > > couple of days.  But, I'm not accounting for the review process.
> > > >
> > > >> Firstly, the patch is very large, but contains a large number of
> > > >> distinct changes, so it would really benefit from being broken down 
> > > >> into
> > > >> a number of distinct patches.  This will make reviewing the individual
> > > >> changes much more straight-forward.
>
> And if you can generate the patch with git, that will help: the reason for the
> previous build errors was because I had to "reformat" your patch before
> submitting it for validation, and I messed things up.
>
> > > >
> > > > I have no context for "large" or "small" with respect to gcc.  This
> > > > patch comprises about 30% of a previously-monolithic library that's
> > > > been shipping since ~2016 (the rest is libm material).  Other than
> > > > (1) the aforementioned change to div0(), (2) a nascent adaptation
> > > > for __truncdfsf2() (not enabled), and (3) the gratuitous addition of
> > > > the bitwise functions, the library remains pretty much as it was
> > > > originally released.
> > >
> > > Large, like many other terms is relative.  For assembler file changes,
> > > which this is primarily, the overall size can be much smaller and still
> > > be considered 'large'.
> > >
> > > >
> > > > The driving force in the development of this library was small size,
> > > > which of course was never possible with the softfp routines.  It's not
> > > > half-slow, either, for the limitations of the M0 architecture.   And,
> > > > it's IEEE compliant.  But, that means that most of the functions are
> > > > highly interconnected.  So, some of it can be broken up as you outline
> > > > below, but that last patch is still worth more than half of the total.
> > >
> > > Nevertheless, having the floating-point code separated out will make
> > > reviewing more straight forward.  I'll likely need to ask one of our FP
> > > experts to have a specific look at that part and that will be easier if
> > > it is disentangled from the other changes.
> > > >
> > > > I also have ~70k lines of test vectors that seem mostly redundant, but
> > > > not completely.  I haven't decided what to do here.  For example, I have
> > > > coverage for __aeabi_u/ldivmod, while GCC does not.  If I do anything
> > > > with this code it will be in a separate thread.
> > >
> > > Publishing the test code, even if it isn't integrated into the GCC
> > > testsuite would be useful.  Perhaps someone else could then help with 
> > > that.
> >
> > Very brute force stuff, not production quality:
> >  (160 kb)
> >
> > > >> I'd suggest:
> > > >>
> > > >> 1) Some basic

Re: [PATCH v4 02/29] Refactor 'clz' functions into a new file.

2021-01-11 Thread Daniel Engel


On Mon, Jan 11, 2021, at 7:39 AM, Richard Earnshaw wrote:
> On 11/01/2021 15:26, Richard Earnshaw wrote:
> > On 11/01/2021 11:10, g...@danielengel.com wrote:
> >> From: Daniel Engel 
> >>
> >> gcc/libgcc/ChangeLog:
> >> 2021-01-07 Daniel Engel 
> >>
> >>* config/arm/lib1funcs.S: Move __clzsi2() and __clzdi2() to
> >>* config/arm/bits/clz2.S: New file.
> > 
> > No, please don't push these down into a subdirectory.  They do not
> > represent a clear subfunctional distinction, so creating a load of disk
> > > hierarchy is just confusing.  Just put the code in config/arm/clz.S
> > 
> > Otherwise this is just a re-org, so it's OK.
> 
> Oops, missed that as a new file, this needs to copy over the original
> copyright message.
> 
> Same with the other re-orgs that split code up.

This is not a hard change, just noisy, so I'm checking ... the estimated
lifetime of this particular content is approximately 15 minutes.  There
is a copyright message in 05/29, and similar for the other re-orgs.

> R.
> 
> > 
> > R.
> > 
> >> ---
> >>  libgcc/config/arm/bits/clz2.S | 124 ++
> >>  libgcc/config/arm/lib1funcs.S | 123 +
> >>  2 files changed, 125 insertions(+), 122 deletions(-)
> >>  create mode 100644 libgcc/config/arm/bits/clz2.S
> >>
> >> diff --git a/libgcc/config/arm/bits/clz2.S b/libgcc/config/arm/bits/clz2.S
> >> new file mode 100644
> >> index 000..1c8f10a5b29
> >> --- /dev/null
> >> +++ b/libgcc/config/arm/bits/clz2.S
> >> @@ -0,0 +1,124 @@
> >> +
> >> +#ifdef L_clzsi2
> >> +#ifdef NOT_ISA_TARGET_32BIT
> >> +FUNC_START clzsi2
> >> +  movsr1, #28
> >> +  movsr3, #1
> >> +  lslsr3, r3, #16
> >> +  cmp r0, r3 /* 0x1 */
> >> +  bcc 2f
> >> +  lsrsr0, r0, #16
> >> +  subsr1, r1, #16
> >> +2:lsrsr3, r3, #8
> >> +  cmp r0, r3 /* #0x100 */
> >> +  bcc 2f
> >> +  lsrsr0, r0, #8
> >> +  subsr1, r1, #8
> >> +2:lsrsr3, r3, #4
> >> +  cmp r0, r3 /* #0x10 */
> >> +  bcc 2f
> >> +  lsrsr0, r0, #4
> >> +  subsr1, r1, #4
> >> +2:adr r2, 1f
> >> +  ldrbr0, [r2, r0]
> >> +  addsr0, r0, r1
> >> +  bx lr
> >> +.align 2
> >> +1:
> >> +.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
> >> +  FUNC_END clzsi2
> >> +#else
> >> +ARM_FUNC_START clzsi2
> >> +# if defined (__ARM_FEATURE_CLZ)
> >> +  clz r0, r0
> >> +  RET
> >> +# else
> >> +  mov r1, #28
> >> +  cmp r0, #0x1
> >> +  do_it   cs, t
> >> +  movcs   r0, r0, lsr #16
> >> +  subcs   r1, r1, #16
> >> +  cmp r0, #0x100
> >> +  do_it   cs, t
> >> +  movcs   r0, r0, lsr #8
> >> +  subcs   r1, r1, #8
> >> +  cmp r0, #0x10
> >> +  do_it   cs, t
> >> +  movcs   r0, r0, lsr #4
> >> +  subcs   r1, r1, #4
> >> +  adr r2, 1f
> >> +  ldrbr0, [r2, r0]
> >> +  add r0, r0, r1
> >> +  RET
> >> +.align 2
> >> +1:
> >> +.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
> >> +# endif /* !defined (__ARM_FEATURE_CLZ) */
> >> +  FUNC_END clzsi2
> >> +#endif
> >> +#endif /* L_clzsi2 */
> >> +
> >> +#ifdef L_clzdi2
> >> +#if !defined (__ARM_FEATURE_CLZ)
> >> +
> >> +# ifdef NOT_ISA_TARGET_32BIT
> >> +FUNC_START clzdi2
> >> +  push{r4, lr}
> >> +  cmp xxh, #0
> >> +  bne 1f
> >> +#  ifdef __ARMEB__
> >> +  movsr0, xxl
> >> +  bl  __clzsi2
> >> +  addsr0, r0, #32
> >> +  b 2f
> >> +1:
> >> +  bl  __clzsi2
> >> +#  else
> >> +  bl  __clzsi2
> >> +  addsr0, r0, #32
> >> +  b 2f
> >> +1:
> >> +  movsr0, xxh
> >> +  bl  __clzsi2
> >> +#  endif
> >> +2:
> >> +  pop {r4, pc}
> >> +# else /* NOT_ISA_TARGET_32BIT */
> >> +ARM_FUNC_START clzdi2
> >> +  do_push {r4, lr}
> >> +  cmp xxh, #0
> >> +  bne 1f
> >> +#  ifdef __ARMEB__
> >> +  mov r0, xxl
> >> +  bl  __clzsi2
> >> +  add r0, r0, #32
> >> +  b 2f
> >> +1:
> >> +  bl  __clzsi2
> >> +#  else
> >> +  bl  __clzsi2
> >> +  add r0, r0, #32
> >> +  b 2f
> >> +1:
> >> +  mov r0, xxh
> >> +  bl  __clzsi2
> >> +#  endif
> >> +2:
> >> +  RETLDM  r4
> >> +  FUNC_END clzdi2
> >> +# endif /* NOT_ISA_TARGET_32BIT */
> >> +
> >> +#else /* defined (__ARM_FEATURE_CLZ) */
> >> +
> >> +ARM_FUNC_START clzdi2
> >> +  cmp xxh, #0
> >> +  do_it   eq, et
> >> +  clzeq   r0, xxl
> >> +  clzne   r0, xxh
> >> +  addeq   r0, r0, #32
> >> +  RET
> >> +  FUNC_END clzdi2
> >> +
> >> +#endif
> >> +#endif /* L_clzdi2 */
> >> +
> >> diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
> >> index b4541bae791..f5aa5505e9d 100644
> >> --- a/libgcc/config/arm/lib1funcs.S
> >> +++ b/libgcc/config/arm/lib1funcs.S
> >> @@ -1722,128 +1722,7 @@ LSYM(Lover12):
> >>  
> >>  #endif /* __symbian__ */
> >>  
> >> -#ifdef L_clzsi2
> >> -#ifdef NOT_ISA_TARGET_32BIT
> >> -FUNC_START clzsi2
> >> -  movsr1, #28
> >> -  movsr3, #1
> >> -  lslsr3, r3, #16
> >> -  cmp r0, r3 /* 0x1 */
> >> -  bcc 2f
> >> -  lsrsr0, r0, #16
> >> -  subsr1, r1, #16

Re: [PATCH] libstdc++: implement locale support for AIX

2021-01-11 Thread CHIGOT, CLEMENT via Gcc-patches
>> Hi David, Clement,
>>
>>> The patch is local to libstdc++ AIX support, so I believe that I can 
>>> approve it.
>>
>>have you considered merging the dragonfly and aix trees?  I'm asking
>>because it seems prudent to try and avoid creating more and more
>>almost-but-not-quite-similar configurations (Solaris might be able to
>>use the same code, at least in 11.4 which has XPG7 support).
>
>Agreed.
>
>See also 
>https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57585

I haven't thought about that. It should be possible.
The main problem with AIX is the few missing locale functions
(strtof_l, localeconv_l, etc). I've defined some in c_locale.h; some
are simply wrapped by uselocale in the code itself.
The question is: if we merge dragonfly and aix, and allow other OSes
to use this new locale support, what should go in c_locale.h?
Do we want it to define all missing functions, or would we rather
have some #ifdef wrapping "uselocale" when a *_l function
is missing?
Either way, I fear there will be a lot of #ifdefs.

> As I'm sure I've said before, all patches for libstdc++ need to be
> sent to the libstdc++ list.
Sorry, I've forgotten that. I'll try to be more careful next time !

Clément




Re: [PATCH] libstdc++: implement locale support for AIX

2021-01-11 Thread Jonathan Wakely via Gcc-patches

On 11/01/21 16:35 +0100, Rainer Orth wrote:

Hi David, Clement,


The patch is local to libstdc++ AIX support, so I believe that I can approve it.


have you considered merging the dragonfly and aix trees?  I'm asking
because it seems prudent to try and avoid creating more and more
almost-but-not-quite-similar configurations (Solaris might be able to
use the same code, at least in 11.4 which has XPG7 support).


Agreed.

See also https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57585

As I'm sure I've said before, all patches for libstdc++ need to be
sent to the libstdc++ list.




Re: [PATCH v4 02/29] Refactor 'clz' functions into a new file.

2021-01-11 Thread Richard Earnshaw via Gcc-patches
On 11/01/2021 15:26, Richard Earnshaw wrote:
> On 11/01/2021 11:10, g...@danielengel.com wrote:
>> From: Daniel Engel 
>>
>> gcc/libgcc/ChangeLog:
>> 2021-01-07 Daniel Engel 
>>
>>  * config/arm/lib1funcs.S: Move __clzsi2() and __clzdi2() to
>>  * config/arm/bits/clz2.S: New file.
> 
> No, please don't push these down into a subdirectory.  They do not
> represent a clear subfunctional distinction, so creating a load of disk
> hierarchy is just confusing.  Just put the code in config/arm/clz.S
> 
> Otherwise this is just a re-org, so it's OK.

Oops, missed that as a new file, this needs to copy over the original
copyright message.

Same with the other re-orgs that split code up.

R.

> 
> R.
> 
>> ---
>>  libgcc/config/arm/bits/clz2.S | 124 ++
>>  libgcc/config/arm/lib1funcs.S | 123 +
>>  2 files changed, 125 insertions(+), 122 deletions(-)
>>  create mode 100644 libgcc/config/arm/bits/clz2.S
>>
>> diff --git a/libgcc/config/arm/bits/clz2.S b/libgcc/config/arm/bits/clz2.S
>> new file mode 100644
>> index 000..1c8f10a5b29
>> --- /dev/null
>> +++ b/libgcc/config/arm/bits/clz2.S
>> @@ -0,0 +1,124 @@
>> +
>> +#ifdef L_clzsi2
>> +#ifdef NOT_ISA_TARGET_32BIT
>> +FUNC_START clzsi2
>> +movsr1, #28
>> +movsr3, #1
>> +lslsr3, r3, #16
>> +cmp r0, r3 /* 0x1 */
>> +bcc 2f
>> +lsrsr0, r0, #16
>> +subsr1, r1, #16
>> +2:  lsrsr3, r3, #8
>> +cmp r0, r3 /* #0x100 */
>> +bcc 2f
>> +lsrsr0, r0, #8
>> +subsr1, r1, #8
>> +2:  lsrsr3, r3, #4
>> +cmp r0, r3 /* #0x10 */
>> +bcc 2f
>> +lsrsr0, r0, #4
>> +subsr1, r1, #4
>> +2:  adr r2, 1f
>> +ldrbr0, [r2, r0]
>> +addsr0, r0, r1
>> +bx lr
>> +.align 2
>> +1:
>> +.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
>> +FUNC_END clzsi2
>> +#else
>> +ARM_FUNC_START clzsi2
>> +# if defined (__ARM_FEATURE_CLZ)
>> +clz r0, r0
>> +RET
>> +# else
>> +mov r1, #28
>> +cmp r0, #0x1
>> +do_it   cs, t
>> +movcs   r0, r0, lsr #16
>> +subcs   r1, r1, #16
>> +cmp r0, #0x100
>> +do_it   cs, t
>> +movcs   r0, r0, lsr #8
>> +subcs   r1, r1, #8
>> +cmp r0, #0x10
>> +do_it   cs, t
>> +movcs   r0, r0, lsr #4
>> +subcs   r1, r1, #4
>> +adr r2, 1f
>> +ldrbr0, [r2, r0]
>> +add r0, r0, r1
>> +RET
>> +.align 2
>> +1:
>> +.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
>> +# endif /* !defined (__ARM_FEATURE_CLZ) */
>> +FUNC_END clzsi2
>> +#endif
>> +#endif /* L_clzsi2 */
>> +
>> +#ifdef L_clzdi2
>> +#if !defined (__ARM_FEATURE_CLZ)
>> +
>> +# ifdef NOT_ISA_TARGET_32BIT
>> +FUNC_START clzdi2
>> +push{r4, lr}
>> +cmp xxh, #0
>> +bne 1f
>> +#  ifdef __ARMEB__
>> +movsr0, xxl
>> +bl  __clzsi2
>> +addsr0, r0, #32
>> +b 2f
>> +1:
>> +bl  __clzsi2
>> +#  else
>> +bl  __clzsi2
>> +addsr0, r0, #32
>> +b 2f
>> +1:
>> +movsr0, xxh
>> +bl  __clzsi2
>> +#  endif
>> +2:
>> +pop {r4, pc}
>> +# else /* NOT_ISA_TARGET_32BIT */
>> +ARM_FUNC_START clzdi2
>> +do_push {r4, lr}
>> +cmp xxh, #0
>> +bne 1f
>> +#  ifdef __ARMEB__
>> +mov r0, xxl
>> +bl  __clzsi2
>> +add r0, r0, #32
>> +b 2f
>> +1:
>> +bl  __clzsi2
>> +#  else
>> +bl  __clzsi2
>> +add r0, r0, #32
>> +b 2f
>> +1:
>> +mov r0, xxh
>> +bl  __clzsi2
>> +#  endif
>> +2:
>> +RETLDM  r4
>> +FUNC_END clzdi2
>> +# endif /* NOT_ISA_TARGET_32BIT */
>> +
>> +#else /* defined (__ARM_FEATURE_CLZ) */
>> +
>> +ARM_FUNC_START clzdi2
>> +cmp xxh, #0
>> +do_it   eq, et
>> +clzeq   r0, xxl
>> +clzne   r0, xxh
>> +addeq   r0, r0, #32
>> +RET
>> +FUNC_END clzdi2
>> +
>> +#endif
>> +#endif /* L_clzdi2 */
>> +
>> diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
>> index b4541bae791..f5aa5505e9d 100644
>> --- a/libgcc/config/arm/lib1funcs.S
>> +++ b/libgcc/config/arm/lib1funcs.S
>> @@ -1722,128 +1722,7 @@ LSYM(Lover12):
>>  
>>  #endif /* __symbian__ */
>>  
>> -#ifdef L_clzsi2
>> -#ifdef NOT_ISA_TARGET_32BIT
>> -FUNC_START clzsi2
>> -movsr1, #28
>> -movsr3, #1
>> -lslsr3, r3, #16
>> -cmp r0, r3 /* 0x1 */
>> -bcc 2f
>> -lsrsr0, r0, #16
>> -subsr1, r1, #16
>> -2:  lsrsr3, r3, #8
>> -cmp r0, r3 /* #0x100 */
>> -bcc 2f
>> -lsrsr0, r0, #8
>> -subsr1, r1, #8
>> -2:  lsrsr3, r3, #4
>> -cmp r0, r3 /* #0x10 */
>> -bcc 2f
>> -lsrsr0, r0, #4
>> -subsr1, r1, #4
>> -2:  adr r2, 1f
>> -ldrbr0, [r2, r0]
>> -addsr0, r0, r1
>> -bx lr
>> -.align 2
>> -1:
>> -.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
>> -FUNC_END clzsi2
>> -#else
>> -AR

Re: [PATCH] libstdc++: implement locale support for AIX

2021-01-11 Thread Rainer Orth
Hi Clement,

> Would "Skip if target = aix" be ok, especially for the few that I have
> absolutely no idea why they are failing ?

you should try to avoid such skips if at all possible.  Should the tests
start to PASS in the future, either due to AIX changes or changes in
libstdc++, this would go unnoticed.  Use xfail instead.

Besides, you should file PRs for the failing tests and preferably
mention the relevant PR in the xfail.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH] libstdc++: implement locale support for AIX

2021-01-11 Thread Rainer Orth
Hi David, Clement,

> The patch is local to libstdc++ AIX support, so I believe that I can approve 
> it.

have you considered merging the dragonfly and aix trees?  I'm asking
because it seems prudent to try and avoid creating more and more
almost-but-not-quite-similar configurations (Solaris might be able to
use the same code, at least in 11.4 which has XPG7 support).

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH v4 04/29] Refactor 64-bit shift functions into a new file.

2021-01-11 Thread Richard Earnshaw via Gcc-patches
On 11/01/2021 11:10, g...@danielengel.com wrote:
> From: Daniel Engel 
> 
> gcc/libgcc/ChangeLog:
> 2021-01-07 Daniel Engel 
> 
>   * config/arm/lib1funcs.S: Move __ashldi3(), __ashrdi3(), __lshrdi3() to
>   * config/arm/bits/shift.S: New file.

Again, drop the bits/

R.
> ---
>  libgcc/config/arm/bits/shift.S | 102 
>  libgcc/config/arm/lib1funcs.S  | 103 +
>  2 files changed, 103 insertions(+), 102 deletions(-)
>  create mode 100644 libgcc/config/arm/bits/shift.S
> 
> diff --git a/libgcc/config/arm/bits/shift.S b/libgcc/config/arm/bits/shift.S
> new file mode 100644
> index 000..94e466ac0d2
> --- /dev/null
> +++ b/libgcc/config/arm/bits/shift.S
> @@ -0,0 +1,102 @@
> +
> +#ifdef L_lshrdi3
> +
> + FUNC_START lshrdi3
> + FUNC_ALIAS aeabi_llsr lshrdi3
> + 
> +#ifdef __thumb__
> + lsrsal, r2
> + movsr3, ah
> + lsrsah, r2
> + mov ip, r3
> + subsr2, #32
> + lsrsr3, r2
> + orrsal, r3
> + negsr2, r2
> + mov r3, ip
> + lslsr3, r2
> + orrsal, r3
> + RET
> +#else
> + subsr3, r2, #32
> + rsb ip, r2, #32
> + movmi   al, al, lsr r2
> + movpl   al, ah, lsr r3
> + orrmi   al, al, ah, lsl ip
> + mov ah, ah, lsr r2
> + RET
> +#endif
> + FUNC_END aeabi_llsr
> + FUNC_END lshrdi3
> +
> +#endif
> + 
> +#ifdef L_ashrdi3
> + 
> + FUNC_START ashrdi3
> + FUNC_ALIAS aeabi_lasr ashrdi3
> + 
> +#ifdef __thumb__
> + lsrsal, r2
> + movsr3, ah
> + asrsah, r2
> + subsr2, #32
> + @ If r2 is negative at this point the following step would OR
> + @ the sign bit into all of AL.  That's not what we want...
> + bmi 1f
> + mov ip, r3
> + asrsr3, r2
> + orrsal, r3
> + mov r3, ip
> +1:
> + negsr2, r2
> + lslsr3, r2
> + orrsal, r3
> + RET
> +#else
> + subsr3, r2, #32
> + rsb ip, r2, #32
> + movmi   al, al, lsr r2
> + movpl   al, ah, asr r3
> + orrmi   al, al, ah, lsl ip
> + mov ah, ah, asr r2
> + RET
> +#endif
> +
> + FUNC_END aeabi_lasr
> + FUNC_END ashrdi3
> +
> +#endif
> +
> +#ifdef L_ashldi3
> +
> + FUNC_START ashldi3
> + FUNC_ALIAS aeabi_llsl ashldi3
> + 
> +#ifdef __thumb__
> + lslsah, r2
> + movsr3, al
> + lslsal, r2
> + mov ip, r3
> + subsr2, #32
> + lslsr3, r2
> + orrsah, r3
> + negsr2, r2
> + mov r3, ip
> + lsrsr3, r2
> + orrsah, r3
> + RET
> +#else
> + subsr3, r2, #32
> + rsb ip, r2, #32
> + movmi   ah, ah, lsl r2
> + movpl   ah, al, lsl r3
> + orrmi   ah, ah, al, lsr ip
> + mov al, al, lsl r2
> + RET
> +#endif
> + FUNC_END aeabi_llsl
> + FUNC_END ashldi3
> +
> +#endif
> +
> diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
> index acafff62448..c7a3b85bf2b 100644
> --- a/libgcc/config/arm/lib1funcs.S
> +++ b/libgcc/config/arm/lib1funcs.S
> @@ -1618,108 +1618,7 @@ LSYM(Lover12):
>  
>  /* Prevent __aeabi double-word shifts from being produced on SymbianOS.  */
>  #ifndef __symbian__
> -
> -#ifdef L_lshrdi3
> -
> - FUNC_START lshrdi3
> - FUNC_ALIAS aeabi_llsr lshrdi3
> - 
> -#ifdef __thumb__
> - lsrsal, r2
> - movsr3, ah
> - lsrsah, r2
> - mov ip, r3
> - subsr2, #32
> - lsrsr3, r2
> - orrsal, r3
> - negsr2, r2
> - mov r3, ip
> - lslsr3, r2
> - orrsal, r3
> - RET
> -#else
> - subsr3, r2, #32
> - rsb ip, r2, #32
> - movmi   al, al, lsr r2
> - movpl   al, ah, lsr r3
> - orrmi   al, al, ah, lsl ip
> - mov ah, ah, lsr r2
> - RET
> -#endif
> - FUNC_END aeabi_llsr
> - FUNC_END lshrdi3
> -
> -#endif
> - 
> -#ifdef L_ashrdi3
> - 
> - FUNC_START ashrdi3
> - FUNC_ALIAS aeabi_lasr ashrdi3
> - 
> -#ifdef __thumb__
> - lsrsal, r2
> - movsr3, ah
> - asrsah, r2
> - subsr2, #32
> - @ If r2 is negative at this point the following step would OR
> - @ the sign bit into all of AL.  That's not what we want...
> - bmi 1f
> - mov ip, r3
> - asrsr3, r2
> - orrsal, r3
> - mov r3, ip
> -1:
> - negsr2, r2
> - lslsr3, r2
> - orrsal, r3
> - RET
> -#else
> - subsr3, r2, #32
> - rsb ip, r2, #32
> - movmi   al, al, lsr r2
> - movpl   al, ah, asr r3
> - orrmi   al, al, ah, lsl ip
> - mov ah, ah, asr r2
> - RET
> -#endif
> -
> - FUNC_END aeabi_lasr
> - FUNC_END ashrdi3
> -
> -#endif
> -
> -#ifdef L_ashldi3
> -
> - FUNC_START ashldi3
> - FUNC_ALIAS aeabi_llsl ashldi3
> - 
> -#ifdef __thumb__
> - lslsah, r2
> - movsr3, al
> - lslsal, r2
> -

Re: [PATCH v4 03/29] Refactor 'ctz' functions into a new file.

2021-01-11 Thread Richard Earnshaw via Gcc-patches
On 11/01/2021 11:10, g...@danielengel.com wrote:
> From: Daniel Engel 
> 
> gcc/libgcc/ChangeLog:
> 2021-01-07 Daniel Engel 
> 
>   * config/arm/lib1funcs.S: Move __ctzsi2() to
>   * config/arm/bits/ctz2.S: New file.

Similar to the previous patch, just use ctz.S without the bits subdir.

> ---
>  libgcc/config/arm/bits/ctz2.S | 65 +++
>  libgcc/config/arm/lib1funcs.S | 65 +--
>  2 files changed, 66 insertions(+), 64 deletions(-)
>  create mode 100644 libgcc/config/arm/bits/ctz2.S
> 
> diff --git a/libgcc/config/arm/bits/ctz2.S b/libgcc/config/arm/bits/ctz2.S
> new file mode 100644
> index 000..f0422d1fbba
> --- /dev/null
> +++ b/libgcc/config/arm/bits/ctz2.S
> @@ -0,0 +1,65 @@
> +
> +#ifdef L_ctzsi2
> +#ifdef NOT_ISA_TARGET_32BIT
> +FUNC_START ctzsi2
> + negsr1, r0
> + andsr0, r0, r1
> + movsr1, #28
> + movsr3, #1
> + lslsr3, r3, #16
> + cmp r0, r3 /* 0x1 */
> + bcc 2f
> + lsrsr0, r0, #16
> + subsr1, r1, #16
> +2:   lsrsr3, r3, #8
> + cmp r0, r3 /* #0x100 */
> + bcc 2f
> + lsrsr0, r0, #8
> + subsr1, r1, #8
> +2:   lsrsr3, r3, #4
> + cmp r0, r3 /* #0x10 */
> + bcc 2f
> + lsrsr0, r0, #4
> + subsr1, r1, #4
> +2:   adr r2, 1f
> + ldrbr0, [r2, r0]
> + subsr0, r0, r1
> + bx lr
> +.align 2
> +1:
> +.byte27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
> + FUNC_END ctzsi2
> +#else
> +ARM_FUNC_START ctzsi2
> + rsb r1, r0, #0
> + and r0, r0, r1
> +# if defined (__ARM_FEATURE_CLZ)
> + clz r0, r0
> + rsb r0, r0, #31
> + RET
> +# else
> + mov r1, #28
> + cmp r0, #0x1
> + do_it   cs, t
> + movcs   r0, r0, lsr #16
> + subcs   r1, r1, #16
> + cmp r0, #0x100
> + do_it   cs, t
> + movcs   r0, r0, lsr #8
> + subcs   r1, r1, #8
> + cmp r0, #0x10
> + do_it   cs, t
> + movcs   r0, r0, lsr #4
> + subcs   r1, r1, #4
> + adr r2, 1f
> + ldrbr0, [r2, r0]
> + sub r0, r0, r1
> + RET
> +.align 2
> +1:
> +.byte27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
> +# endif /* !defined (__ARM_FEATURE_CLZ) */
> + FUNC_END ctzsi2
> +#endif
> +#endif /* L_clzsi2 */
> +
> diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
> index f5aa5505e9d..acafff62448 100644
> --- a/libgcc/config/arm/lib1funcs.S
> +++ b/libgcc/config/arm/lib1funcs.S
> @@ -1723,70 +1723,7 @@ LSYM(Lover12):
>  #endif /* __symbian__ */
>  
>  #include "bits/clz2.S"
> -
> -#ifdef L_ctzsi2
> -#ifdef NOT_ISA_TARGET_32BIT
> -FUNC_START ctzsi2
> - negsr1, r0
> - andsr0, r0, r1
> - movsr1, #28
> - movsr3, #1
> - lslsr3, r3, #16
> - cmp r0, r3 /* 0x1 */
> - bcc 2f
> - lsrsr0, r0, #16
> - subsr1, r1, #16
> -2:   lsrsr3, r3, #8
> - cmp r0, r3 /* #0x100 */
> - bcc 2f
> - lsrsr0, r0, #8
> - subsr1, r1, #8
> -2:   lsrsr3, r3, #4
> - cmp r0, r3 /* #0x10 */
> - bcc 2f
> - lsrsr0, r0, #4
> - subsr1, r1, #4
> -2:   adr r2, 1f
> - ldrbr0, [r2, r0]
> - subsr0, r0, r1
> - bx lr
> -.align 2
> -1:
> -.byte27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
> - FUNC_END ctzsi2
> -#else
> -ARM_FUNC_START ctzsi2
> - rsb r1, r0, #0
> - and r0, r0, r1
> -# if defined (__ARM_FEATURE_CLZ)
> - clz r0, r0
> - rsb r0, r0, #31
> - RET
> -# else
> - mov r1, #28
> - cmp r0, #0x1
> - do_it   cs, t
> - movcs   r0, r0, lsr #16
> - subcs   r1, r1, #16
> - cmp r0, #0x100
> - do_it   cs, t
> - movcs   r0, r0, lsr #8
> - subcs   r1, r1, #8
> - cmp r0, #0x10
> - do_it   cs, t
> - movcs   r0, r0, lsr #4
> - subcs   r1, r1, #4
> - adr r2, 1f
> - ldrbr0, [r2, r0]
> - sub r0, r0, r1
> - RET
> -.align 2
> -1:
> -.byte27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
> -# endif /* !defined (__ARM_FEATURE_CLZ) */
> - FUNC_END ctzsi2
> -#endif
> -#endif /* L_clzsi2 */
> +#include "bits/ctz2.S"
>  
>  /*  
> */
>  /* These next two sections are here despite the fact that they contain Thumb 
> 



Re: [PATCH v4 02/29] Refactor 'clz' functions into a new file.

2021-01-11 Thread Richard Earnshaw via Gcc-patches
On 11/01/2021 11:10, g...@danielengel.com wrote:
> From: Daniel Engel 
> 
> gcc/libgcc/ChangeLog:
> 2021-01-07 Daniel Engel 
> 
>   * config/arm/lib1funcs.S: Move __clzsi2() and __clzdi2() to
>   * config/arm/bits/clz2.S: New file.

No, please don't push these down into a subdirectory.  They do not
represent a clear subfunctional distinction, so creating a load of disk
hierarchy is just confusing.  Just put the code in config/arm/clz.S

Otherwise this is just a re-org, so it's OK.

R.

> ---
>  libgcc/config/arm/bits/clz2.S | 124 ++
>  libgcc/config/arm/lib1funcs.S | 123 +
>  2 files changed, 125 insertions(+), 122 deletions(-)
>  create mode 100644 libgcc/config/arm/bits/clz2.S
> 
> diff --git a/libgcc/config/arm/bits/clz2.S b/libgcc/config/arm/bits/clz2.S
> new file mode 100644
> index 000..1c8f10a5b29
> --- /dev/null
> +++ b/libgcc/config/arm/bits/clz2.S
> @@ -0,0 +1,124 @@
> +
> +#ifdef L_clzsi2
> +#ifdef NOT_ISA_TARGET_32BIT
> +FUNC_START clzsi2
> + movsr1, #28
> + movsr3, #1
> + lslsr3, r3, #16
> + cmp r0, r3 /* 0x10000 */
> + bcc 2f
> + lsrsr0, r0, #16
> + subsr1, r1, #16
> +2:   lsrsr3, r3, #8
> + cmp r0, r3 /* #0x100 */
> + bcc 2f
> + lsrsr0, r0, #8
> + subsr1, r1, #8
> +2:   lsrsr3, r3, #4
> + cmp r0, r3 /* #0x10 */
> + bcc 2f
> + lsrsr0, r0, #4
> + subsr1, r1, #4
> +2:   adr r2, 1f
> + ldrbr0, [r2, r0]
> + addsr0, r0, r1
> + bx lr
> +.align 2
> +1:
> +.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
> + FUNC_END clzsi2
> +#else
> +ARM_FUNC_START clzsi2
> +# if defined (__ARM_FEATURE_CLZ)
> + clz r0, r0
> + RET
> +# else
> + mov r1, #28
> + cmp r0, #0x10000
> + do_it   cs, t
> + movcs   r0, r0, lsr #16
> + subcs   r1, r1, #16
> + cmp r0, #0x100
> + do_it   cs, t
> + movcs   r0, r0, lsr #8
> + subcs   r1, r1, #8
> + cmp r0, #0x10
> + do_it   cs, t
> + movcs   r0, r0, lsr #4
> + subcs   r1, r1, #4
> + adr r2, 1f
> + ldrbr0, [r2, r0]
> + add r0, r0, r1
> + RET
> +.align 2
> +1:
> +.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
> +# endif /* !defined (__ARM_FEATURE_CLZ) */
> + FUNC_END clzsi2
> +#endif
> +#endif /* L_clzsi2 */
> +
> +#ifdef L_clzdi2
> +#if !defined (__ARM_FEATURE_CLZ)
> +
> +# ifdef NOT_ISA_TARGET_32BIT
> +FUNC_START clzdi2
> + push{r4, lr}
> + cmp xxh, #0
> + bne 1f
> +#  ifdef __ARMEB__
> + movsr0, xxl
> + bl  __clzsi2
> + addsr0, r0, #32
> + b 2f
> +1:
> + bl  __clzsi2
> +#  else
> + bl  __clzsi2
> + addsr0, r0, #32
> + b 2f
> +1:
> + movsr0, xxh
> + bl  __clzsi2
> +#  endif
> +2:
> + pop {r4, pc}
> +# else /* NOT_ISA_TARGET_32BIT */
> +ARM_FUNC_START clzdi2
> + do_push {r4, lr}
> + cmp xxh, #0
> + bne 1f
> +#  ifdef __ARMEB__
> + mov r0, xxl
> + bl  __clzsi2
> + add r0, r0, #32
> + b 2f
> +1:
> + bl  __clzsi2
> +#  else
> + bl  __clzsi2
> + add r0, r0, #32
> + b 2f
> +1:
> + mov r0, xxh
> + bl  __clzsi2
> +#  endif
> +2:
> + RETLDM  r4
> + FUNC_END clzdi2
> +# endif /* NOT_ISA_TARGET_32BIT */
> +
> +#else /* defined (__ARM_FEATURE_CLZ) */
> +
> +ARM_FUNC_START clzdi2
> + cmp xxh, #0
> + do_it   eq, et
> + clzeq   r0, xxl
> + clzne   r0, xxh
> + addeq   r0, r0, #32
> + RET
> + FUNC_END clzdi2
> +
> +#endif
> +#endif /* L_clzdi2 */
> +
> diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
> index b4541bae791..f5aa5505e9d 100644
> --- a/libgcc/config/arm/lib1funcs.S
> +++ b/libgcc/config/arm/lib1funcs.S
> @@ -1722,128 +1722,7 @@ LSYM(Lover12):
>  
>  #endif /* __symbian__ */
>  
> -#ifdef L_clzsi2
> -#ifdef NOT_ISA_TARGET_32BIT
> -FUNC_START clzsi2
> - movsr1, #28
> - movsr3, #1
> - lslsr3, r3, #16
> - cmp r0, r3 /* 0x10000 */
> - bcc 2f
> - lsrsr0, r0, #16
> - subsr1, r1, #16
> -2:   lsrsr3, r3, #8
> - cmp r0, r3 /* #0x100 */
> - bcc 2f
> - lsrsr0, r0, #8
> - subsr1, r1, #8
> -2:   lsrsr3, r3, #4
> - cmp r0, r3 /* #0x10 */
> - bcc 2f
> - lsrsr0, r0, #4
> - subsr1, r1, #4
> -2:   adr r2, 1f
> - ldrbr0, [r2, r0]
> - addsr0, r0, r1
> - bx lr
> -.align 2
> -1:
> -.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
> - FUNC_END clzsi2
> -#else
> -ARM_FUNC_START clzsi2
> -# if defined (__ARM_FEATURE_CLZ)
> - clz r0, r0
> - RET
> -# else
> - mov r1, #28
> - cmp r0, #0x10000
> - do_it   cs, t
> - movcs   r0, r0, lsr #16
> - subcs   r1, r1, #16
> - cmp r0, #0x100
> - do_it   cs, t

Re: [PATCH] libstdc++: implement locale support for AIX

2021-01-11 Thread CHIGOT, CLEMENT via Gcc-patches
Hi David,

Alright, I'll see what I can do.
Would "Skip if target = aix" be ok, especially for the few that I have
absolutely no idea why they are failing ?

Clément

From: David Edelsohn 
Sent: Monday, January 11, 2021 4:03 PM
To: CHIGOT, CLEMENT 
Cc: gcc-patches@gcc.gnu.org 
Subject: Re: [PATCH] libstdc++: implement locale support for AIX

Caution! External email. Do not open attachments or click links, unless this 
email comes from a known sender and you know the content is safe.

Hi, Clement

The patch is local to libstdc++ AIX support, so I believe that I can approve it.

libstdc++ loads the gcc testsuite target-supports.exp extensions. The
patch needs to update the libstdc++ testcases to require wchar32 or
utf-8 as appropriate to avoid the failures.

Thanks, David

On Mon, Jan 11, 2021 at 4:14 AM CHIGOT, CLEMENT  wrote:
>
> Hi everyone,
>
> This patch adds a partial locale support in libstdc++
> for AIX.
> The message_members part isn't yet implemented.
>
> The tests have been launched in AIX7.2 and AIX7.1.
> There are a few new tests failing (~20), but most of them
> weren't launched before and they don't seem too much
> related to locale itself. Some are failing in 32bit because
> wchar_t is 16bit on AIX. Some others are failing because
> of some slight AIX differences.
>
> Please apply for me if approved.
> If possible, could it be backported as is in gcc-10, gcc-9
> and maybe gcc-8 branch ?
> For gcc-8 branch, there are a few changes to be made in
> check_v3_target_namedlocale where the indentation isn't
> the same, and the configure needs to be regenerated too.
> I've already made the patch if needed.
>
> Clément Chigot
> ATOS Bull SAS
> 1 rue de Provence - 38432 Échirolles - France
>


Re: [PATCH v4 01/29] Add and organize macros.

2021-01-11 Thread Richard Earnshaw via Gcc-patches
Some initial comments.

On 11/01/2021 11:10, g...@danielengel.com wrote:
> From: Daniel Engel 
> 
> These definitions facilitate subsequent patches in this series.
> 
> gcc/libgcc/ChangeLog:
> 2021-01-07 Daniel Engel 
> 
>   * config/arm/t-elf: Organize functions into logical groups.
>   * config/arm/lib1funcs.S: Add FUNC_START macro variations for
>   weak functions and manual control of the target section;
>   rename THUMB_FUNC_START as THUMB_FUNC_ENTRY for consistency;
>   removed unused macros THUMB_SYNTAX, ARM_SYM_START, SYM_END;
>   removed redundant syntax directives.

This needs to be re-formatted using the correct ChangeLog style, which
is in most cases

* <file> (<function>): <description of change>.

You can repeat for multiple functions in the same file, but leave off
the "* " part as long as they are contiguous in the log.


> ---
>  libgcc/config/arm/lib1funcs.S | 114 +++---
>  libgcc/config/arm/t-elf   |  55 +---
>  2 files changed, 110 insertions(+), 59 deletions(-)
> 
> diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
> index c2fcfc503ec..b4541bae791 100644
> --- a/libgcc/config/arm/lib1funcs.S
> +++ b/libgcc/config/arm/lib1funcs.S
> @@ -69,11 +69,13 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
> If not, see
>  #define TYPE(x) .type SYM(x),function
>  #define SIZE(x) .size SYM(x), . - SYM(x)
>  #define LSYM(x) .x
> +#define LLSYM(x) .L##x
>  #else
>  #define __PLT__
>  #define TYPE(x)
>  #define SIZE(x)
>  #define LSYM(x) x
> +#define LLSYM(x) x
>  #endif

I can live with this.

>  
>  /* Function end macros.  Variants for interworking.  */
> @@ -247,6 +249,14 @@ LSYM(Lend_fde):
>  
>  #define COND(op1, op2, cond) op1 ## op2 ## cond
>  
> +#ifdef __ARM_FEATURE_IT
> +  #define IT(ins,c) ins##c
> +#else
> +  // Assume default Thumb-1 flags-affecting suffix 's'.
> +  // Almost all instructions require this in unified syntax.
> +  #define IT(ins,c) ins##s

This simply doesn't make much sense, at least, not enough to make it
generally available.  It seems it would be invariably wrong to replace a
conditional instruction in arm/thumb2 code with a non-conditional flag
setting instruction in thumb1.  So please don't do this as it's likely
to be a source of bugs going forwards if folk don't understand exactly
when it is safe.

> +#endif
> +
>  #ifdef __ARM_EABI__
>  .macro ARM_LDIV0 name signed
>   cmp r0, #0
> @@ -280,7 +290,6 @@ LSYM(Lend_fde):
>   pop {r1, pc}
>  
>  #elif defined(__thumb2__)
> - .syntax unified

OK - I think this is now set unconditionally at the top level.

>   .ifc \signed, unsigned
>   cbz r0, 1f
>   mov r0, #0xffffffff
> @@ -324,10 +333,6 @@ LSYM(Lend_fde):
>  .endm
>  #endif
>  
> -.macro FUNC_END name
> - SIZE (__\name)
> -.endm
> -

Moved later, OK.

>  .macro DIV_FUNC_END name signed
>   cfi_start   __\name, LSYM(Lend_div0)
>  LSYM(Ldiv0):
> @@ -340,48 +345,64 @@ LSYM(Ldiv0):
>   FUNC_END \name
>  .endm
>  
> -.macro THUMB_FUNC_START name
> - .globl  SYM (\name)
> - TYPE(\name)
> - .thumb_func
> -SYM (\name):
> -.endm

I'm not really sure what your reasoning is for removing the *FUNC_START
macros and then adding almost identical macros of the form *FUNC_ENTRY.
 It seems to me like unnecessary churn.

> -
>  /* Function start macros.  Variants for ARM and Thumb.  */
> -
>  #ifdef __thumb__
>  #define THUMB_FUNC .thumb_func
>  #define THUMB_CODE .force_thumb
> -# if defined(__thumb2__)
> -#define THUMB_SYNTAX
> -# else
> -#define THUMB_SYNTAX
> -# endif
>  #else
>  #define THUMB_FUNC
>  #define THUMB_CODE
> -#define THUMB_SYNTAX

OK.  Dead code.

>  #endif
>  
> -.macro FUNC_START name
> - .text
> +.macro THUMB_FUNC_ENTRY name
> + .globl  SYM (\name)
> + TYPE(\name)
> + .force_thumb
> + .thumb_func
> +SYM (\name):
> +.endm
> +
> +/* Strong global export, no section change. */
> +.macro FUNC_ENTRY name
>   .globl SYM (__\name)
>   TYPE (__\name)
> - .align 0

It's often wrong to assume the section is correctly aligned on entry -
so removing this is suspect, unless you're adding explicit alignment
before every function instance.  I know the issue of literals in the
code segments is currently still under discussion; but if they do exist,
they have to be 32-bit aligned because the thumb1 adr instruction will
not work correctly otherwise.  Arm code must always be (at least) 32-bit
aligned, and thumb code 16-bit - but there are often significant
performance wins from being more aligned than that on entry.


>   THUMB_CODE
>   THUMB_FUNC
> - THUMB_SYNTAX
>  SYM (__\name):
>  .endm
>  
> -.macro ARM_SYM_START name
> -   TYPE (\name)
> -   .align 0
> -SYM (\name):
> +/* Weak global export, no section change. */
> +.macro WEAK_ENTRY name
> + .weak SYM(__\name)
> + FUNC_ENTRY \name
> +.endm
> +
> +/* Strong global export, explicit section. */
> +.macro FUNC_START_SECTION

Re: [PATCH] libstdc++: implement locale support for AIX

2021-01-11 Thread David Edelsohn via Gcc-patches
Hi, Clement

The patch is local to libstdc++ AIX support, so I believe that I can approve it.

libstdc++ loads the gcc testsuite target-supports.exp extensions. The
patch needs to update the libstdc++ testcases to require wchar32 or
utf-8 as appropriate to avoid the failures.

Thanks, David

On Mon, Jan 11, 2021 at 4:14 AM CHIGOT, CLEMENT  wrote:
>
> Hi everyone,
>
> This patch adds a partial locale support in libstdc++
> for AIX.
> The message_members part isn't yet implemented.
>
> The tests have been launched in AIX7.2 and AIX7.1.
> There are a few new tests failing (~20), but most of them
> weren't launched before and they don't seem too much
> related to locale itself. Some are failing in 32bit because
> wchar_t is 16bit on AIX. Some others are failing because
> of some slight AIX differences.
>
> Please apply for me if approved.
> If possible, could it be backported as is in gcc-10, gcc-9
> and maybe gcc-8 branch ?
> For gcc-8 branch, there are a few changes to be made in
> check_v3_target_namedlocale where the indentation isn't
> the same, and the configure needs to be regenerated too.
> I've already made the patch if needed.
>
> Clément Chigot
> ATOS Bull SAS
> 1 rue de Provence - 38432 Échirolles - France
>


Re: [PATCH] [WIP] openmp: Add OpenMP 5.0 task detach clause support

2021-01-11 Thread Kwok Cheung Yeung

Hello

Thanks for the review. Due to the Christmas holidays I have not finished 
addressing all these issues yet, but I expect to be done by the end of this 
week. Can this patch still make it for GCC 10, as I believe stage 4 is starting 
soon?


Thanks

Kwok

On 10/12/2020 2:38 pm, Jakub Jelinek wrote:

On Wed, Dec 09, 2020 at 05:37:24PM +, Kwok Cheung Yeung wrote:

--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -14942,6 +14942,11 @@ c_finish_omp_clauses (tree clauses, enum 
c_omp_region_type ort)
  pc = &OMP_CLAUSE_CHAIN (c);
  continue;
  
+	case OMP_CLAUSE_DETACH:

+ t = OMP_CLAUSE_DECL (c);
+ pc = &OMP_CLAUSE_CHAIN (c);
+ continue;
+


If you wouldn't need to do anything for C for the detach clause, you would
just add:
case OMP_CLAUSE_DETACH:
at the end of the case list that starts below:

case OMP_CLAUSE_IF:
case OMP_CLAUSE_NUM_THREADS:
case OMP_CLAUSE_NUM_TEAMS:


But you actually do need to do something, even for C.

There are two restrictions:
- At most one detach clause can appear on the directive.
- If a detach clause appears on the directive, then a mergeable clause cannot 
appear on the same directive.
that should be checked and diagnosed.  One place to do that would be
like usually in all the FEs separately, that would mean adding
   bool mergeable_seen = false, detach_seen = false;
vars and for those clauses setting the *_seen, plus for DETACH
already complain if detach_seen is already true and remove the clause.
And at the end of the loop if mergeable_seen && detach_seen, diagnose
and remove one of them (perhaps better detach clause).
There is the optional second loop that can be used for the removal...

Testcase coverage should include:
   #pragma omp task detach (x) detach (y)
as well as
   #pragma omp task mergeable detach (x)
and
   #pragma omp task detach (x) mergeable
(and likewise for Fortran).


+  if (cp_lexer_next_token_is_not (parser->lexer, CPP_NAME))
+{
+  cp_parser_error (parser, "expected identifier");
+  return list;
+}
+
+  location_t id_loc = cp_lexer_peek_token (parser->lexer)->location;
+  tree t, identifier = cp_parser_identifier (parser);
+
+  if (identifier == error_mark_node)
+t = error_mark_node;
+  else
+{
+  t = cp_parser_lookup_name_simple
+   (parser, identifier,
+cp_lexer_peek_token (parser->lexer)->location);
+  if (t == error_mark_node)
+   cp_parser_name_lookup_error (parser, identifier, t, NLE_NULL,
+id_loc);


The above doesn't match what cp_parser_omp_var_list_no_open does,
in particular it should use cp_parser_id_expression
instead of cp_parser_identifier etc.


+  else
+   {
+ tree type = TYPE_MAIN_VARIANT (TREE_TYPE (t));
+ if (!INTEGRAL_TYPE_P (type)
+ || TREE_CODE (type) != ENUMERAL_TYPE
+ || DECL_NAME (TYPE_NAME (type))
+  != get_identifier ("omp_event_handle_t"))
+   {
+ error_at (id_loc, "%<detach%> clause event handle "
+   "has type %qT rather than "
+   "%<omp_event_handle_t%>",
+   type);
+ return list;


You can't do this here for C++, it needs to be done in finish_omp_clauses
instead and only be done if the type is not a dependent type.
Consider (e.g. should be in testsuite)
template <typename T>
void
foo ()
{
   T t;
   #pragma omp task detach (t)
   ;
}

template <typename T>
void
bar ()
{
   T t;
   #pragma omp task detach (t)
   ;
}

void
baz ()
{
   foo <omp_event_handle_t> ();
   bar <int> (); // Instantiating this should error
}


@@ -7394,6 +7394,9 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type 
ort)
}
}
  break;
+   case OMP_CLAUSE_DETACH:
+ t = OMP_CLAUSE_DECL (c);
+ break;
  


Again, restriction checking here, plus check the type if it is
non-dependent, otherwise defer that checking for finish_omp_clauses when
it will not be dependent anymore.

I think you need to handle OMP_CLAUSE_DETACH in cp/pt.c too.


--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -9733,6 +9733,19 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq 
*pre_p,
}
  break;
  
+	case OMP_CLAUSE_DETACH:

+ decl = OMP_CLAUSE_DECL (c);
+ if (outer_ctx)
+   {
+ splay_tree_node on
+   = splay_tree_lookup (outer_ctx->variables,
+(splay_tree_key)decl);
+ if (on == NULL || (on->value & GOVD_DATA_SHARE_CLASS) == 0)
+   omp_firstprivatize_variable (outer_ctx, decl);
+ omp_notice_variable (outer_ctx, decl, true);
+   }
+ break;


I don't understand this.  My reading of:
"The event-handle will be considered as if it was specified on a
firstprivate clause. The use of a variable in a detach clause expression of a 
task
construct causes an implicit reference to the variable in all enclosing
constructs

Re: [PATCH 1/3] GCC: Pass --plugin to AR and RANLIB

2021-01-11 Thread Luis Machado via Gcc-patches

This seems to have broken the builds on AArch64-Linux Ubuntu 18.04.

make[2]: Entering directory 'binutils-gdb-master-bionic/libiberty'
rm -f ./libiberty.a pic/./libiberty.a noasan/./libiberty.a
ar --plugin /usr/lib/gcc/aarch64-linux-gnu/7/liblto_plugin.so rc 
./libiberty.a \
  ./regex.o ./cplus-dem.o ./cp-demangle.o ./md5.o ./sha1.o ./alloca.o 
./argv.o ./bsearch_r.o ./choose-temp.o ./concat.o ./cp-demint.o 
./crc32.o ./d-demangle.o ./dwarfnames.o ./dyn-string.o ./fdmatch.o 
./fibheap.o ./filedescriptor.o ./filename_cmp.o ./floatformat.o 
./fnmatch.o ./fopen_unlocked.o ./getopt.o ./getopt1.o ./getpwd.o 
./getruntime.o ./hashtab.o ./hex.o ./lbasename.o ./lrealpath.o 
./make-relative-prefix.o ./make-temp-file.o ./objalloc.o ./obstack.o 
./partition.o ./pexecute.o ./physmem.o ./pex-common.o ./pex-one.o 
./pex-unix.o ./vprintf-support.o ./rust-demangle.o ./safe-ctype.o 
./simple-object.o ./simple-object-coff.o ./simple-object-elf.o 
./simple-object-mach-o.o ./simple-object-xcoff.o ./sort.o ./spaces.o 
./splay-tree.o ./stack-limit.o ./strerror.o ./strsignal.o 
./timeval-utils.o ./unlink-if-ordinary.o ./xasprintf.o ./xatexit.o 
./xexit.o ./xmalloc.o ./xmemdup.o ./xstrdup.o ./xstrerror.o ./xstrndup.o 
./xvasprintf.o  ./setproctitle.o

ar: no operation specified
Makefile:252: recipe for target 'libiberty.a' failed
make[2]: *** [libiberty.a] Error 1

Reverting that patch makes the build OK again.

On 10/29/20 4:11 PM, H.J. Lu via Binutils wrote:

Detect GCC LTO plugin.  Pass --plugin to AR and RANLIB to support LTO
build.

* Makefile.tpl (AR): Add @AR_PLUGIN_OPTION@
(RANLIB): Add @RANLIB_PLUGIN_OPTION@.
* configure.ac: Include config/gcc-plugin.m4.
AC_SUBST AR_PLUGIN_OPTION and RANLIB_PLUGIN_OPTION.
* libtool.m4 (_LT_CMD_OLD_ARCHIVE): Pass --plugin to AR and
RANLIB if possible.
* Makefile.in: Regenerated.
* configure: Likewise.

config/

* gcc-plugin.m4 (GCC_PLUGIN_OPTION): New.

libiberty/

* Makefile.in (AR): Add @AR_PLUGIN_OPTION@
(RANLIB): Add @RANLIB_PLUGIN_OPTION@.
(configure_deps): Depend on ../config/gcc-plugin.m4.
* aclocal.m4: Include ../config/gcc-plugin.m4.
* configure.ac: AC_SUBST AR_PLUGIN_OPTION and
RANLIB_PLUGIN_OPTION.
* configure: Regenerated.

zlib/

* configure: Regenerated.
---
  Makefile.in|   5 +-
  Makefile.tpl   |   5 +-
  config/gcc-plugin.m4   |  28 ++
  configure  |  39 
  configure.ac   |  15 +++
  libiberty/Makefile.in  |   5 +-
  libiberty/aclocal.m4   |   1 +
  libiberty/configure|  37 
  libiberty/configure.ac |  12 +++
  libtool.m4 |  25 -
  zlib/configure | 206 -
  11 files changed, 368 insertions(+), 10 deletions(-)

diff --git a/Makefile.in b/Makefile.in
index fe34132f9e..978e777338 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -387,7 +387,7 @@ MAKEINFOFLAGS = --split-size=500
  # -
  
  AS = @AS@

-AR = @AR@
+AR = @AR@ @AR_PLUGIN_OPTION@
  AR_FLAGS = rc
  CC = @CC@
  CXX = @CXX@
@@ -396,7 +396,7 @@ LD = @LD@
  LIPO = @LIPO@
  NM = @NM@
  OBJDUMP = @OBJDUMP@
-RANLIB = @RANLIB@
+RANLIB = @RANLIB@ @RANLIB_PLUGIN_OPTION@
  READELF = @READELF@
  STRIP = @STRIP@
  WINDRES = @WINDRES@
@@ -52633,6 +52633,7 @@ AUTOCONF = autoconf
  $(srcdir)/configure: @MAINT@ $(srcdir)/configure.ac $(srcdir)/config/acx.m4 \
$(srcdir)/config/override.m4 $(srcdir)/config/proginstall.m4 \
$(srcdir)/config/elf.m4 $(srcdir)/config/isl.m4 \
+   $(srcdir)/config/gcc-plugin.m4 \
$(srcdir)/libtool.m4 $(srcdir)/ltoptions.m4 $(srcdir)/ltsugar.m4 \
$(srcdir)/ltversion.m4 $(srcdir)/lt~obsolete.m4
cd $(srcdir) && $(AUTOCONF)
diff --git a/Makefile.tpl b/Makefile.tpl
index 5b118a8ba4..a280a1498c 100644
--- a/Makefile.tpl
+++ b/Makefile.tpl
@@ -390,7 +390,7 @@ MAKEINFOFLAGS = --split-size=500
  # -
  
  AS = @AS@

-AR = @AR@
+AR = @AR@ @AR_PLUGIN_OPTION@
  AR_FLAGS = rc
  CC = @CC@
  CXX = @CXX@
@@ -399,7 +399,7 @@ LD = @LD@
  LIPO = @LIPO@
  NM = @NM@
  OBJDUMP = @OBJDUMP@
-RANLIB = @RANLIB@
+RANLIB = @RANLIB@ @RANLIB_PLUGIN_OPTION@
  READELF = @READELF@
  STRIP = @STRIP@
  WINDRES = @WINDRES@
@@ -1967,6 +1967,7 @@ AUTOCONF = autoconf
  $(srcdir)/configure: @MAINT@ $(srcdir)/configure.ac $(srcdir)/config/acx.m4 \
$(srcdir)/config/override.m4 $(srcdir)/config/proginstall.m4 \
$(srcdir)/config/elf.m4 $(srcdir)/config/isl.m4 \
+   $(srcdir)/config/gcc-plugin.m4 \
$(srcdir)/libtool.m4 $(srcdir)/ltoptions.m4 $(srcdir)/ltsugar.m4 \
$(srcdir)/ltversion.m4 $(srcdir)/lt~obsolete.m4
cd $(srcdir) && $(AUTOCONF)
diff --git a/config/gcc-plugin.m4 b/config/gcc-plugin.m4
index 8f27871911..c5b72e9a13 100644
--- a/config/gcc-plugin.m4
+++ b/config/gcc-plugin.m4
@@ -124,3 +124,31 @@ AC_DEFUN([G

Re: --enable-link-serialization=1 doesn't work

2021-01-11 Thread Matthias Klose
On 1/9/21 7:52 PM, Matthias Klose wrote:
> The attached patch makes the link targets a little bit more verbose. Ok to 
> commit?

approved by Jakub on IRC, checked in.

> It shows that --enable-link-serialization=1 doesn't work:
> 
> $ grep ^Linking ../log
> Linking gnat1 |==--  | 9%
> Linking cc1 |--| 0%
> Linking cc1 |==| 9%
> Linking gnat1 |  | 18%
> Linking brig1 |--| 18%
> Linking brig1 |==| 27%
> Linking cc1plus |==--  | 27%
> Linking cc1plus |  | 36%
> Linking d21 |--| 36%
> Linking d21 |==| 45%
> Linking f951 |==--  | 45%
> Linking f951 |  | 54%
> Linking go1 |--| 54%
> Linking go1 |==| 63%
> Linking lto1 |==--  | 63%
> Linking lto1 |  | 72%
> Linking lto-dump |--| 72%
> Linking lto-dump |==| 81%
> Linking cc1obj |==--  | 81%
> Linking cc1obj |  | 90%
> Linking cc1objplus |--| 90%
> Linking cc1objplus |==| 100%
> 
> At least the gnat1 link is started before the cc1 link finishes.

Jakub has a patch pending for that. Thanks for debugging.

Matthias


Re: [PATCH v3] libgcc: Thumb-1 Floating-Point Library for Cortex M0

2021-01-11 Thread Richard Earnshaw via Gcc-patches
On 09/01/2021 18:04, Daniel Engel wrote:
> On Sat, Jan 9, 2021, at 5:09 AM, Christophe Lyon wrote:
>> On Sat, 9 Jan 2021 at 13:27, Daniel Engel  wrote:
>>>
>>> On Thu, Jan 7, 2021, at 4:56 AM, Richard Earnshaw wrote:
 On 07/01/2021 00:59, Daniel Engel wrote:
> --snip--
>
> On Wed, Jan 6, 2021, at 9:05 AM, Richard Earnshaw wrote:
>
>> -- snip --
>>
>> - finally, your popcount implementations have data in the code segment.
>>  That's going to cause problems when we have compilation options such as
>> -mpure-code.
>
> I am just following the precedent of existing lib1funcs (e.g. __clzsi2).
> If this matters, you'll need to point in the right direction for the
> fix.  I'm not sure it does matter, since these functions are PIC anyway.

 That might be a bug in the clz implementations - Christophe: Any thoughts?
>>>
>>> __clzsi2() has test coverage in "gcc.c-torture/execute/builtin-bitops-1.c"
>> Thanks, I'll have a closer look at why I didn't see problems.
>>
>>> The 'clzs' and 'ctz' functions should never have problems.   -mpure-code
>>> appears to be valid only when the 'movt' instruction is available, which
>>> means that the 'clz' instruction will also be available, so no array loads.
>> No, -mpure-code is also supported with v6m.
>>
>>> Is the -mpure-code state detectable as a preprocessor flag?  While
>> No.
>>
>>> 'movw'/'movt' appears to be the canonical solution, I'm not sure it
>>> should be the default just because a processor supports Thumb-2.
>>>
>>> Do users wanting to use -mpure-code recompile the toolchain to avoid
>>> constant data in compiled C functions?  I don't think this is the
>>> default for the typical toolchain scripts.
>> No, users of -mpure-code do not recompile the toolchain.
> 
> I won't claim that my use of inline constants is correct.  It was not
> hard to find references to high security model processors that block
> reading from executable sections.
> 
> However, if all of the above is true, I think libgcc as a whole
> will have much bigger problems.  I count over 500 other instances
> in the disassembled v6m *.a file where functions load pc-relative
> data from '.text'.

The difference is that when the data-in-text references come from C
code, they can be eliminated simply by rebuilding the library with
-mpure-code on.  That's difficult, if not impossible to fix when the
source for a function is written in assembler.

> 
> For example:
> * C version of popcount
> * __powidf2 (0x3FF00000)
> * __mulsc3 (0x7F7FFFFF)
> * Most soft-float functions.
> 
> Still not seeing a clear resolution here.  Is it acceptable to use the 
> 
> "ldr rD, =const" 

No, that's just short-hand for an LDR from a literal pool that is
generated auto-magically by the assembler.  I also wouldn't trust that
when using any section other than .text for code, unless you add
explicit .ltorg directives to state where the currently pending literals
are to be dumped.

> 
> pattern?
> 
> Thanks,
> Daniel
> 

R.


RE: [PATCH 1/8 v9]middle-end slp: Support optimizing load distribution

2021-01-11 Thread Richard Biener
On Mon, 11 Jan 2021, Tamar Christina wrote:

> Hi Richi,
> 
> Attached is the updated patch.
> 
> Note that testcases for all of these will be committed with the patch but I'm
> Finishing up the 32-bit Arm changes to mirror the changes the AArch64 
> maintainer
> wanted and then have to do bootstrap which will take the majority of the day 
> so
> wanted to get these patches out first.
> 
> I also built spec with the matcher on and off and noticed no meaningful 
> change in
> Compile time but replacements in several benchmarks.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   * tree-vect-slp.c (optimize_load_redistribution_1): New.
>   (optimize_load_redistribution, vect_is_slp_load_node): New.
>   (vect_match_slp_patterns): Use it.
> 
> -- inline copy of patch --
> 
> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> index 
> 2a58e54fe51471df5f55ce4a524d0022744054b0..89e226ca3a25a6c77b86d46ba234ce54bd3cb83b
>  100644
> --- a/gcc/tree-vect-slp.c
> +++ b/gcc/tree-vect-slp.c
> @@ -2228,6 +2228,114 @@ calculate_unrolling_factor (poly_uint64 nunits, 
> unsigned int group_size)
>return exact_div (common_multiple (nunits, group_size), group_size);
>  }
>  
> +/* Helper that checks to see if a node is a load node. This is done based on
> +   two criteria:
> +   1) The node is internal
> +   2) The node has no children.  */
> +
> +static inline bool
> +vect_is_slp_load_node  (slp_tree root)
> +{
> +  return (SLP_TREE_DEF_TYPE (root) == vect_internal_def
> +   && !SLP_TREE_CHILDREN (root).exists ());

this would return true for induction defs as well (the SLP_TREE_DEF_TYPE
only distinguishes between vect_internal_def and constant/external 
def...).  It would also not match masked loads.  A closer match
would be

  SLP_TREE_DEF_TYPE (root) == vect_internal_def
  && STMT_VINFO_GROUPED_ACCESS (SLP_TREE_REPRESENTATIVE (root))
  && DR_IS_READ (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (root)))

but not sure whether you handle masked loads OK (so you could
do the !SLP_TREE_CHILDREN (root).exists () in the caller if not).

> +}
> +
> +
> +/* Helper function of optimize_load_redistribution that performs the 
> operation
> +   recursively.  */
> +
> +static slp_tree
> +optimize_load_redistribution_1 (scalar_stmts_to_slp_tree_map_t *bst_map,
> + vec_info *vinfo, unsigned int group_size,
> + hash_set *visited, slp_tree root)
> +{
> +  if (visited->add (root))
> +return NULL;
> +
> +  slp_tree node;
> +  unsigned i;
> +
> +  /* For now, we don't know anything about externals so do not do anything.  
> */
> +  if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
> +return NULL;
> +  else if (SLP_TREE_CODE (root) == VEC_PERM_EXPR)
> +{
> +  /* First convert this node into a load node and add it to the leaves
> + list and flatten the permute from a lane to a load one.  If it's
> + unneeded it will be elided later.  */
> +  vec stmts;
> +  stmts.create (SLP_TREE_LANES (root));
> +  lane_permutation_t lane_perm = SLP_TREE_LANE_PERMUTATION (root);
> +  for (unsigned j = 0; j < lane_perm.length (); j++)
> +{
> +  std::pair perm = lane_perm[j];
> +  node = SLP_TREE_CHILDREN (root)[perm.first];
> +
> +   if (!vect_is_slp_load_node (node))

stmts leaks here - I think you also want to still recurse to the SLP
children, there can be two_operator nodes consuming the complex
ops.  So maybe a break and guard the rest with j == lane_perm.length ().

> +return NULL;
> +
> +   stmts.quick_push (SLP_TREE_SCALAR_STMTS (node)[perm.second]);
> +}
> +
> +  if (dump_enabled_p ())
> + dump_printf_loc (MSG_NOTE, vect_location,
> +  "converting stmts on permute node %p\n", root);
> +
> +  bool *matches = XALLOCAVEC (bool, group_size);
> +  poly_uint64 max_nunits = 1;
> +  unsigned tree_size = 0, limit = 1;
> +  node = vect_build_slp_tree (vinfo, stmts, group_size, &max_nunits,
> +   matches, &limit, &tree_size, bst_map);
> +  if (!node)
> + stmts.release ();
> +
> +  return node;
> +}
> +
> +  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i , node)
> +{
> +  slp_tree value
> + = optimize_load_redistribution_1 (bst_map, vinfo, group_size, visited,
> +   node);
> +  if (value)
> + {
> +  SLP_TREE_CHILDREN (root)[i] = value;
> +  vect_free_slp_tree (node);
> + }
> +}
> +
> +  return NULL;
> +}
> +
> +/* Temporary workaround for loads not being CSEd during SLP build.  This
> +   function will traverse the SLP tree rooted in ROOT for INSTANCE and find
> +   VEC_PERM nodes that blend vectors from multiple nodes that all read from 
> the
> +   same DR such that the final operation is equal to a permuted load.  Such
> +   NODES are then directly converted into LOADS themselves.  The nodes are
> +   C

Re: [PATCH] Optimize macro: make it more predictable

2021-01-11 Thread Martin Liška

I'm suggesting postponing this to GCC 12 as I'm planning a bigger
target/optimize attribute (pragma) overhaul.

Martin

On 12/7/20 12:03 PM, Martin Liška wrote:

PING^2

On 11/26/20 2:56 PM, Martin Liška wrote:

PING^1

On 11/9/20 11:35 AM, Martin Liška wrote:

On 11/3/20 2:34 PM, Jakub Jelinek wrote:

On Tue, Nov 03, 2020 at 02:27:52PM +0100, Richard Biener wrote:

On Fri, Oct 23, 2020 at 1:47 PM Martin Liška  wrote:

This is a follow-up of the discussion that happened in thread about 
no_stack_protector
attribute: https://gcc.gnu.org/pipermail/gcc-patches/2020-May/545916.html

The current optimize attribute works in the following way:
- 1) we take current global_options as base
- 2) maybe_default_options is called for the currently selected optimization 
level, which
   means all rules in default_options_table are executed
- 3) attribute values are applied (via decode_options)

So the step 2) is problematic: in case of -O2 -fno-omit-frame-pointer and 
__attribute__((optimize("-fno-stack-protector")))
ends basically with -O2 -fno-stack-protector because -fno-omit-frame-pointer is 
default:
  /* -O1 and -Og optimizations.  */
  { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },

My patch handles that, and the current optimize attribute really behaves the same as 
appending the attribute value
to the command line. So far so good. We should also reflect that in 
documentation entry which is quite
vague right now:

"""
The optimize attribute is used to specify that a function is to be compiled 
with different optimization options than specified on the command line.
"""

and we may want to handle -Ox in the attribute in a special way. I guess many 
macro/pragma users expect that

-O2 -ftree-vectorize and __attribute__((optimize(1))) will end with -O1 and not
with -ftree-vectorize -O1 ?


Hmm.  I guess the only two reasonable options are to append to the active set
and thus end up with -ftree-vectorize -O1 or to start from an empty set and thus
end up with -O1.


I'd say we always want to append, but only take into account explicit
options.


Yes, I also prefer to always append and basically drop the "reset" 
functionality.


So basically get the effect of
take the command line, append to that options from the optimize/target
pragmas in effect and append to that options from optimize/target
attributes and only from that figure out the implicit options.


Few notes here:
- target and optimize attributes are separate so parsing happens independently; 
however
   they use global_options and global_options_set as a starting point
- you can have a series of wrapped optimize/pragma macros and again information 
is shared
in global_options/global_options_set
- target and optimize options interact, but in a controlled way with 
SET_OPTION_IF_UNSET

That said, I hope the biggest offender is right now the handling of -Olevel.

@Jakub: Do you see a situation with my patch where it breaks?

Thanks,
Martin



Jakub











Re: [PATCH] Fix UBSAN bootstrap

2021-01-11 Thread Nathan Sidwell

On 1/11/21 5:38 AM, Jakub Jelinek wrote:

On Mon, Jan 11, 2021 at 11:26:42AM +0100, Martin Liška wrote:

The problem here was that GCC-related options are not applied in stage2 (and later 
stages).
It's caused by the fact that CXX is xg++ in stage2 (and later stages).
Fixed with the following patch.

Ready to be installed?
Thanks,
Martin

libcody/ChangeLog:

PR bootstrap/98414
* Makefile.in: In stage2 in UBSAN bootstrap the CXX is called
xg++, so findstring g++ should be used instead of exact string
comparison.


I think better would be to follow gcc/{configure*,Make*} practice and don't
judge compilers based on names, but on what they actually are and test
perhaps not each individual flag separately, but their whole sets together.


Just so you know, it checks the name, because Clang identifies as GCC, 
but can be insufficiently GCC-like at times (not here though).


I think replacing more bits with libcpp configure goop is probably the 
better approach.


nathan

--
Nathan Sidwell


[PATCH] tree-optimization/91403 - avoid excessive code-generation

2021-01-11 Thread Richard Biener
The vectorizer, for large permuted grouped loads, generates
inefficient intermediate code (cleaned up only later) that runs
into complexity issues in SCEV analysis and elsewhere.  For the
non-single-element interleaving case we already put a hard limit
in place, this applies the same limit to the missing case.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-01-11  Richard Biener  

PR tree-optimization/91403
* tree-vect-data-refs.c (vect_analyze_group_access_1): Cap
single-element interleaving group size at 4096 elements.

* gcc.dg/vect/pr91403.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr91403.c | 11 +++
 gcc/tree-vect-data-refs.c   |  6 +-
 2 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr91403.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr91403.c 
b/gcc/testsuite/gcc.dg/vect/pr91403.c
new file mode 100644
index 000..5b9b76060ab
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr91403.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+
+extern int a[][100];
+int b;
+void c()
+{
+  for (int d = 2; d <= 9; d++)
+for (int e = 32; e <= 41; e++)
+  b += a[d][5];
+}
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index c71ff7378d2..97c8577ebe7 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -2538,7 +2538,11 @@ vect_analyze_group_access_1 (vec_info *vinfo, 
dr_vec_info *dr_info)
 size.  */
   if (DR_IS_READ (dr)
  && (dr_step % type_size) == 0
- && groupsize > 0)
+ && groupsize > 0
+ /* This could be UINT_MAX but as we are generating code in a very
+inefficient way we have to cap earlier.
+See PR91403 for example.  */
+ && groupsize <= 4096)
{
  DR_GROUP_FIRST_ELEMENT (stmt_info) = stmt_info;
  DR_GROUP_SIZE (stmt_info) = groupsize;
-- 
2.26.2


Re: [PATCH v2] aarch64: Add cpu cost tables for A64FX

2021-01-11 Thread Richard Sandiford via Gcc-patches
"Qian, Jianhua"  writes:
> Hi Richard
>
>> -Original Message-
>> From: Richard Sandiford 
>> Sent: Friday, January 8, 2021 7:04 PM
>> To: Qian, Jianhua/钱 建华 
>> Cc: gcc-patches@gcc.gnu.org
>> Subject: Re: [PATCH v2] aarch64: Add cpu cost tables for A64FX
>> 
>> Qian Jianhua  writes:
>> > This patch adds cost tables for A64FX.
>> >
>> > ChangeLog:
>> > 2021-01-08 Qian jianhua 
>> >
>> > gcc/
>> >* config/aarch64/aarch64-cost-tables.h (a64fx_extra_costs): New.
>> >* config/aarch64/aarch64.c (a64fx_addrcost_table): New.
>> >(a64fx_regmove_cost, a64fx_vector_cost): New.
>> >(a64fx_tunings): Use the new added cost tables.
>> 
>> OK for trunk, thanks.  The v1 patch is OK for branches that support
>> -mcpu=a64fx.
>> 
>> Would you like commit access, so that you can commit it yourself?
>> If so, please fill out the form mentioned at the beginning of
>> https://gcc.gnu.org/gitwrite.html listing me as sponsor.
>> 
> It's my pleasure. I've applied it.

Great!

> Thank you so much.
>
> I don't quite know the process for committing to the GCC sources.
> If I have commit access, how will the process be different?

The patch submission process is pretty much the same: patches need
to be sent to the list and most patches need to be approved by a
reviewer or maintainer.  The main differences are:

- If a patch is “obviously correct”, you can apply it without going
  through the approval process.  (Please still send the patch to the
  list though.)

- Once a patch has been approved, you can commit the patch yourself,
  rather than rely on someone else to do it for you.  The main benefits
  of this are:

  - You can commit from the tree that you actually tested.

  - You can deal with any merge conflicts caused by other people's
patches without having to go through another review cycle.  (Most
merge conflict resolutions are “obvious” and so don't need approval.)

  - A typical workflow is to test a patch on trunk, post it for review,
and ask for approval to apply the patch to both trunk and whichever
branches are appropriate.  If the patch is approved, you can later
test the patch on the approved branches (at your own pace) and
apply it if the tests pass.

In terms of the mechanics of committing, just “git push” should work.
The server hooks will check for things like a well-formed changelog.

https://gcc.gnu.org/gitwrite.html has more info about the process
in general.  Quoting from that page, the next step is:

  Check out a tree using the instructions below and add yourself to the
  MAINTAINERS file. Note: Your first and last names must be exactly the
  same between your account on gcc.gnu.org and the MAINTAINERS file.
  Place your name in the correct section following the conventions
  specified in the file (e.g. "Write After Approval" is "last name
  alphabetical order").

  Then produce a diff to that file and circulate it to the gcc-patches
  list, whilst also checking in your change to test write access
  (approval from the mailing list is not needed in this one case). For
  all other changes, please be sure to follow the write access policies
  below.

> And which branch, which range(aarch64?) could I commit patches to?

This patch should go to master.  The v1 patch should go to
releases/gcc-10 and releases/gcc-9.

You might need to remove some lines from the cost tables when
backporting to gcc-10 and gcc-9 (I haven't checked).  If so, that kind
of change counts as “obviously correct” and so doesn't need approval.

Hope this helps.  Please let me know if you have any questions.

Thanks,
Richard


Re: [PATCH v2] testsuite: Fix test failures from outputs.exp [PR98225]

2021-01-11 Thread Rainer Orth
Hi Bernd,

> On 1/8/21 8:27 PM, David Edelsohn wrote:
>> Hi, Bernd
>> 
>> Thanks for investigating this and creating a revised version of the
>> patch.  With the second patch, the gcc.misc-test/outputs.exp results
>> are clean on AIX.
>> 
>
> Many thanks for confirming that the patch works.
>
> Is it OK to push?

I've now tested the v2 patch on Solaris without and with GNU ld.  Based
on Alexandre's and David's feedback, it is ok.

Thanks.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH] testsuite: Fix test failures from outputs.exp [PR98225]

2021-01-11 Thread Rainer Orth
Hi Alexandre,

> On Jan  7, 2021, Bernd Edlinger  wrote:
>
>> I don't know why that is there in the first place, as there
>> are no C++ test cases, these files should not be created at all.
>
> collect2, on platforms that use it, creates .cdtor files even for C.
> David Edelsohn told me so back then; the problem was on AIX IIRC.  That
> was why I added code to tolerate such outputs.  Removing it would likely
> bring that failure back.
>
>
>> Is it OK for trunk?
>
> It looks good to me, aside from the removal of the .cdtor handler.
>
> I don't think I have authority to approve it with that change,
> but I would if I did ;-)  Thanks!

that's exactly the kind of feedback I've been hoping for ;-)

Thanks.
Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


RE: [PATCH 5/8 v9]middle-end slp: support complex multiply and complex multiply conjugate

2021-01-11 Thread Richard Biener
On Mon, 11 Jan 2021, Tamar Christina wrote:

> Hi Richi,
> 
> This adds support for complex multiply and complex multiply and accumulate to
> the vect pattern detector.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
> and no issues.
> 
> Ok for master? (I replied to a comment at the very end of the mail from the 
> previous patch)
> 
> Example of instructions matched:
> 
> #include 
> #include 
> 
> #define N 200
> #define ROT
> #define TYPE float
> #define TYPE2 float
> 
> 
> void g (TYPE2 complex a[restrict N], TYPE complex b[restrict N], TYPE complex 
> c[restrict N])
> {
>   for (int i=0; i < N; i++)
> {
>   c[i] =  a[i] * (b[i] ROT);
> }
> }
> 
> void g_f1 (TYPE2 complex a[restrict N], TYPE complex b[restrict N], TYPE 
> complex c[restrict N])
> {
>   for (int i=0; i < N; i++)
> {
>   c[i] =  conjf (a[i]) * (b[i] ROT);
> }
> }
> 
> void g_s1 (TYPE2 complex a[restrict N], TYPE complex b[restrict N], TYPE 
> complex c[restrict N])
> {
>   for (int i=0; i < N; i++)
> {
>   c[i] =  a[i] * conjf (b[i] ROT);
> }
> }
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   * internal-fn.def (COMPLEX_MUL, COMPLEX_MUL_CONJ): New.
>   * optabs.def (cmul_optab, cmul_conj_optab): New.
>   * doc/md.texi: Document them.
>   * tree-vect-slp-patterns.c (vect_match_call_complex_mla,
>   vect_normalize_conj_loc, is_eq_or_top, vect_validate_multiplication,
>   vect_build_combine_node, class complex_mul_pattern,
>   complex_mul_pattern::matches, complex_mul_pattern::recognize,
>   complex_mul_pattern::build): New.
> 
> --- inline copy of patch --
> 
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 
> ec6ec180b91fcf9f481b6754c044483787fd923c..b8cc90e1a75e402abbf8a8cf2efefc1a333f8b3a
>  100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -6202,6 +6202,50 @@ The operation is only supported for vector modes 
> @var{m}.
>  
>  This pattern is not allowed to @code{FAIL}.
>  
> +@cindex @code{cmul@var{m}4} instruction pattern
> +@item @samp{cmul@var{m}4}
> +Perform a vector multiply that is semantically the same as multiply of
> +complex numbers.
> +
> +@smallexample
> +  complex TYPE c[N];
> +  complex TYPE a[N];
> +  complex TYPE b[N];
> +  for (int i = 0; i < N; i += 1)
> +@{
> +  c[i] = a[i] * b[i];
> +@}
> +@end smallexample
> +
> +In GCC lane ordering the real part of the number must be in the even lanes 
> with
> +the imaginary part in the odd lanes.
> +
> +The operation is only supported for vector modes @var{m}.
> +
> +This pattern is not allowed to @code{FAIL}.
> +
> +@cindex @code{cmul_conj@var{m}4} instruction pattern
> +@item @samp{cmul_conj@var{m}4}
> +Perform a vector multiply by conjugate that is semantically the same as a
> +multiply of complex numbers where the second multiply arguments is 
> conjugated.
> +
> +@smallexample
> +  complex TYPE c[N];
> +  complex TYPE a[N];
> +  complex TYPE b[N];
> +  for (int i = 0; i < N; i += 1)
> +@{
> +  c[i] = a[i] * conj (b[i]);
> +@}
> +@end smallexample
> +
> +In GCC lane ordering the real part of the number must be in the even lanes 
> with
> +the imaginary part in the odd lanes.
> +
> +The operation is only supported for vector modes @var{m}.
> +
> +This pattern is not allowed to @code{FAIL}.
> +
>  @cindex @code{ffs@var{m}2} instruction pattern
>  @item @samp{ffs@var{m}2}
>  Store into operand 0 one plus the index of the least significant 1-bit
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 
> 511fe70162b5d9db3a61a5285d31c008f6835487..5a0bbe3fe5dee591d54130e60f6996b28164ae38
>  100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -279,6 +279,8 @@ DEF_INTERNAL_FLT_FLOATN_FN (FMAX, ECF_CONST, fmax, binary)
>  DEF_INTERNAL_OPTAB_FN (XORSIGN, ECF_CONST, xorsign, binary)
>  DEF_INTERNAL_OPTAB_FN (COMPLEX_ADD_ROT90, ECF_CONST, cadd90, binary)
>  DEF_INTERNAL_OPTAB_FN (COMPLEX_ADD_ROT270, ECF_CONST, cadd270, binary)
> +DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL, ECF_CONST, cmul, binary)
> +DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL_CONJ, ECF_CONST, cmul_conj, binary)
>  
>  
>  /* FP scales.  */
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 
> e9727def4dbf941bb9ac8b56f83f8ea0f52b262c..e82396bae1117c6de91304761a560b7fbcb69ce1
>  100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -292,6 +292,8 @@ OPTAB_D (copysign_optab, "copysign$F$a3")
>  OPTAB_D (xorsign_optab, "xorsign$F$a3")
>  OPTAB_D (cadd90_optab, "cadd90$a3")
>  OPTAB_D (cadd270_optab, "cadd270$a3")
> +OPTAB_D (cmul_optab, "cmul$a3")
> +OPTAB_D (cmul_conj_optab, "cmul_conj$a3")
>  OPTAB_D (cos_optab, "cos$a2")
>  OPTAB_D (cosh_optab, "cosh$a2")
>  OPTAB_D (exp10_optab, "exp10$a2")
> diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c
> index 
> dbc58f7c53868ed431fc67de1f0162eb0d3b2c24..fb58b45602f00a440ef7c27853276945ba696522
>  100644
> --- a/gcc/tree-vect-slp-patterns.c
> +++ b/gcc/tree-vect-slp-patterns.c
> @@ -719,6 +719,375 @@ comple

Re: [PATCH] [x86]Delete dead code in ix86_expand_sse_comi.[PR98612]

2021-01-11 Thread Uros Bizjak via Gcc-patches
On Mon, Jan 11, 2021 at 11:28 AM Hongtao Liu  wrote:
>
> Hi:
>   d->flag is always 0 for builtins located in
> BDESC_FIRST (comi,COMI,...)
> ...
> BDESC_END (COMI, PCMPESTR)
>   So the condition can be deleted, and so can BUILTIN_DESC_SWAP_OPERANDS.
>
>   Regtested and bootstrapped on x86_64-linux-gnu{-m32,}.
>
> gcc/ChangeLog:
> PR target/98612
> * config/i386/i386-builtins.h (BUILTIN_DESC_SWAP_OPERANDS):
> Deleted.
> * config/i386/i386-expand.c (ix86_expand_sse_comi): Delete
> dead code.

OK.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386-builtins.h | 4 
>  gcc/config/i386/i386-expand.c   | 5 -
>  2 files changed, 9 deletions(-)
>
> diff --git a/gcc/config/i386/i386-builtins.h b/gcc/config/i386/i386-builtins.h
> index bb364458878..0641808c7a7 100644
> --- a/gcc/config/i386/i386-builtins.h
> +++ b/gcc/config/i386/i386-builtins.h
> @@ -236,10 +236,6 @@ struct builtin_isa {
>
>  /* Bits for builtin_description.flag.  */
>
> -/* Set when we don't support the comparison natively, and should
> -   swap_comparison in order to support it.  */
> -#define BUILTIN_DESC_SWAP_OPERANDS 1
> -
>  struct builtin_description
>  {
>const HOST_WIDE_INT mask;
> diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
> index 85ea2605696..d793e5a5bce 100644
> --- a/gcc/config/i386/i386-expand.c
> +++ b/gcc/config/i386/i386-expand.c
> @@ -8634,11 +8634,6 @@ ix86_expand_sse_comi (const struct
> builtin_description *d, tree exp,
>if (VECTOR_MODE_P (mode1))
>  op1 = safe_vector_operand (op1, mode1);
>
> -  /* Swap operands if we have a comparison that isn't available in
> - hardware.  */
> -  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
> -std::swap (op0, op1);
> -
>target = gen_reg_rtx (SImode);
>emit_move_insn (target, const0_rtx);
>target = gen_rtx_SUBREG (QImode, target, 0);
> --
> 2.18.1
>
> --
> BR,
> Hongtao


[PATCH] tree-optimization/98526 - fix vectorizer reduction cost

2021-01-11 Thread Richard Biener
This fixes a double-counting in the reduction cost when vectorizing
the reduction through the regular vectorizable_* functions.

Bootstrapped / tested on x86_64-unknown-linux-gnu, pushed.

2021-01-11  Richard Biener  

PR tree-optimization/98526
* tree-vect-loop.c (vect_model_reduction_cost): Remove costing
of the actual reduction op for the regular case.
(vectorizable_reduction): Cost the stmts
vect_transform_reduction produces here.
---
 gcc/tree-vect-loop.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 965cc164f6e..acfd1952e3b 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -4452,8 +4452,8 @@ have_whole_vector_shift (machine_mode mode)
 /* Function vect_model_reduction_cost.
 
Models cost for a reduction operation, including the vector ops
-   generated within the strip-mine loop, the initial definition before
-   the loop, and the epilogue code that must be generated.  */
+   generated within the strip-mine loop in some cases, the initial
+   definition before the loop, and the epilogue code that must be generated.  
*/
 
 static void
 vect_model_reduction_cost (loop_vec_info loop_vinfo,
@@ -4516,10 +4516,6 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
   prologue_cost += record_stmt_cost (cost_vec, prologue_stmts,
 scalar_to_vec, stmt_info, 0,
 vect_prologue);
-
-  /* Cost of reduction op inside loop.  */
-  inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
- stmt_info, 0, vect_body);
 }
 
   /* Determine cost of epilogue code.
@@ -7268,6 +7264,15 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
 
   vect_model_reduction_cost (loop_vinfo, stmt_info, reduc_fn,
 reduction_type, ncopies, cost_vec);
+  /* Cost the reduction op inside the loop if transformed via
+ vect_transform_reduction.  Otherwise this is costed by the
+ separate vectorizable_* routines.  */
+  if (single_defuse_cycle
+  || code == DOT_PROD_EXPR
+  || code == WIDEN_SUM_EXPR
+  || code == SAD_EXPR)
+record_stmt_cost (cost_vec, ncopies, vector_stmt, stmt_info, 0, vect_body);
+
   if (dump_enabled_p ()
   && reduction_type == FOLD_LEFT_REDUCTION)
 dump_printf_loc (MSG_NOTE, vect_location,
-- 
2.26.2


Re: [committed] d: Support deprecated, @disable, and user-defined attributes on enum members

2021-01-11 Thread Iain Buclaw via Gcc-patches
Excerpts from Andreas Schwab's message of January 10, 2021 9:08 am:
> ../../gcc/d/dmd/parse.c: In member function 'Dsymbols* 
> Parser::parseDeclDefs(int, Dsymbol**, PrefixAttributes*)':
> ../../gcc/d/dmd/parse.c:647:29: error: unused variable 'e' 
> [-Werror=unused-variable]
>   647 | Expression *e = NULL;
>   | ^
> 

Thanks for spotting, this was dealt with in the last merge in r11-6586.

Iain.


[committed] d: Remove visibility and lookup deprecation

2021-01-11 Thread Iain Buclaw via Gcc-patches
Hi,

This patch merges the D front-end implementation with upstream dmd
2d3d13748, removing the visibility and lookup deprecation.

The deprecation phase for access checks is finished.

The `-ftransition=import` and `-ftransition=checkimports` switches no
longer have an effect and are now removed.  Symbols that are not visible
in a particular scope will no longer be found by the compiler.

Bootstrapped and regression tested on x86_64-linux-gnu/-m32/-mx32, and
committed to mainline.

Regards
Iain.

---
gcc/d/ChangeLog:

* dmd/MERGE: Merge upstream dmd 2d3d13748.
* d-lang.cc (d_handle_option): Remove OPT_ftransition_checkimports and
OPT_ftransition_import.
* gdc.texi (Warnings): Remove documentation for -ftransition=import
and -ftransition=checkimports.
* lang.opt (ftransition=checkimports): Remove.
(ftransition=import): Remove.
---
 gcc/d/d-lang.cc   |  8 --
 gcc/d/dmd/MERGE   |  2 +-
 gcc/d/dmd/access.c| 21 ++
 gcc/d/dmd/dscope.c| 75 ++-
 gcc/d/dmd/dsymbol.c   | 23 ++
 gcc/d/dmd/expression.c| 35 +
 gcc/d/dmd/expression.h|  2 +-
 gcc/d/dmd/expressionsem.c | 47 
 gcc/d/dmd/globals.h   |  2 -
 gcc/d/dmd/mtype.c | 69 +++--
 gcc/d/dmd/parse.c |  1 -
 gcc/d/dmd/scope.h |  1 -
 gcc/d/dmd/traits.c|  2 +-
 gcc/d/gdc.texi| 10 ---
 gcc/d/lang.opt|  8 --
 .../gdc.test/compilable/checkimports3.d   | 12 +++
 gcc/testsuite/gdc.test/compilable/ddoc14633.d |  6 --
 .../gdc.test/compilable/deprecate14283.d  |  6 --
 gcc/testsuite/gdc.test/compilable/diag11066.d |  6 --
 .../diag12598.d   |  7 +-
 gcc/testsuite/gdc.test/compilable/diag3243.d  |  6 --
 gcc/testsuite/gdc.test/compilable/dip22d.d| 10 +++
 .../imports/checkimports3a.d  |  0
 .../imports/checkimports3b.d  |  0
 .../imports/checkimports3c.d  |  0
 .../imports/diag12598a.d  |  0
 .../gdc.test/compilable/imports/dip22d.d  |  5 ++
 .../gdc.test/compilable/imports/dip22e.d  |  4 +
 .../gdc.test/compilable/test12567a.d  |  5 --
 .../gdc.test/compilable/test12567b.d  |  5 --
 .../gdc.test/compilable/test12567d.d  |  5 --
 .../gdc.test/compilable/test12567e.d  |  5 --
 gcc/testsuite/gdc.test/compilable/test13053.d |  6 --
 gcc/testsuite/gdc.test/compilable/test14375.d |  6 +-
 gcc/testsuite/gdc.test/compilable/test15785.d |  5 --
 gcc/testsuite/gdc.test/compilable/test15856.d |  2 +-
 gcc/testsuite/gdc.test/compilable/test15925.d | 18 -
 gcc/testsuite/gdc.test/compilable/test17791.d |  7 +-
 gcc/testsuite/gdc.test/compilable/test7815.d  |  6 --
 .../gdc.test/compilable/testcheckimports.d|  8 +-
 gcc/testsuite/gdc.test/compilable/warn3882.d  |  6 --
 .../fail_compilation/checkimports1a.d | 20 -
 .../fail_compilation/checkimports1b.d | 20 -
 .../fail_compilation/checkimports1c.d | 20 -
 .../fail_compilation/checkimports2a.d | 15 ++--
 .../fail_compilation/checkimports2b.d | 13 +---
 .../fail_compilation/checkimports2c.d | 14 +---
 .../gdc.test/fail_compilation/checkimports3.d | 15 
 .../gdc.test/fail_compilation/diag10169.d |  3 +-
 .../gdc.test/fail_compilation/diag5385.d  | 24 ++
 .../gdc.test/fail_compilation/dip22a.d| 17 ++---
 .../gdc.test/fail_compilation/dip22b.d|  4 +-
 .../gdc.test/fail_compilation/dip22d.d| 12 ---
 .../gdc.test/fail_compilation/dip22e.d|  6 +-
 .../gdc.test/fail_compilation/fail10528.d | 20 ++---
 .../gdc.test/fail_compilation/fail262.d   |  2 +-
 .../gdc.test/fail_compilation/fail313.d   |  8 +-
 .../imports/imp15925.d|  0
 .../fail_compilation/imports/imp21353.d   |  7 ++
 .../fail_compilation/imports/test15117a.d |  9 +++
 .../gdc.test/fail_compilation/lookup.d|  4 +-
 .../gdc.test/fail_compilation/test143.d   |  2 +-
 .../gdc.test/fail_compilation/test15117.d | 30 
 .../gdc.test/fail_compilation/test15785.d |  7 +-
 .../gdc.test/fail_compilation/test15785b.d|  8 +-
 .../gdc.test/fail_compilation/test15897.d |  2 +-
 .../gdc.test/fail_compilation/test15925.d | 19 +
 .../gdc.test/fail_compilation/test21353.d | 26 +++
 .../gdc.test/fail_compilation/test314.d   | 10 +--
 69 files changed, 246 insertions(+), 543 deletions(-)
 create mode 100644 gcc/testsuite/gdc.test/compilable/checkimports3.d
 rename gcc/testsuite/gdc.test/{fail_compilation => compilable}/diag12598.d 
(50%)
 create m

Re: [PATCH v2] testsuite: Fix test failures from outputs.exp [PR98225]

2021-01-11 Thread Bernd Edlinger
On 1/8/21 8:27 PM, David Edelsohn wrote:
> Hi, Bernd
> 
> Thanks for investigating this and creating a revised version of the
> patch.  With the second patch, the gcc.misc-test/outputs.exp results
> are clean on AIX.
> 

Many thanks for confirming that the patch works.

Is it OK to push?

Thanks
Bernd.

> Thanks, David
> 
> On Fri, Jan 8, 2021 at 1:59 PM Bernd Edlinger  
> wrote:
>>
>> On 1/8/21 3:23 PM, David Edelsohn wrote:
>>> On Thu, Jan 7, 2021 at 5:18 PM Bernd Edlinger  
>>> wrote:

 Hi,

 On 1/7/21 5:12 PM, Rainer Orth wrote:
>   The unsetenv needs to be wrapped in
>
> if [info exists env(MAKEFLAGS)] {
>

 Done.

> @@ -163,6 +167,9 @@ proc outest { test sources opts dirs out
>   if { $ogl != {} } {
>   pass "$test: $d$o"
>   file delete $ogl
> + } elseif { [string match "*.ld1_args" $o] } {
> + # This file may be missing if !HAVE_GNU_LD
> + pass "$test: $d$o"
>
>   Always PASSing the test even if it isn't run is wrong.  Either wrap
>   the whole group of tests with response files in
>
> if [check_effective_target_gld] {
>
>   or make the test for the *.ld1_args file conditional on that
>   (e.g. along the lines of $ltop used elsewhere).  I'd welcome input
>   from Alexandre which is preferred.
>

 Ah, yes that is a good idea.  Thanks.


 I think the .cdtor.* handling is probably a bad example that I followed 
 here.
 I don't know why that is there in the first place, as there
 are no C++ test cases, these files should not be created at all.
 If they are ever created we would have a couple of other files created
 as well IMHO.
 If there are still missing files in some cases,
 I'd prefer to track these per test case, instead of globally.

 Therefore I propose to remove that exception for now.

 Is it OK for trunk?
>>>
>>> As Alex said, please don't just remove features and functionality if
>>> you don't know why they were added.  The history is online in the
>>> mailing list and the repo history.
>>>
>>> AIX uses constructors to register EH frames and libgcc has an EH
>>> frame.  ctors and dtors can be found in non-C++ code.
>>>
>>
>> Okydoky.
>>
>> I think I understand now better what the issue is here.
>> Although the name cdtor suggests that it has something to do with
>> C++ it is also needed to collect EH frame info, in certain targets.
>> Those are mainly AIX but also hppa*-*-hpux*.
>> I believe those exceptions are only necessary for targets that
>> define EH_FRAME_THROUGH_COLLECT2.
>>
>> I have tested this new version of my patch but only on not-affected
>> x86_64-pc-linux-gnu.
>>
>> @David, @Rainer: I would very much appreciate if you could give this patch
>> a test on your systems.
>>
>>
>> Thanks
>> Bernd.


[PATCH v4 04/29] Refactor 64-bit shift functions into a new file.

2021-01-11 Thread gnu
From: Daniel Engel 

gcc/libgcc/ChangeLog:
2021-01-07 Daniel Engel 

* config/arm/lib1funcs.S: Move __ashldi3(), __ashrdi3(), __lshrdi3() to
* config/arm/bits/shift.S: New file.
---
 libgcc/config/arm/bits/shift.S | 102 
 libgcc/config/arm/lib1funcs.S  | 103 +
 2 files changed, 103 insertions(+), 102 deletions(-)
 create mode 100644 libgcc/config/arm/bits/shift.S

diff --git a/libgcc/config/arm/bits/shift.S b/libgcc/config/arm/bits/shift.S
new file mode 100644
index 000..94e466ac0d2
--- /dev/null
+++ b/libgcc/config/arm/bits/shift.S
@@ -0,0 +1,102 @@
+
+#ifdef L_lshrdi3
+
+   FUNC_START lshrdi3
+   FUNC_ALIAS aeabi_llsr lshrdi3
+   
+#ifdef __thumb__
+   lsrsal, r2
+   movsr3, ah
+   lsrsah, r2
+   mov ip, r3
+   subsr2, #32
+   lsrsr3, r2
+   orrsal, r3
+   negsr2, r2
+   mov r3, ip
+   lslsr3, r2
+   orrsal, r3
+   RET
+#else
+   subsr3, r2, #32
+   rsb ip, r2, #32
+   movmi   al, al, lsr r2
+   movpl   al, ah, lsr r3
+   orrmi   al, al, ah, lsl ip
+   mov ah, ah, lsr r2
+   RET
+#endif
+   FUNC_END aeabi_llsr
+   FUNC_END lshrdi3
+
+#endif
+   
+#ifdef L_ashrdi3
+   
+   FUNC_START ashrdi3
+   FUNC_ALIAS aeabi_lasr ashrdi3
+   
+#ifdef __thumb__
+   lsrsal, r2
+   movsr3, ah
+   asrsah, r2
+   subsr2, #32
+   @ If r2 is negative at this point the following step would OR
+   @ the sign bit into all of AL.  That's not what we want...
+   bmi 1f
+   mov ip, r3
+   asrsr3, r2
+   orrsal, r3
+   mov r3, ip
+1:
+   negsr2, r2
+   lslsr3, r2
+   orrsal, r3
+   RET
+#else
+   subsr3, r2, #32
+   rsb ip, r2, #32
+   movmi   al, al, lsr r2
+   movpl   al, ah, asr r3
+   orrmi   al, al, ah, lsl ip
+   mov ah, ah, asr r2
+   RET
+#endif
+
+   FUNC_END aeabi_lasr
+   FUNC_END ashrdi3
+
+#endif
+
+#ifdef L_ashldi3
+
+   FUNC_START ashldi3
+   FUNC_ALIAS aeabi_llsl ashldi3
+   
+#ifdef __thumb__
+   lslsah, r2
+   movsr3, al
+   lslsal, r2
+   mov ip, r3
+   subsr2, #32
+   lslsr3, r2
+   orrsah, r3
+   negsr2, r2
+   mov r3, ip
+   lsrsr3, r2
+   orrsah, r3
+   RET
+#else
+   subsr3, r2, #32
+   rsb ip, r2, #32
+   movmi   ah, ah, lsl r2
+   movpl   ah, al, lsl r3
+   orrmi   ah, ah, al, lsr ip
+   mov al, al, lsl r2
+   RET
+#endif
+   FUNC_END aeabi_llsl
+   FUNC_END ashldi3
+
+#endif
+
diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
index acafff62448..c7a3b85bf2b 100644
--- a/libgcc/config/arm/lib1funcs.S
+++ b/libgcc/config/arm/lib1funcs.S
@@ -1618,108 +1618,7 @@ LSYM(Lover12):
 
 /* Prevent __aeabi double-word shifts from being produced on SymbianOS.  */
 #ifndef __symbian__
-
-#ifdef L_lshrdi3
-
-   FUNC_START lshrdi3
-   FUNC_ALIAS aeabi_llsr lshrdi3
-   
-#ifdef __thumb__
-   lsrsal, r2
-   movsr3, ah
-   lsrsah, r2
-   mov ip, r3
-   subsr2, #32
-   lsrsr3, r2
-   orrsal, r3
-   negsr2, r2
-   mov r3, ip
-   lslsr3, r2
-   orrsal, r3
-   RET
-#else
-   subsr3, r2, #32
-   rsb ip, r2, #32
-   movmi   al, al, lsr r2
-   movpl   al, ah, lsr r3
-   orrmi   al, al, ah, lsl ip
-   mov ah, ah, lsr r2
-   RET
-#endif
-   FUNC_END aeabi_llsr
-   FUNC_END lshrdi3
-
-#endif
-   
-#ifdef L_ashrdi3
-   
-   FUNC_START ashrdi3
-   FUNC_ALIAS aeabi_lasr ashrdi3
-   
-#ifdef __thumb__
-   lsrsal, r2
-   movsr3, ah
-   asrsah, r2
-   subsr2, #32
-   @ If r2 is negative at this point the following step would OR
-   @ the sign bit into all of AL.  That's not what we want...
-   bmi 1f
-   mov ip, r3
-   asrsr3, r2
-   orrsal, r3
-   mov r3, ip
-1:
-   negsr2, r2
-   lslsr3, r2
-   orrsal, r3
-   RET
-#else
-   subsr3, r2, #32
-   rsb ip, r2, #32
-   movmi   al, al, lsr r2
-   movpl   al, ah, asr r3
-   orrmi   al, al, ah, lsl ip
-   mov ah, ah, asr r2
-   RET
-#endif
-
-   FUNC_END aeabi_lasr
-   FUNC_END ashrdi3
-
-#endif
-
-#ifdef L_ashldi3
-
-   FUNC_START ashldi3
-   FUNC_ALIAS aeabi_llsl ashldi3
-   
-#ifdef __thumb__
-   lslsah, r2
-   movsr3, al
-   lslsal, r2
-   mov ip, r3
-   subsr2, #32
-   lslsr3, r2
-   orrsah, r3
-   negsr2, r2
-   mov r3, ip
-   lsrsr3, r2
-   orrsah, r3
-   RET
-#else
-   subsr3,

[PATCH v4 17/29] Import replacement 64-bit division functions from the CM0 library.

2021-01-11 Thread gnu
From: Daniel Engel 

gcc/libgcc/ChangeLog:
2021-01-07 Daniel Engel 

* config/arm/bpabi.c: Deleted unused file.
* config/arm/eabi/ldiv.S: Replaced the __aeabi_ldivmod() and
__aeabi_uldivmod() wrapper functions with a full implementation.
* config/arm/t-bpabi: Removed bpabi.c from LIB2ADD_ST.
* config/arm/t-elf: Add _divdi3 and _udivdi3 to LIB1ASMFUNCS.
---
 libgcc/config/arm/bpabi.c |  42 ---
 libgcc/config/arm/eabi/ldiv.S | 571 +-
 libgcc/config/arm/t-bpabi |   3 +-
 libgcc/config/arm/t-elf   |   9 +
 4 files changed, 501 insertions(+), 124 deletions(-)
 delete mode 100644 libgcc/config/arm/bpabi.c

diff --git a/libgcc/config/arm/bpabi.c b/libgcc/config/arm/bpabi.c
deleted file mode 100644
index bf6ba757964..000
--- a/libgcc/config/arm/bpabi.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/* Miscellaneous BPABI functions.
-
-   Copyright (C) 2003-2021 Free Software Foundation, Inc.
-   Contributed by CodeSourcery, LLC.
-
-   This file is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by the
-   Free Software Foundation; either version 3, or (at your option) any
-   later version.
-
-   This file is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   .  */
-
-extern long long __divdi3 (long long, long long);
-extern unsigned long long __udivdi3 (unsigned long long, 
-unsigned long long);
-extern long long __gnu_ldivmod_helper (long long, long long, long long *);
-
-
-long long
-__gnu_ldivmod_helper (long long a, 
- long long b, 
- long long *remainder)
-{
-  long long quotient;
-
-  quotient = __divdi3 (a, b);
-  *remainder = a - b * quotient;
-  return quotient;
-}
-
diff --git a/libgcc/config/arm/eabi/ldiv.S b/libgcc/config/arm/eabi/ldiv.S
index 514a3b8c3a3..c225e5973b2 100644
--- a/libgcc/config/arm/eabi/ldiv.S
+++ b/libgcc/config/arm/eabi/ldiv.S
@@ -1,82 +1,493 @@
+/* ldiv.S: Thumb-1 optimized 64-bit integer division
 
-.macro test_div_by_zero signed
-   cmp yyh, #0
-   bne 7f
-   cmp yyl, #0
-   bne 7f
-   cmp xxh, #0
-   .ifc\signed, unsigned
-   bne 2f
-   cmp xxl, #0
-2:
-   beq 3f
-   movsxxh, #0
-   mvnsxxh, xxh@ 0x
-   movsxxl, xxh
-3:
-   .else
-   blt 6f
-   bgt 4f
-   cmp xxl, #0
-   beq 5f
-4: movsxxl, #0
-   mvnsxxl, xxl@ 0x
-   lsrsxxh, xxl, #1@ 0x7fff
-   b   5f
-6: movsxxh, #0x80
-   lslsxxh, xxh, #24   @ 0x8000
-   movsxxl, #0
-5:
-   .endif
-   @ tailcalls are tricky on v6-m.
-   push{r0, r1, r2}
-   ldr r0, 1f
-   adr r1, 1f
-   addsr0, r1
-   str r0, [sp, #8]
-   @ We know we are not on armv4t, so pop pc is safe.
-   pop {r0, r1, pc}
-   .align  2
-1:
-   .word   __aeabi_ldiv0 - 1b
-7:
-.endm
-
-#ifdef L_aeabi_ldivmod
-
-FUNC_START aeabi_ldivmod
-   test_div_by_zero signed
-
-   push{r0, r1}
-   mov r0, sp
-   push{r0, lr}
-   ldr r0, [sp, #8]
-   bl  SYM(__gnu_ldivmod_helper)
-   ldr r3, [sp, #4]
-   mov lr, r3
-   add sp, sp, #8
-   pop {r2, r3}
-   RET
-   FUNC_END aeabi_ldivmod
-
-#endif /* L_aeabi_ldivmod */
-
-#ifdef L_aeabi_uldivmod
-
-FUNC_START aeabi_uldivmod
-   test_div_by_zero unsigned
-
-   push{r0, r1}
-   mov r0, sp
-   push{r0, lr}
-   ldr r0, [sp, #8]
-   bl  SYM(__udivmoddi4)
-   ldr r3, [sp, #4]
-   mov lr, r3
-   add sp, sp, #8
-   pop {r2, r3}
-   RET
-   FUNC_END aeabi_uldivmod
-   
-#endif /* L_aeabi_uldivmod */
+   Copyright (C) 2018-2021 Free Software Foundation, Inc.
+   Contributed by Daniel Engel, Senva Inc (g...@danielengel.com)
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will

[PATCH v4 24/29] Import single precision division from the CM0 library.

2021-01-11 Thread gnu
From: Daniel Engel 

gcc/libgcc/ChangeLog:
2021-01-08 Daniel Engel 

* config/arm/eabi/fdiv.S: New file for __divsf3().
* config/arm/lib1funcs.S: #include eabi/fdiv.S (v6m only).
* config/arm/t-elf: Add _divsf3 and _fp_divloopf3 to LIB1ASMFUNCS.
---
 libgcc/config/arm/eabi/fdiv.S | 261 ++
 libgcc/config/arm/lib1funcs.S |   1 +
 libgcc/config/arm/t-elf   |   2 +
 3 files changed, 264 insertions(+)
 create mode 100644 libgcc/config/arm/eabi/fdiv.S

diff --git a/libgcc/config/arm/eabi/fdiv.S b/libgcc/config/arm/eabi/fdiv.S
new file mode 100644
index 000..118f4e94676
--- /dev/null
+++ b/libgcc/config/arm/eabi/fdiv.S
@@ -0,0 +1,261 @@
+/* fdiv.S: Cortex M0 optimized 32-bit float division
+
+   Copyright (C) 2018-2021 Free Software Foundation, Inc.
+   Contributed by Daniel Engel, Senva Inc (g...@danielengel.com)
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   .  */
+
+
+#ifdef L_arm_divsf3
+
+// float __aeabi_fdiv(float, float)
+// Returns $r0 after division by $r1.
+// Subsection ordering within fpcore keeps conditional branches within range.
+FUNC_START_SECTION aeabi_fdiv .text.sorted.libgcc.fpcore.n.fdiv
+FUNC_ALIAS divsf3 aeabi_fdiv
+CFI_START_FUNCTION
+
+// Standard registers, compatible with exception handling.
+push{ rT, lr }
+.cfi_remember_state
+.cfi_remember_state
+.cfi_adjust_cfa_offset 8
+.cfi_rel_offset rT, 0
+.cfi_rel_offset lr, 4
+
+// Save the sign of the result.
+movsr3, r1
+eorsr3, r0
+lsrsrT, r3, #31
+lslsrT, #31
+mov ip, rT
+
+// Set up INF for comparison.
+movsrT, #255
+lslsrT, #24
+
+// Check for divide by 0.  Automatically catches 0/0.
+lslsr2, r1, #1
+beq LLSYM(__fdiv_by_zero)
+
+// Check for INF/INF, or a number divided by itself.
+lslsr3, #1
+beq LLSYM(__fdiv_equal)
+
+// Check the numerator for INF/NAN.
+eorsr3, r2
+cmp r3, rT
+bhs LLSYM(__fdiv_special1)
+
+// Check the denominator for INF/NAN.
+cmp r2, rT
+bhs LLSYM(__fdiv_special2)
+
+// Check the numerator for zero.
+cmp r3, #0
+beq SYM(__fp_zero)
+
+// No action if the numerator is subnormal.
+//  The mantissa will normalize naturally in the division loop.
+lslsr0, #9
+lsrsr1, r3, #24
+beq LLSYM(__fdiv_denominator)
+
+// Restore the numerator's implicit '1'.
+addsr0, #1
+rorsr0, r0
+
+LLSYM(__fdiv_denominator):
+// The denominator must be normalized and left aligned.
+bl  SYM(__fp_normalize2)
+
+// 25 bits of precision will be sufficient.
+movsrT, #64
+
+// Run division.
+bl  SYM(__fp_divloopf)
+b   SYM(__fp_assemble)
+
+LLSYM(__fdiv_equal):
+  #if defined(EXCEPTION_CODES) && EXCEPTION_CODES
+movsr3, #(DIVISION_INF_BY_INF)
+  #endif
+
+// The absolute values of both operands are equal, but not 0.
+// If both operands are INF, create a new NAN.
+cmp r2, rT
+beq SYM(__fp_exception)
+
+  #if defined(TRAP_NANS) && TRAP_NANS
+// If both operands are NAN, return the NAN in $r0.
+bhi SYM(__fp_check_nan)
+  #else
+bhi LLSYM(__fdiv_return)
+  #endif
+
+// Return 1.0f, with appropriate sign.
+movsr0, #127
+lslsr0, #23
+add r0, ip
+
+LLSYM(__fdiv_return):
+pop { rT, pc }
+.cfi_restore_state
+
+LLSYM(__fdiv_special2):
+// The denominator is either INF or NAN, numerator is neither.
+// Also, the denominator is not equal to 0.
+// If the denominator is INF, the res

[PATCH v4 26/29] Import float-to-integer conversion functions from the CM0 library.

2021-01-11 Thread gnu
From: Daniel Engel 

gcc/libgcc/ChangeLog:
2021-01-08 Daniel Engel 

* config/arm/bpabi-lib.h: Remove obsolete RENAME_LIBRARY directives.
* config/arm/eabi/ffixed.S: New file for __fix[uns]sfsi/sfdi().
* config/arm/lib1funcs.S: #include eabi/ffixed.S (v6m only).
* config/arm/t-elf: Add _fix[uns]sfdi/sfsi objects to LIB1ASMFUNCS.
---
 libgcc/config/arm/bpabi-lib.h   |   6 -
 libgcc/config/arm/eabi/ffixed.S | 414 
 libgcc/config/arm/lib1funcs.S   |   1 +
 libgcc/config/arm/t-elf |   4 +
 4 files changed, 419 insertions(+), 6 deletions(-)
 create mode 100644 libgcc/config/arm/eabi/ffixed.S

diff --git a/libgcc/config/arm/bpabi-lib.h b/libgcc/config/arm/bpabi-lib.h
index 1e651ead4ac..a1c631640bb 100644
--- a/libgcc/config/arm/bpabi-lib.h
+++ b/libgcc/config/arm/bpabi-lib.h
@@ -32,9 +32,6 @@
 #ifdef L_muldi3
 #define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (muldi3, lmul)
 #endif
-#ifdef L_muldi3
-#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (muldi3, lmul)
-#endif
 #ifdef L_fixdfdi
 #define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixdfdi, d2lz) \
   extern DWtype __fixdfdi (DFtype) __attribute__((pcs("aapcs"))); \
@@ -62,9 +59,6 @@
 #ifdef L_fixunsdfsi
 #define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunsdfsi, d2uiz)
 #endif
-#ifdef L_fixunssfsi
-#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunssfsi, f2uiz)
-#endif
 #ifdef L_floatundidf
 #define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (floatundidf, ul2d)
 #endif
diff --git a/libgcc/config/arm/eabi/ffixed.S b/libgcc/config/arm/eabi/ffixed.S
new file mode 100644
index 000..8ced3a701ff
--- /dev/null
+++ b/libgcc/config/arm/eabi/ffixed.S
@@ -0,0 +1,414 @@
+/* ffixed.S: Thumb-1 optimized float-to-integer conversion
+
+   Copyright (C) 2018-2021 Free Software Foundation, Inc.
+   Contributed by Daniel Engel, Senva Inc (g...@danielengel.com)
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   .  */
+
+
+// The implementation of __aeabi_f2uiz() expects to tail call __internal_f2iz()
+//  with the flags register set for unsigned conversion.  The __internal_f2iz()
+//  symbol itself is unambiguous, but there is a remote risk that the linker
+//  will prefer some other symbol in place of __aeabi_f2iz().  Importing an
+//  archive file that exports __aeabi_f2iz() will throw an error in this case.
+// As a workaround, this block configures __aeabi_f2iz() for compilation twice.
+// The first version configures __internal_f2iz() as a WEAK standalone symbol,
+//  and the second exports __aeabi_f2iz() and __internal_f2iz() normally.
+// A small bonus: programs only using __aeabi_f2uiz() will be slightly smaller.
+// '_internal_fixsfsi' should appear before '_arm_fixsfsi' in LIB1ASMFUNCS.
+#if defined(L_arm_fixsfsi) || \
+   (defined(L_internal_fixsfsi) && \
+  !(defined(__OPTIMIZE_SIZE__) && __OPTIMIZE_SIZE__))
+
+// Subsection ordering within fpcore keeps conditional branches within range.
+#define F2IZ_SECTION .text.sorted.libgcc.fpcore.r.fixsfsi
+
+// int __aeabi_f2iz(float)
+// Converts a float in $r0 to signed integer, rounding toward 0.
+// Values out of range are forced to either INT_MAX or INT_MIN.
+// NAN becomes zero.
+#ifdef L_arm_fixsfsi
+FUNC_START_SECTION aeabi_f2iz F2IZ_SECTION
+FUNC_ALIAS fixsfsi aeabi_f2iz
+CFI_START_FUNCTION
+#endif
+
+  #if defined(__OPTIMIZE_SIZE__) && __OPTIMIZE_SIZE__
+// Flag for unsigned conversion.
+movsr1, #33
+b   SYM(__internal_fixsfdi)
+
+  #else /* !__OPTIMIZE_SIZE__ */
+
+#ifdef L_arm_fixsfsi
+// Flag for signed conversion.
+movsr3, #1
+
+// [unsigned] int internal_f2iz(float, int)
+// Internal function expects a boolean flag in $r1.
+// If the boolean flag is 0, the result is unsigned.
+// If the boolean flag is 1, the result is signed.
+FUNC_ENTRY internal_f2iz
+
+#else /* L_internal_fixsfsi */
+WEAK_START_SECTION internal_f2iz F2IZ_SECTION
+CFI_START_FUNCTION
+
+#endif
+
+// Isolate the sign of the result.
+asrsr1, r0, #31
+lslsr0, #1
+
+

[PATCH v4 23/29] Import single precision multiplication from the CM0 library.

2021-01-11 Thread gnu
From: Daniel Engel 

gcc/libgcc/ChangeLog:
2021-01-08 Daniel Engel 

* config/arm/eabi/fmul.S: New file for __mulsf3().
* config/arm/lib1funcs.S: #include eabi/fmul.S (v6m only).
* config/arm/t-elf: Move _mulsf3 to global scope in LIB1ASMFUNCS
(this object was formerly blocked on v6m builds).
---
 libgcc/config/arm/eabi/fmul.S | 215 ++
 libgcc/config/arm/lib1funcs.S |   1 +
 libgcc/config/arm/t-elf   |   3 +-
 3 files changed, 218 insertions(+), 1 deletion(-)
 create mode 100644 libgcc/config/arm/eabi/fmul.S

diff --git a/libgcc/config/arm/eabi/fmul.S b/libgcc/config/arm/eabi/fmul.S
new file mode 100644
index 000..767de988f0b
--- /dev/null
+++ b/libgcc/config/arm/eabi/fmul.S
@@ -0,0 +1,215 @@
+/* fmul.S: Thumb-1 optimized 32-bit float multiplication
+
+   Copyright (C) 2018-2021 Free Software Foundation, Inc.
+   Contributed by Daniel Engel, Senva Inc (g...@danielengel.com)
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   .  */
+
+
+#ifdef L_arm_mulsf3
+
+// float __aeabi_fmul(float, float)
+// Returns $r0 after multiplication by $r1.
+// Subsection ordering within fpcore keeps conditional branches within range.
+FUNC_START_SECTION aeabi_fmul .text.sorted.libgcc.fpcore.m.fmul
+FUNC_ALIAS mulsf3 aeabi_fmul
+CFI_START_FUNCTION
+
+// Standard registers, compatible with exception handling.
+push{ rT, lr }
+.cfi_remember_state
+.cfi_remember_state
+.cfi_adjust_cfa_offset 8
+.cfi_rel_offset rT, 0
+.cfi_rel_offset lr, 4
+
+// Save the sign of the result.
+movsrT, r1
+eorsrT, r0
+lsrsrT, #31
+lslsrT, #31
+mov ip, rT
+
+// Set up INF for comparison.
+movsrT, #255
+lslsrT, #24
+
+// Check for multiplication by zero.
+lslsr2, r0, #1
+beq LLSYM(__fmul_zero1)
+
+lslsr3, r1, #1
+beq LLSYM(__fmul_zero2)
+
+// Check for INF/NAN.
+cmp r3, rT
+bhs LLSYM(__fmul_special2)
+
+cmp r2, rT
+bhs LLSYM(__fmul_special1)
+
+// Because neither operand is INF/NAN, the result will be finite.
+// It is now safe to modify the original operand registers.
+lslsr0, #9
+
+// Isolate the first exponent.  When normal, add back the implicit '1'.
+// The result is always aligned with the MSB in bit [31].
+// Subnormal mantissas remain effectively multiplied by 2x relative to
+//  normals, but this works because the weight of a subnormal is -126.
+lsrsr2, #24
+beq LLSYM(__fmul_normalize2)
+addsr0, #1
+rorsr0, r0
+
+LLSYM(__fmul_normalize2):
+// IMPORTANT: exp10i() jumps in here!
+// Repeat for the mantissa of the second operand.
+// Short-circuit when the mantissa is 1.0, as the
+//  first mantissa is already prepared in $r0
+lslsr1, #9
+
+// When normal, add back the implicit '1'.
+lsrsr3, #24
+beq LLSYM(__fmul_go)
+addsr1, #1
+rorsr1, r1
+
+LLSYM(__fmul_go):
+// Calculate the final exponent, relative to bit [30].
+addsrT, r2, r3
+subsrT, #127
+
+  #if !defined(__OPTIMIZE_SIZE__) || !__OPTIMIZE_SIZE__
+// Short-circuit on multiplication by powers of 2.
+lslsr3, r0, #1
+beq LLSYM(__fmul_simple1)
+
+lslsr3, r1, #1
+beq LLSYM(__fmul_simple2)
+  #endif
+
+// Save $ip across the call.
+// (Alternatively, could push/pop a separate register,
+//  but the four instructions here are equally fast
+//  without imposing on the stack.)
+add rT, ip
+
+// 32x32 unsigned multiplication, 64 bit result.
+bl  SYM(__umulsidi3) __PL

[PATCH v4 22/29] Import single precision addition and subtraction from the CM0 library.

2021-01-11 Thread gnu
From: Daniel Engel 

Since this is the first phase of the floating point functions, some
common parsing and formatting routines are also included.  These common
routines will be referenced by other functions in subsequent commits.
However, even if the size penalty is accounted entirely to __addsf3(),
the total compiled size is still less than half the size of soft-float.

gcc/libgcc/ChangeLog:
2021-01-07 Daniel Engel 

* config/arm/eabi/fadd.S: Add new functions __addsf3() and __subsf3().
* config/arm/eabi/fneg.S: Add new file for __negsf2().
* config/arm/eabi/futil.S: Add new file for shared floating point
helper functions (normalization, rounding, etc).
* config/arm/lib1funcs.S: #include eabi/fneg.S and eabi/futil.S (v6m 
only).
* config/arm/t-elf: Add _addsf3, _frsubsf3, and helpers to LIB1ASMFUNCS.
---
 libgcc/config/arm/eabi/fadd.S  | 324 -
 libgcc/config/arm/eabi/fneg.S  |  76 ++
 libgcc/config/arm/eabi/fplib.h |   3 -
 libgcc/config/arm/eabi/futil.S | 418 +
 libgcc/config/arm/lib1funcs.S  |   2 +
 libgcc/config/arm/t-elf|   6 +
 6 files changed, 818 insertions(+), 11 deletions(-)
 create mode 100644 libgcc/config/arm/eabi/fneg.S
 create mode 100644 libgcc/config/arm/eabi/futil.S

diff --git a/libgcc/config/arm/eabi/fadd.S b/libgcc/config/arm/eabi/fadd.S
index 223e38f7e50..77b81d62b3b 100644
--- a/libgcc/config/arm/eabi/fadd.S
+++ b/libgcc/config/arm/eabi/fadd.S
@@ -1,16 +1,324 @@
+/* fadd.S: Thumb-1 optimized 32-bit float addition and subtraction
+
+   Copyright (C) 2018-2021 Free Software Foundation, Inc.
+   Contributed by Daniel Engel, Senva Inc (g...@danielengel.com)
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   .  */
+
+
+#ifdef L_arm_frsubsf3
+
+// float __aeabi_frsub(float, float)
+// Returns the floating point difference of $r1 - $r0 in $r0.
+// Subsection ordering within fpcore keeps conditional branches within range.
+FUNC_START_SECTION aeabi_frsub .text.sorted.libgcc.fpcore.b.frsub
+CFI_START_FUNCTION
+
+  #if defined(STRICT_NANS) && STRICT_NANS
+// Check if $r0 is NAN before modifying.
+lslsr2, r0, #1
+movsr3, #255
+lslsr3, #24
+
+// Let fadd() find the NAN in the normal course of operation,
+//  moving it to $r0 and checking the quiet/signaling bit.
+cmp r2, r3
+bhi SYM(__aeabi_fadd)
+  #endif
+
+// Flip sign and run through fadd().
+movsr2, #1
+lslsr2, #31
+addsr0, r2
+b   SYM(__aeabi_fadd)
+
+CFI_END_FUNCTION
+FUNC_END aeabi_frsub
+
+#endif /* L_arm_frsubsf3 */
+
 
 #ifdef L_arm_addsubsf3
 
-FUNC_START aeabi_frsub
+// float __aeabi_fsub(float, float)
+// Returns the floating point difference of $r0 - $r1 in $r0.
+// Subsection ordering within fpcore keeps conditional branches within range.
+FUNC_START_SECTION aeabi_fsub .text.sorted.libgcc.fpcore.c.faddsub
+FUNC_ALIAS subsf3 aeabi_fsub
+CFI_START_FUNCTION
+
+  #if defined(STRICT_NANS) && STRICT_NANS
+// Check if $r1 is NAN before modifying.
+lslsr2, r1, #1
+movsr3, #255
+lslsr3, #24
 
-  push {r4, lr}
-  movs r4, #1
-  lsls r4, #31
-  eors r0, r0, r4
-  bl   __aeabi_fadd
-  pop  {r4, pc}
+// Let fadd() find the NAN in the normal course of operation,
+//  moving it to $r0 and checking the quiet/signaling bit.
+cmp r2, r3
+bhi SYM(__aeabi_fadd)
+  #endif
 
-  FUNC_END aeabi_frsub
+// Flip sign and fall into fadd().
+movsr2, #1
+lslsr2, #31
+addsr1, r2
 
 #endif /* L_arm_addsubsf3 */
 
+
+// The execution of __subsf3() flows directly into __addsf3(), such that
+//  instructions must appear consecutively in the same memory section.
+//  However, this construction inhibits the ability to discard __subsf3()
+//  when only using __addsf3().
+// Th

[PATCH v4 25/29] Import integer-to-float conversion functions from the CM0 library.

2021-01-11 Thread gnu
From: Daniel Engel 

gcc/libgcc/ChangeLog:
2021-01-08 Daniel Engel 

* config/arm/bpabi-lib.h: Remove obsolete RENAME_LIBRARY directives.
* config/arm/eabi/ffloat.S: New file for __float[un]sisf/disf().
* config/arm/lib1funcs.S: #include eabi/ffloat.S (v6m only).
* config/arm/t-elf: Add _float[un]sisf/disf objects to LIB1ASMFUNCS.
---
 libgcc/config/arm/bpabi-lib.h   |   6 -
 libgcc/config/arm/eabi/ffloat.S | 249 
 libgcc/config/arm/lib1funcs.S   |   1 +
 libgcc/config/arm/t-elf |   5 +-
 4 files changed, 254 insertions(+), 7 deletions(-)
 create mode 100644 libgcc/config/arm/eabi/ffloat.S

diff --git a/libgcc/config/arm/bpabi-lib.h b/libgcc/config/arm/bpabi-lib.h
index 3cb90b4b345..1e651ead4ac 100644
--- a/libgcc/config/arm/bpabi-lib.h
+++ b/libgcc/config/arm/bpabi-lib.h
@@ -56,9 +56,6 @@
 #ifdef L_floatdidf
 #define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (floatdidf, l2d)
 #endif
-#ifdef L_floatdisf
-#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (floatdisf, l2f)
-#endif
 
 /* These renames are needed on ARMv6M.  Other targets get them from
assembly routines.  */
@@ -71,9 +68,6 @@
 #ifdef L_floatundidf
 #define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (floatundidf, ul2d)
 #endif
-#ifdef L_floatundisf
-#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (floatundisf, ul2f)
-#endif
 
 /* For ARM bpabi, we only want to use a "__gnu_" prefix for the fixed-point
helper functions - not everything in libgcc - in the interests of
diff --git a/libgcc/config/arm/eabi/ffloat.S b/libgcc/config/arm/eabi/ffloat.S
new file mode 100644
index 000..eadc9d8d08e
--- /dev/null
+++ b/libgcc/config/arm/eabi/ffloat.S
@@ -0,0 +1,249 @@
+/* ffloat.S: Thumb-1 optimized integer-to-float conversion
+
+   Copyright (C) 2018-2021 Free Software Foundation, Inc.
+   Contributed by Daniel Engel, Senva Inc (g...@danielengel.com)
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   .  */
+
+
+#ifdef L_arm_floatsisf
+
+// float __aeabi_i2f(int)
+// Converts a signed integer in $r0 to float.
+
+// On little-endian cores (including all Cortex-M), __floatsisf() can be
+//  implemented as below in 5 instructions.  However, it can also be
+//  implemented by prefixing a single instruction to __floatdisf().
+// A memory savings of 4 instructions at a cost of only 2 execution cycles
+//  seems reasonable enough.  Plus, the trade-off only happens in programs
+//  that require both __floatsisf() and __floatdisf().  Programs only using
+//  __floatsisf() always get the smallest version.
+// When the combined version will be provided, this standalone version
+//  must be declared WEAK, so that the combined version can supersede it.
+// '_arm_floatsisf' should appear before '_arm_floatdisf' in LIB1ASMFUNCS.
+// Same parent section as __ul2f() to keep tail call branch within range.
+#if defined(__OPTIMIZE_SIZE__) && __OPTIMIZE_SIZE__
+WEAK_START_SECTION aeabi_i2f .text.sorted.libgcc.fpcore.p.floatsisf
+WEAK_ALIAS floatsisf aeabi_i2f
+CFI_START_FUNCTION
+
+#else /* !__OPTIMIZE_SIZE__ */
+FUNC_START_SECTION aeabi_i2f .text.sorted.libgcc.fpcore.p.floatsisf
+FUNC_ALIAS floatsisf aeabi_i2f
+CFI_START_FUNCTION
+
+#endif /* !__OPTIMIZE_SIZE__ */
+
+// Save the sign.
+asrsr3, r0, #31
+
+// Absolute value of the input.
+eorsr0, r3
+subsr0, r3
+
+// Zero extension to long long unsigned.
+eorsr1, r1
+b   SYM(__internal_floatundisf_noswap)
+
+CFI_END_FUNCTION
+FUNC_END floatsisf
+FUNC_END aeabi_i2f
+
+#endif /* L_arm_floatsisf */
+
+
+#ifdef L_arm_floatdisf
+
+// float __aeabi_l2f(long long)
+// Converts a signed 64-bit integer in $r1:$r0 to a float in $r0.
+// See build comments for __floatsisf() above.
+// Same parent section as __ul2f() to keep tail call branch within range.
+#if defined(__OPTIMIZE_SIZE__) && __OPTIMIZE_SIZE__
+FUNC_START_SECTION aeabi_i2f .text.sorted.libgcc.fpcore.p.floatdisf
+FUNC_ALIAS floatsisf aeabi_i2f
+CFI_START_FUNCTION
+
+  #if defined(__ARMEB__) && __ARMEB__
+ 

[PATCH v4 27/29] Import float<->double conversion functions from the CM0 library.

2021-01-11 Thread gnu
From: Daniel Engel 

gcc/libgcc/ChangeLog:
2021-01-08 Daniel Engel 

* config/arm/eabi/fcast.S: New file for __aeabi_f2d/__extendsfdf2(),
__aeabi_d2f(), __truncdfsf2().
* config/arm/lib1funcs.S: #include eabi/fcast.S (v6m only).
* config/arm/t-elf: Add _arm_d2f and _arm_f2d objects to LIB1ASMFUNCS.
---
 libgcc/config/arm/eabi/fcast.S | 256 +
 libgcc/config/arm/lib1funcs.S  |   1 +
 libgcc/config/arm/t-elf|   2 +
 3 files changed, 259 insertions(+)
 create mode 100644 libgcc/config/arm/eabi/fcast.S

diff --git a/libgcc/config/arm/eabi/fcast.S b/libgcc/config/arm/eabi/fcast.S
new file mode 100644
index 000..b1184ee1d53
--- /dev/null
+++ b/libgcc/config/arm/eabi/fcast.S
@@ -0,0 +1,256 @@
+/* fcast.S: Thumb-1 optimized 32- and 64-bit float conversions
+
+   Copyright (C) 2018-2021 Free Software Foundation, Inc.
+   Contributed by Daniel Engel, Senva Inc (g...@danielengel.com)
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   .  */
+
+
+#ifdef L_arm_f2d
+
+// double __aeabi_f2d(float)
+// Converts a single-precision float in $r0 to double-precision in $r1:$r0.
+// Rounding, overflow, and underflow are impossible.
+// INF and ZERO are returned unmodified.
+FUNC_START_SECTION aeabi_f2d .text.sorted.libgcc.fpcore.v.f2d
+FUNC_ALIAS extendsfdf2 aeabi_f2d
+CFI_START_FUNCTION
+
+// Save the sign.
+lsrsr1, r0, #31
+lslsr1, #31
+
+// Set up registers for __fp_normalize2().
+push{ rT, lr }
+.cfi_remember_state
+.cfi_adjust_cfa_offset 8
+.cfi_rel_offset rT, 0
+.cfi_rel_offset lr, 4
+
+// Test for zero.
+lslsr0, #1
+beq LLSYM(__f2d_return)
+
+// Split the exponent and mantissa into separate registers.
+// This is the most efficient way to convert subnormals in the
+//  single-precision form into normals in double-precision.
+// This does add a leading implicit '1' to INF and NAN,
+//  but that will be absorbed when the value is re-assembled.
+movsr2, r0
+bl  SYM(__fp_normalize2) __PLT__
+
+// Set up the exponent bias.  For INF/NAN values, the bias
+//  is 1791 (2047 - 255 - 1), where the last '1' accounts
+//  for the implicit '1' in the mantissa.
+movsr0, #3
+lslsr0, #9
+addsr0, #255
+
+// Test for INF/NAN, promote exponent if necessary
+cmp r2, #255
+beq LLSYM(__f2d_indefinite)
+
+// For normal values, the exponent bias is 895 (1023 - 127 - 1),
+//  which is half of the prepared INF/NAN bias.
+lsrsr0, #1
+
+LLSYM(__f2d_indefinite):
+// Assemble exponent with bias correction.
+addsr2, r0
+lslsr2, #20
+addsr1, r2
+
+// Assemble the high word of the mantissa.
+lsrsr0, r3, #11
+add r1, r0
+
+// Remainder of the mantissa in the low word of the result.
+lslsr0, r3, #21
+
+LLSYM(__f2d_return):
+pop { rT, pc }
+.cfi_restore_state
+
+CFI_END_FUNCTION
+FUNC_END extendsfdf2
+FUNC_END aeabi_f2d
+
+#endif /* L_arm_f2d */
+
+
+#if defined(L_arm_d2f) || defined(L_arm_truncdfsf2)
+
+// HACK: Build two separate implementations:
+//  * __aeabi_d2f() rounds to nearest per traditional IEEE-754 rules.
+//  * __truncdfsf2() rounds towards zero per GCC specification.
+// Presumably, a program will consistently use one ABI or the other,
+//  which means that code size will not be duplicated in practice.
+// Merging two versions with dynamic rounding would be rather hard.
+#ifdef L_arm_truncdfsf2
+  #define D2F_NAME truncdfsf2
+  #define D2F_SECTION .text.sorted.libgcc.fpcore.x.truncdfsf2
+#else
+  #define D2F_NAME aeabi_d2f
+  #define D2F_SECTION .text.sorted.libgcc.fpcore.w.d2f
+#endif
+
+// float __aeabi_d2f(double)
+// Converts a double-precision float in $r1:

[PATCH v4 28/29] Import float<->__fp16 conversion functions from the CM0 library.

2021-01-11 Thread gnu
From: Daniel Engel 

gcc/libgcc/ChangeLog:
2021-01-09 Daniel Engel 

* config/arm/eabi/fcast.S: Add __aeabi_f2h*() and __aeabi_h2f*().
* config/arm/fp16.c: Disable duplicate C routines (v6m only).
* config/arm/t-bpabi: Add _arm_f2h* and _arm_h2f* objects to 
LIB1ASMFUNCS.
---
 libgcc/config/arm/eabi/fcast.S | 277 +
 libgcc/config/arm/fp16.c   |   4 +
 libgcc/config/arm/t-bpabi  |   7 +
 3 files changed, 288 insertions(+)

diff --git a/libgcc/config/arm/eabi/fcast.S b/libgcc/config/arm/eabi/fcast.S
index b1184ee1d53..1783a161912 100644
--- a/libgcc/config/arm/eabi/fcast.S
+++ b/libgcc/config/arm/eabi/fcast.S
@@ -254,3 +254,280 @@ FUNC_END D2F_NAME
 
 #endif /* L_arm_d2f || L_arm_truncdfsf2 */
 
+
+#if defined(L_aeabi_h2f_ieee) || defined(L_aeabi_h2f_alt)
+
+#ifdef L_aeabi_h2f_ieee
+  #define H2F_NAME aeabi_h2f
+  #define H2F_ALIAS gnu_h2f_ieee
+#else
+  #define H2F_NAME aeabi_h2f_alt
+  #define H2F_ALIAS gnu_h2f_alternative
+#endif
+
+// float __aeabi_h2f(short hf)
+// float __aeabi_h2f_alt(short hf)
+// Converts a half-precision float in $r0 to single-precision.
+// Rounding, overflow, and underflow conditions are impossible.
+// In IEEE mode, INF, ZERO, and NAN are returned unmodified.
+FUNC_START_SECTION H2F_NAME .text.sorted.libgcc.h2f
+FUNC_ALIAS H2F_ALIAS H2F_NAME
+CFI_START_FUNCTION
+
+// Set up registers for __fp_normalize2().
+push{ rT, lr }
+.cfi_remember_state
+.cfi_adjust_cfa_offset 8
+.cfi_rel_offset rT, 0
+.cfi_rel_offset lr, 4
+
+// Save the mantissa and exponent.
+lslsr2, r0, #17
+
+// Isolate the sign.
+lsrsr0, #15
+lslsr0, #31
+
+// Align the exponent at bit[24] for normalization.
+// If zero, return the original sign.
+lsrsr2, #3
+
+  #ifdef __ARM_FEATURE_IT
+do_it   eq
+RETc(eq)
+  #else
+beq LLSYM(__h2f_return)
+  #endif
+
+// Split the exponent and mantissa into separate registers.
+// This is the most efficient way to convert subnormals in the
+//  half-precision form into normals in single-precision.
+// This does add a leading implicit '1' to INF and NAN,
+//  but that will be absorbed when the value is re-assembled.
+bl  SYM(__fp_normalize2) __PLT__
+
+   #ifdef L_aeabi_h2f_ieee
+// Set up the exponent bias.  For INF/NAN values, the bias is 223,
+//  where the last '1' accounts for the implicit '1' in the mantissa.
+addsr2, #(255 - 31 - 1)
+
+// Test for INF/NAN.
+cmp r2, #254
+
+  #ifdef __ARM_FEATURE_IT
+do_it   ne
+  #else
+beq LLSYM(__h2f_assemble)
+  #endif
+
+// For normal values, the bias should have been 111.
+// However, this offset must be adjusted per the INF check above.
+ IT(sub,ne) r2, #((255 - 31 - 1) - (127 - 15 - 1))
+
+#else /* L_aeabi_h2f_alt */
+// Set up the exponent bias.  All values are normal.
+addsr2, #(127 - 15 - 1)
+#endif
+
+LLSYM(__h2f_assemble):
+// Combine exponent and sign.
+lslsr2, #23
+addsr0, r2
+
+// Combine mantissa.
+lsrsr3, #8
+add r0, r3
+
+LLSYM(__h2f_return):
+pop { rT, pc }
+.cfi_restore_state
+
+CFI_END_FUNCTION
+FUNC_END H2F_NAME
+FUNC_END H2F_ALIAS
+
+#endif /* L_aeabi_h2f_ieee || L_aeabi_h2f_alt */
+
+
+#if defined(L_aeabi_f2h_ieee) || defined(L_aeabi_f2h_alt)
+
+#ifdef L_aeabi_f2h_ieee
+  #define F2H_NAME aeabi_f2h
+  #define F2H_ALIAS gnu_f2h_ieee
+#else
+  #define F2H_NAME aeabi_f2h_alt
+  #define F2H_ALIAS gnu_f2h_alternative
+#endif
+
+// short __aeabi_f2h(float f)
+// short __aeabi_f2h_alt(float f)
+// Converts a single-precision float in $r0 to half-precision,
+//  rounding to nearest, ties to even.
+// Values out of range are forced to either ZERO or INF.
+// In IEEE mode, the upper 12 bits of a NAN will be preserved.
+FUNC_START_SECTION F2H_NAME .text.sorted.libgcc.f2h
+FUNC_ALIAS F2H_ALIAS F2H_NAME
+CFI_START_FUNCTION
+
+// Set up the sign.
+lsrsr2, r0, #31
+lslsr2, #15
+
+// Save the exponent and mantissa.
+// If ZERO, return the original sign.
+lslsr0, #1
+
+  #ifdef __ARM_FEATURE_IT
+do_it   ne,t
+addne   r0, r2
+RETc(ne)
+  #else
+beq LLSYM(__f2h_return)
+  #endif
+
+// Isolate the exponent.
+lsrsr1, r0, #24
+
+  #ifdef L_aeabi_f2h_ieee
+// Check for NAN.
+cmp r1, #255
+beq LLSYM(__f2h_indefinite)
+
+// Check for overflow.
+cmp r1, #(127 + 15)
+bhi LLSYM(__f2h_overflow)
+
+  #else /* L_aeabi_f2h_alt 

[PATCH v4 18/29] Import new integer multiplication functions from the CM0 library.

2021-01-11 Thread gnu
From: Daniel Engel 

gcc/libgcc/ChangeLog:
2021-01-07 Daniel Engel 

* config/arm/eabi/lmul.S: New file for __muldi3(), __mulsidi3(), and
 __umulsidi3().
* config/arm/lib1funcs.S: #include eabi/lmul.S (v6m only).
* config/arm/t-elf: Add the new objects to LIB1ASMFUNCS.
---
 libgcc/config/arm/eabi/lmul.S | 218 ++
 libgcc/config/arm/lib1funcs.S |   1 +
 libgcc/config/arm/t-elf   |  13 +-
 3 files changed, 230 insertions(+), 2 deletions(-)
 create mode 100644 libgcc/config/arm/eabi/lmul.S

diff --git a/libgcc/config/arm/eabi/lmul.S b/libgcc/config/arm/eabi/lmul.S
new file mode 100644
index 000..9fec4364a26
--- /dev/null
+++ b/libgcc/config/arm/eabi/lmul.S
@@ -0,0 +1,218 @@
+/* lmul.S: Thumb-1 optimized 64-bit integer multiplication
+
+   Copyright (C) 2018-2021 Free Software Foundation, Inc.
+   Contributed by Daniel Engel, Senva Inc (g...@danielengel.com)
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   .  */
+
+
+#ifdef L_muldi3
+
+// long long __aeabi_lmul(long long, long long)
+// Returns the least significant 64 bits of a 64 bit multiplication.
+// Expects the two multiplicands in $r1:$r0 and $r3:$r2.
+// Returns the product in $r1:$r0 (does not distinguish signed types).
+// Uses $r4 and $r5 as scratch space.
+// Same parent section as __umulsidi3() to keep tail call branch within range.
+FUNC_START_SECTION muldi3 .text.sorted.libgcc.lmul.muldi3
+
+#ifndef __symbian__
+  FUNC_ALIAS aeabi_lmul muldi3
+#endif
+
+CFI_START_FUNCTION
+
+// $r1:$r0 = 0x
+// $r3:$r2 = 0x
+
+// The following operations that only affect the upper 64 bits
+//  can be safely discarded:
+//    * 
+//    * 
+//    * 
+//    * 
+//    * 
+//    * 
+
+// MAYBE: Test for multiply by ZERO on implementations with a 32-cycle
+//  'muls' instruction, and skip over the operation in that case.
+
+// (0x * 0x), free $r1
+mulsxxh,yyl
+
+// (0x * 0x), free $r3
+mulsyyh,xxl
+addsyyh,xxh
+
+// Put the parameters in the correct form for umulsidi3().
+movsxxh,yyl
+b   LLSYM(__mul_overflow)
+
+CFI_END_FUNCTION
+FUNC_END muldi3
+
+#ifndef __symbian__
+  FUNC_END aeabi_lmul
+#endif
+
+#endif /* L_muldi3 */
+
+
+// The following implementation of __umulsidi3() integrates with __muldi3()
+//  above to allow the fast tail call while still preserving the extra
+//  hi-shifted bits of the result.  However, these extra bits add a few
+//  instructions not otherwise required when using only __umulsidi3().
+// Therefore, this block configures __umulsidi3() for compilation twice.
+// The first version is a minimal standalone implementation, and the second
+//  version adds the hi bits of __muldi3().  The standalone version must
+//  be declared WEAK, so that the combined version can supersede it and
+//  provide both symbols in programs that multiply long long integers.
+// This means '_umulsidi3' should appear before '_muldi3' in LIB1ASMFUNCS.
+#if defined(L_muldi3) || defined(L_umulsidi3)
+
+#ifdef L_umulsidi3
+// unsigned long long __umulsidi3(unsigned int, unsigned int)
+// Returns all 64 bits of a 32 bit multiplication.
+// Expects the two multiplicands in $r0 and $r1.
+// Returns the product in $r1:$r0.
+// Uses $r3, $r4 and $ip as scratch space.
+WEAK_START_SECTION umulsidi3 .text.sorted.libgcc.lmul.umulsidi3
+CFI_START_FUNCTION
+
+#else /* L_muldi3 */
+FUNC_ENTRY umulsidi3
+CFI_START_FUNCTION
+
+// 32x32 multiply with 64 bit result.
+// Expand the multiply into 4 parts, since muls only returns 32 bits.
+// (a16h * b16h / 2^32)
+//   + (a16h * b16l / 2^48) + (a16l * b16h / 2^48)
+//   + (a16l * b16l / 2^64)
+
+// MAYBE: Test for multiply by 0 on implementations with a 32-cycle
+//  'muls' instruction, and skip over the operation i

[PATCH v4 21/29] Refactor Thumb-1 floating point subtraction into a new file.

2021-01-11 Thread gnu
From: Daniel Engel 

gcc/libgcc/ChangeLog:
2021-01-07 Daniel Engel 

* config/arm/bpabi-v6m.S: Moved __aeabi_frsub() to
* config/arm/eabi/fadd.S: New file.
* config/arm/lib1funcs.S: #include eabi/fadd.S (v6m only).
---
 libgcc/config/arm/bpabi-v6m.S | 16 
 libgcc/config/arm/eabi/fadd.S | 16 
 libgcc/config/arm/lib1funcs.S |  1 +
 3 files changed, 17 insertions(+), 16 deletions(-)
 create mode 100644 libgcc/config/arm/eabi/fadd.S

diff --git a/libgcc/config/arm/bpabi-v6m.S b/libgcc/config/arm/bpabi-v6m.S
index 7c874f06218..c76c3b0568b 100644
--- a/libgcc/config/arm/bpabi-v6m.S
+++ b/libgcc/config/arm/bpabi-v6m.S
@@ -33,22 +33,6 @@
.eabi_attribute 25, 1
 #endif /* __ARM_EABI__ */
 
-
-#ifdef L_arm_addsubsf3
-
-FUNC_START aeabi_frsub
-
-  push {r4, lr}
-  movs r4, #1
-  lsls r4, #31
-  eors r0, r0, r4
-  bl   __aeabi_fadd
-  pop  {r4, pc}
-
-  FUNC_END aeabi_frsub
-
-#endif /* L_arm_addsubsf3 */
-
 #ifdef L_arm_addsubdf3
 
 FUNC_START aeabi_drsub
diff --git a/libgcc/config/arm/eabi/fadd.S b/libgcc/config/arm/eabi/fadd.S
new file mode 100644
index 000..223e38f7e50
--- /dev/null
+++ b/libgcc/config/arm/eabi/fadd.S
@@ -0,0 +1,16 @@
+
+#ifdef L_arm_addsubsf3
+
+FUNC_START aeabi_frsub
+
+  push {r4, lr}
+  movs r4, #1
+  lsls r4, #31
+  eors r0, r0, r4
+  bl   __aeabi_fadd
+  pop  {r4, pc}
+
+  FUNC_END aeabi_frsub
+
+#endif /* L_arm_addsubsf3 */
+
diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
index ed0b1eb1041..e439449422f 100644
--- a/libgcc/config/arm/lib1funcs.S
+++ b/libgcc/config/arm/lib1funcs.S
@@ -1946,6 +1946,7 @@ LSYM(Lchange_\register):
 #include "bpabi-v6m.S"
 #include "eabi/fplib.h"
 #include "eabi/fcmp.S"
+#include "eabi/fadd.S"
 #endif /* NOT_ISA_TARGET_32BIT */
 #include "eabi/lcmp.S"
 #endif /* !__symbian__ */
-- 
2.25.1



[PATCH v4 20/29] Import single precision comparison functions from the CM0 library.

2021-01-11 Thread gnu
From: Daniel Engel 

These functions are significantly smaller and faster than the wrapper
functions and soft-float implementation they replace.  Using the first
comparison operator (e.g. '<=') costs about 70 bytes initially, but every
additional operator incrementally adds just 4 bytes.

NOTE: It seems that the __aeabi_cfcmp*() routines formerly in bpabi-v6m.S
were not well tested, as they produced the wrong output for the 'C' flag.
The replacement functions are tested correct.

gcc/libgcc/ChangeLog:
2021-01-07 Daniel Engel 

* config/arm/eabi/fcmp.S: New __aeabi_fcmp() function family.
* config/arm/eabi/fplib.h: New file with fcmp-specific constants
and general library configuration macros.
* config/arm/lib1funcs.S: #include eabi/fplib.h (v6m only).
* config/arm/t-elf: Add _fcmp family members to LIB1ASMFUNCS.
---
 libgcc/config/arm/eabi/fcmp.S  | 660 ++---
 libgcc/config/arm/eabi/fplib.h |  83 +
 libgcc/config/arm/lib1funcs.S  |   1 +
 libgcc/config/arm/t-elf|  18 +
 4 files changed, 702 insertions(+), 60 deletions(-)
 create mode 100644 libgcc/config/arm/eabi/fplib.h

diff --git a/libgcc/config/arm/eabi/fcmp.S b/libgcc/config/arm/eabi/fcmp.S
index 3d02e191a43..cada33f4d35 100644
--- a/libgcc/config/arm/eabi/fcmp.S
+++ b/libgcc/config/arm/eabi/fcmp.S
@@ -1,64 +1,604 @@
+/* fcmp.S: Thumb-1 optimized 32-bit float comparison
 
+   Copyright (C) 2018-2021 Free Software Foundation, Inc.
+   Contributed by Daniel Engel, Senva Inc (g...@danielengel.com)
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   .  */
+
+
+// The various compare functions in this file all expect to tail call __cmpsf2()
+//  with flags set for a particular comparison mode.  The __internal_cmpsf2()
+//  symbol itself is unambiguous, but there is a remote risk that the linker
+//  will prefer some other symbol in place of __cmpsf2().  Importing an archive
+//  file that also exports __cmpsf2() will throw an error in this case.
+// As a workaround, this block configures __internal_cmpsf2() for compilation twice.
+// The first version configures __internal_cmpsf2() as a WEAK standalone symbol,
+//  and the second exports __cmpsf2() and __internal_cmpsf2() normally.
+// A small bonus: programs not using __cmpsf2() itself will be slightly smaller.
+// 'L_internal_cmpsf2' should appear before 'L_arm_cmpsf2' in LIB1ASMFUNCS.
+#if defined(L_arm_cmpsf2) || defined(L_internal_cmpsf2)
+
+#define CMPSF2_SECTION .text.sorted.libgcc.fcmp.cmpsf2
+
+// int __cmpsf2(float, float)
+// 
+// Returns the three-way comparison result of $r0 with $r1:
+//  * +1 if ($r0 > $r1), or either argument is NAN
+//  *  0 if ($r0 == $r1)
+//  * -1 if ($r0 < $r1)
+// Uses $r2, $r3, and $ip as scratch space.
 #ifdef L_arm_cmpsf2
+FUNC_START_SECTION cmpsf2 CMPSF2_SECTION
+FUNC_ALIAS lesf2 cmpsf2
+FUNC_ALIAS ltsf2 cmpsf2
+CFI_START_FUNCTION
+
+// Assumption: The 'libgcc' functions should raise exceptions.
+movsr2, #(FCMP_UN_POSITIVE + FCMP_RAISE_EXCEPTIONS + FCMP_3WAY)
+
+// int,int __internal_cmpsf2(float, float, int)
+// Internal function expects a set of control flags in $r2.
+// If ordered, returns a comparison type { 0, 1, 2 } in $r3
+FUNC_ENTRY internal_cmpsf2
+
+#else /* L_internal_cmpsf2 */
+WEAK_START_SECTION internal_cmpsf2 CMPSF2_SECTION
+CFI_START_FUNCTION
+
+#endif 
+
+// When operand signs are considered, the comparison result falls
+//  within one of the following quadrants:
+//
+// $r0  $r1  $r0-$r1* flags  result
+//  ++  >  C=0 GT
+//  ++  =  Z=1 EQ
+//  ++  <  C=1 LT
+//  +-  >  C=1 GT
+//  +-  =  C=1 GT
+//  +-  <  C=1 GT
+//  -+  >  C=0 LT
+//  -+  =  C=0 LT
+//  -+  <  C=0 LT
+//  --  >  C=0 LT
+//  -- 

[PATCH v4 19/29] Refactor Thumb-1 single precision comparison functions into a new file.

2021-01-11 Thread gnu
From: Daniel Engel 

gcc/libgcc/ChangeLog:
2021-01-07 Daniel Engel 

* config/arm/bpabi-v6m.S: Moved __aeabi_fcmp*() functions to
* config/arm/eabi/fcmp.S: New file.
* config/arm/lib1funcs.S: #include eabi/fcmp.S (v6m only).
---
 libgcc/config/arm/bpabi-v6m.S | 63 --
 libgcc/config/arm/eabi/fcmp.S | 64 +++
 libgcc/config/arm/lib1funcs.S |  1 +
 3 files changed, 65 insertions(+), 63 deletions(-)
 create mode 100644 libgcc/config/arm/eabi/fcmp.S

diff --git a/libgcc/config/arm/bpabi-v6m.S b/libgcc/config/arm/bpabi-v6m.S
index b3dc3bf8f4d..7c874f06218 100644
--- a/libgcc/config/arm/bpabi-v6m.S
+++ b/libgcc/config/arm/bpabi-v6m.S
@@ -49,69 +49,6 @@ FUNC_START aeabi_frsub
 
 #endif /* L_arm_addsubsf3 */
 
-#ifdef L_arm_cmpsf2
-
-FUNC_START aeabi_cfrcmple
-
-   mov ip, r0
-   movsr0, r1
-   mov r1, ip
-   b   6f
-
-FUNC_START aeabi_cfcmpeq
-FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
-
-   @ The status-returning routines are required to preserve all
-   @ registers except ip, lr, and cpsr.
-6: push{r0, r1, r2, r3, r4, lr}
-   bl  __lesf2
-   @ Set the Z flag correctly, and the C flag unconditionally.
-   cmp r0, #0
-   @ Clear the C flag if the return value was -1, indicating
-   @ that the first operand was smaller than the second.
-   bmi 1f
-   movsr1, #0
-   cmn r0, r1
-1:
-   pop {r0, r1, r2, r3, r4, pc}
-
-   FUNC_END aeabi_cfcmple
-   FUNC_END aeabi_cfcmpeq
-   FUNC_END aeabi_cfrcmple
-
-FUNC_START aeabi_fcmpeq
-
-   push{r4, lr}
-   bl  __eqsf2
-   negsr0, r0
-   addsr0, r0, #1
-   pop {r4, pc}
-
-   FUNC_END aeabi_fcmpeq
-
-.macro COMPARISON cond, helper, mode=sf2
-FUNC_START aeabi_fcmp\cond
-
-   push{r4, lr}
-   bl  __\helper\mode
-   cmp r0, #0
-   b\cond  1f
-   movsr0, #0
-   pop {r4, pc}
-1:
-   movsr0, #1
-   pop {r4, pc}
-
-   FUNC_END aeabi_fcmp\cond
-.endm
-
-COMPARISON lt, le
-COMPARISON le, le
-COMPARISON gt, ge
-COMPARISON ge, ge
-
-#endif /* L_arm_cmpsf2 */
-
 #ifdef L_arm_addsubdf3
 
 FUNC_START aeabi_drsub
diff --git a/libgcc/config/arm/eabi/fcmp.S b/libgcc/config/arm/eabi/fcmp.S
new file mode 100644
index 000..3d02e191a43
--- /dev/null
+++ b/libgcc/config/arm/eabi/fcmp.S
@@ -0,0 +1,64 @@
+
+#ifdef L_arm_cmpsf2
+
+FUNC_START aeabi_cfrcmple
+
+   mov ip, r0
+   movsr0, r1
+   mov r1, ip
+   b   6f
+
+FUNC_START aeabi_cfcmpeq
+FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
+
+   @ The status-returning routines are required to preserve all
+   @ registers except ip, lr, and cpsr.
+6: push{r0, r1, r2, r3, r4, lr}
+   bl  __lesf2
+   @ Set the Z flag correctly, and the C flag unconditionally.
+   cmp r0, #0
+   @ Clear the C flag if the return value was -1, indicating
+   @ that the first operand was smaller than the second.
+   bmi 1f
+   movsr1, #0
+   cmn r0, r1
+1:
+   pop {r0, r1, r2, r3, r4, pc}
+
+   FUNC_END aeabi_cfcmple
+   FUNC_END aeabi_cfcmpeq
+   FUNC_END aeabi_cfrcmple
+
+FUNC_START aeabi_fcmpeq
+
+   push{r4, lr}
+   bl  __eqsf2
+   negsr0, r0
+   addsr0, r0, #1
+   pop {r4, pc}
+
+   FUNC_END aeabi_fcmpeq
+
+.macro COMPARISON cond, helper, mode=sf2
+FUNC_START aeabi_fcmp\cond
+
+   push{r4, lr}
+   bl  __\helper\mode
+   cmp r0, #0
+   b\cond  1f
+   movsr0, #0
+   pop {r4, pc}
+1:
+   movsr0, #1
+   pop {r4, pc}
+
+   FUNC_END aeabi_fcmp\cond
+.endm
+
+COMPARISON lt, le
+COMPARISON le, le
+COMPARISON gt, ge
+COMPARISON ge, ge
+
+#endif /* L_arm_cmpsf2 */
+
diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
index a8afe78a69c..bd41ea79283 100644
--- a/libgcc/config/arm/lib1funcs.S
+++ b/libgcc/config/arm/lib1funcs.S
@@ -1944,6 +1944,7 @@ LSYM(Lchange_\register):
 #include "bpabi.S"
 #else /* NOT_ISA_TARGET_32BIT */
 #include "bpabi-v6m.S"
+#include "eabi/fcmp.S"
 #endif /* NOT_ISA_TARGET_32BIT */
 #include "eabi/lcmp.S"
 #endif /* !__symbian__ */
-- 
2.25.1



[PATCH v4 07/29] Import replacement 64-bit shift functions from CM0 library.

2021-01-11 Thread gnu
From: Daniel Engel 

In Thumb mode, the new functions are each 1-2 instructions smaller
and faster, and branchless when the IT instruction is available.
The ARM versions were converted to the "xxl/xxh" big-endian register
convention, but are otherwise unchanged.

gcc/libgcc/ChangeLog:
2021-01-07 Daniel Engel 

* config/arm/bits/shift.S: Faster thumb versions;
updated big-endian register convention to "xxl/xxh".
---
 libgcc/config/arm/bits/shift.S | 327 +++--
 1 file changed, 233 insertions(+), 94 deletions(-)

diff --git a/libgcc/config/arm/bits/shift.S b/libgcc/config/arm/bits/shift.S
index 94e466ac0d2..16cf2dcef04 100644
--- a/libgcc/config/arm/bits/shift.S
+++ b/libgcc/config/arm/bits/shift.S
@@ -1,102 +1,241 @@
+/* shift.S: ARM optimized 64-bit integer shift
+
+   Copyright (C) 2018-2021 Free Software Foundation, Inc.
+   Contributed by Daniel Engel, Senva Inc (g...@danielengel.com)
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   .  */
+
 
 #ifdef L_lshrdi3
 
-   FUNC_START lshrdi3
-   FUNC_ALIAS aeabi_llsr lshrdi3
-   
-#ifdef __thumb__
-   lsrsal, r2
-   movsr3, ah
-   lsrsah, r2
-   mov ip, r3
-   subsr2, #32
-   lsrsr3, r2
-   orrsal, r3
-   negsr2, r2
-   mov r3, ip
-   lslsr3, r2
-   orrsal, r3
-   RET
-#else
-   subsr3, r2, #32
-   rsb ip, r2, #32
-   movmi   al, al, lsr r2
-   movpl   al, ah, lsr r3
-   orrmi   al, al, ah, lsl ip
-   mov ah, ah, lsr r2
-   RET
-#endif
-   FUNC_END aeabi_llsr
-   FUNC_END lshrdi3
-
-#endif
-   
+// long long __aeabi_llsr(long long, int)
+// Logical shift right the 64 bit value in $r1:$r0 by the count in $r2.
+// The result is only guaranteed for shifts in the range of '0' to '63'.
+// Uses $r3 as scratch space.
+FUNC_START_SECTION aeabi_llsr .text.sorted.libgcc.lshrdi3
+FUNC_ALIAS lshrdi3 aeabi_llsr
+CFI_START_FUNCTION
+
+  #if defined(__thumb__) && __thumb__
+
+// Save a copy for the remainder.
+movsr3, xxh
+
+// Assume a simple shift.
+lsrsxxl,r2
+lsrsxxh,r2
+
+// Test if the shift distance is at least 1 word (32 bits).
+subsr2, #32
+
+#ifdef __HAVE_FEATURE_IT
+do_it   lo,te
+
+// The remainder is opposite the main shift, (32 - x) bits.
+rsblo   r2, #0
+lsllo   r3, r2
+
+// The remainder shift extends into the hi word.
+lsrhs   r3, r2
+
+#else /* !__HAVE_FEATURE_IT */
+bhs LLSYM(__llsr_large)
+
+// The remainder is opposite the main shift, (32 - x) bits.
+rsbsr2, #0
+lslsr3, r2
+
+// Cancel any remaining shift.
+eorsr2, r2
+
+  LLSYM(__llsr_large):
+// Apply any remaining shift to the hi word.
+lsrsr3, r2
+
+#endif /* !__HAVE_FEATURE_IT */
+
+// Merge remainder and result.
+addsxxl,r3
+RET
+
+  #else /* !__thumb__ */
+
+subsr3, r2, #32
+rsb ip, r2, #32
+movmi   xxl,xxl,lsr r2
+movpl   xxl,xxh,lsr r3
+orrmi   xxl,xxl,xxh,lsl ip
+mov xxh,xxh,lsr r2
+RET
+
+  #endif /* !__thumb__ */
+
+
+CFI_END_FUNCTION
+FUNC_END lshrdi3
+FUNC_END aeabi_llsr
+
+#endif /* L_lshrdi3 */
+
+
 #ifdef L_ashrdi3
-   
-   FUNC_START ashrdi3
-   FUNC_ALIAS aeabi_lasr ashrdi3
-   
-#ifdef __thumb__
-   lsrsal, r2
-   movsr3, ah
-   asrsah, r2
-   subsr2, #32
-   @ If r2 is negative at this point the following step would OR
-   @ the sign bit into all of AL.  That's not what we want...
-   bmi 1f
-   mov ip, r3
-   asrsr3, r2
-   orrsal, r3
-   mov r3, ip
-1:
-   negsr2, r2
-   lslsr3, r2
-   orrsal, r3
-   RET
-#else
-   subsr3, r2, #32
-   rsb ip, r2, #32
-   movmi 

  1   2   >