PING: [PATCH] libstdc++: use __bool_constant instead of integral_constant

2023-04-07 Thread Ken Matsui via Gcc-patches
Ping for this patch.

On Thu, Mar 23, 2023 at 4:06 AM Ken Matsui  wrote:
>
> In the type_traits header, both integral_constant and __bool_constant
> are used. This patch unifies those usages into __bool_constant.
>
> libstdc++-v3/ChangeLog:
>
> * include/std/type_traits: Use __bool_constant instead of
> integral_constant.
>
> Signed-off-by: Ken Matsui 
> ---
>  libstdc++-v3/include/std/type_traits | 32 ++--
>  1 file changed, 16 insertions(+), 16 deletions(-)
>
> diff --git a/libstdc++-v3/include/std/type_traits 
> b/libstdc++-v3/include/std/type_traits
> index 2bd607a8b8f..bc6982f9e64 100644
> --- a/libstdc++-v3/include/std/type_traits
> +++ b/libstdc++-v3/include/std/type_traits
> @@ -578,19 +578,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>/// is_enum
>template
>  struct is_enum
> -: public integral_constant
> +: public __bool_constant<__is_enum(_Tp)>
>  { };
>
>/// is_union
>template
>  struct is_union
> -: public integral_constant
> +: public __bool_constant<__is_union(_Tp)>
>  { };
>
>/// is_class
>template
>  struct is_class
> -: public integral_constant
> +: public __bool_constant<__is_class(_Tp)>
>  { };
>
>/// is_function
> @@ -784,7 +784,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>/// is_trivial
>template
>  struct is_trivial
> -: public integral_constant
> +: public __bool_constant<__is_trivial(_Tp)>
>  {
>static_assert(std::__is_complete_or_unbounded(__type_identity<_Tp>{}),
> "template argument must be a complete class or an unbounded array");
> @@ -793,7 +793,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>/// is_trivially_copyable
>template
>  struct is_trivially_copyable
> -: public integral_constant
> +: public __bool_constant<__is_trivially_copyable(_Tp)>
>  {
>static_assert(std::__is_complete_or_unbounded(__type_identity<_Tp>{}),
> "template argument must be a complete class or an unbounded array");
> @@ -802,7 +802,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>/// is_standard_layout
>template
>  struct is_standard_layout
> -: public integral_constant
> +: public __bool_constant<__is_standard_layout(_Tp)>
>  {
>static_assert(std::__is_complete_or_unbounded(__type_identity<_Tp>{}),
> "template argument must be a complete class or an unbounded array");
> @@ -817,7 +817,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  struct
>  _GLIBCXX20_DEPRECATED_SUGGEST("is_standard_layout && is_trivial")
>  is_pod
> -: public integral_constant
> +: public __bool_constant<__is_pod(_Tp)>
>  {
>static_assert(std::__is_complete_or_unbounded(__type_identity<_Tp>{}),
> "template argument must be a complete class or an unbounded array");
> @@ -831,7 +831,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  struct
>  _GLIBCXX17_DEPRECATED
>  is_literal_type
> -: public integral_constant
> +: public __bool_constant<__is_literal_type(_Tp)>
>  {
>static_assert(std::__is_complete_or_unbounded(__type_identity<_Tp>{}),
> "template argument must be a complete class or an unbounded array");
> @@ -840,13 +840,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>/// is_empty
>template
>  struct is_empty
> -: public integral_constant
> +: public __bool_constant<__is_empty(_Tp)>
>  { };
>
>/// is_polymorphic
>template
>  struct is_polymorphic
> -: public integral_constant
> +: public __bool_constant<__is_polymorphic(_Tp)>
>  { };
>
>  #if __cplusplus >= 201402L
> @@ -855,14 +855,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>/// @since C++14
>template
>  struct is_final
> -: public integral_constant
> +: public __bool_constant<__is_final(_Tp)>
>  { };
>  #endif
>
>/// is_abstract
>template
>  struct is_abstract
> -: public integral_constant
> +: public __bool_constant<__is_abstract(_Tp)>
>  { };
>
>/// @cond undocumented
> @@ -873,7 +873,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>
>template
>  struct __is_signed_helper<_Tp, true>
> -: public integral_constant
> +: public __bool_constant<_Tp(-1) < _Tp(0)>
>  { };
>/// @endcond
>
> @@ -1333,7 +1333,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>/// has_virtual_destructor
>template
>  struct has_virtual_destructor
> -: public integral_constant
> +: public __bool_constant<__has_virtual_destructor(_Tp)>
>  {
>static_assert(std::__is_complete_or_unbounded(__type_identity<_Tp>{}),
> "template argument must be a complete class or an unbounded array");
> @@ -1392,7 +1392,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>template
>  struct is_same
>  #ifdef _GLIBCXX_HAVE_BUILTIN_IS_SAME
> -: public integral_constant
> +: public __bool_constant<__is_same(_Tp, _Up)>
>  #else
>  : public false_type
>  #endif
> @@ -1408,7 +1408,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>/// 

[PATCH] aarch64: Add the cost and scheduling models for Neoverse N1

2023-04-07 Thread Evandro Menezes via Gcc-patches
This patch adds the cost and scheduling models for Neoverse N1, based on the
information from the "Arm Neoverse N1 Software Optimization Guide".

-- 
Evandro Menezes ◊ evan...@yahoo.com

[PATCH] aarch64: Add the cost and scheduling models for Neoverse N1

gcc/ChangeLog:

* config/aarch64/aarch64-cores.def:
Use the Neoverse N1 scheduling and cost models, but only for itself.
* config/aarch64/aarch64.cc
(cortexa76_tunings): Rename variable.
(neoversen1_addrcost_table): New variable.
(neoversen1_vector_cost): Likewise.
(neoversen1_regmove_cost): Likewise.
(neoversen1_advsimd_vector_cost): Likewise.
(neoversen1_scalar_issue_info): Likewise.
(neoversen1_advsimd_issue_info): Likewise.
(neoversen1_vec_issue_info): Likewise.
(neoversen1_vector_cost): Likewise.
(neoversen1_tunings): Likewise.
* config/aarch64/aarch64.md: Include `neoverse-n1.md`.
* config/aarch64/neoverse-n1.md: New file.
* config/arm/aarch-cost-tables.h
(neoversen1_extra_costs): New variable.

Signed-off-by: Evandro Menezes 

---
 gcc/config/aarch64/aarch64-cores.def |  22 +-
 gcc/config/aarch64/aarch64.cc| 155 +-
 gcc/config/aarch64/aarch64.md|   1 +
 gcc/config/aarch64/neoverse-n1.md| 716 +++
 gcc/config/arm/aarch-cost-tables.h   | 107 
 5 files changed, 977 insertions(+), 24 deletions(-)
 create mode 100644 gcc/config/aarch64/neoverse-n1.md

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index 2ec88c98400..cc842c4e22c 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -105,18 +105,18 @@ AARCH64_CORE("thunderx2t99",  thunderx2t99,  
thunderx2t99, V8_1A,  (CRYPTO), thu
 /* ARM ('A') cores. */
 AARCH64_CORE("cortex-a55",  cortexa55, cortexa53, V8_2A,  (F16, RCPC, 
DOTPROD), cortexa53, 0x41, 0xd05, -1)
 AARCH64_CORE("cortex-a75",  cortexa75, cortexa57, V8_2A,  (F16, RCPC, 
DOTPROD), cortexa73, 0x41, 0xd0a, -1)
-AARCH64_CORE("cortex-a76",  cortexa76, cortexa57, V8_2A,  (F16, RCPC, 
DOTPROD), neoversen1, 0x41, 0xd0b, -1)
-AARCH64_CORE("cortex-a76ae",  cortexa76ae, cortexa57, V8_2A,  (F16, RCPC, 
DOTPROD, SSBS), neoversen1, 0x41, 0xd0e, -1)
-AARCH64_CORE("cortex-a77",  cortexa77, cortexa57, V8_2A,  (F16, RCPC, DOTPROD, 
SSBS), neoversen1, 0x41, 0xd0d, -1)
-AARCH64_CORE("cortex-a78",  cortexa78, cortexa57, V8_2A,  (F16, RCPC, DOTPROD, 
SSBS, PROFILE), neoversen1, 0x41, 0xd41, -1)
-AARCH64_CORE("cortex-a78ae",  cortexa78ae, cortexa57, V8_2A,  (F16, RCPC, 
DOTPROD, SSBS, PROFILE), neoversen1, 0x41, 0xd42, -1)
-AARCH64_CORE("cortex-a78c",  cortexa78c, cortexa57, V8_2A,  (F16, RCPC, 
DOTPROD, SSBS, PROFILE, FLAGM, PAUTH), neoversen1, 0x41, 0xd4b, -1)
+AARCH64_CORE("cortex-a76",  cortexa76, cortexa57, V8_2A,  (F16, RCPC, 
DOTPROD), cortexa76, 0x41, 0xd0b, -1)
+AARCH64_CORE("cortex-a76ae",  cortexa76ae, cortexa57, V8_2A,  (F16, RCPC, 
DOTPROD, SSBS), cortexa76, 0x41, 0xd0e, -1)
+AARCH64_CORE("cortex-a77",  cortexa77, cortexa57, V8_2A,  (F16, RCPC, DOTPROD, 
SSBS), cortexa76, 0x41, 0xd0d, -1)
+AARCH64_CORE("cortex-a78",  cortexa78, cortexa57, V8_2A,  (F16, RCPC, DOTPROD, 
SSBS, PROFILE), cortexa76, 0x41, 0xd41, -1)
+AARCH64_CORE("cortex-a78ae",  cortexa78ae, cortexa57, V8_2A,  (F16, RCPC, 
DOTPROD, SSBS, PROFILE), cortexa76, 0x41, 0xd42, -1)
+AARCH64_CORE("cortex-a78c",  cortexa78c, cortexa57, V8_2A,  (F16, RCPC, 
DOTPROD, SSBS, PROFILE, FLAGM, PAUTH), cortexa76, 0x41, 0xd4b, -1)
 AARCH64_CORE("cortex-a65",  cortexa65, cortexa53, V8_2A,  (F16, RCPC, DOTPROD, 
SSBS), cortexa73, 0x41, 0xd06, -1)
 AARCH64_CORE("cortex-a65ae",  cortexa65ae, cortexa53, V8_2A,  (F16, RCPC, 
DOTPROD, SSBS), cortexa73, 0x41, 0xd43, -1)
-AARCH64_CORE("cortex-x1",  cortexx1, cortexa57, V8_2A,  (F16, RCPC, DOTPROD, 
SSBS, PROFILE), neoversen1, 0x41, 0xd44, -1)
-AARCH64_CORE("cortex-x1c",  cortexx1c, cortexa57, V8_2A,  (F16, RCPC, DOTPROD, 
SSBS, PROFILE, PAUTH), neoversen1, 0x41, 0xd4c, -1)
-AARCH64_CORE("ares",  ares, cortexa57, V8_2A,  (F16, RCPC, DOTPROD, PROFILE), 
neoversen1, 0x41, 0xd0c, -1)
-AARCH64_CORE("neoverse-n1",  neoversen1, cortexa57, V8_2A,  (F16, RCPC, 
DOTPROD, PROFILE), neoversen1, 0x41, 0xd0c, -1)
+AARCH64_CORE("cortex-x1",  cortexx1, cortexa57, V8_2A,  (F16, RCPC, DOTPROD, 
SSBS, PROFILE), cortexa76, 0x41, 0xd44, -1)
+AARCH64_CORE("cortex-x1c",  cortexx1c, cortexa57, V8_2A,  (F16, RCPC, DOTPROD, 
SSBS, PROFILE, PAUTH), cortexa76, 0x41, 0xd4c, -1)
+AARCH64_CORE("ares",  ares, cortexa57, V8_2A,  (F16, RCPC, DOTPROD, PROFILE), 
cortexa76, 0x41, 0xd0c, -1)
+AARCH64_CORE("neoverse-n1",  neoversen1, neoversen1, V8_2A,  (F16, RCPC, 
DOTPROD, PROFILE), neoversen1, 0x41, 0xd0c, -1)
 AARCH64_CORE("neoverse-e1",  neoversee1, cortexa53, V8_2A,  (F16, RCPC, 
DOTPROD, SSBS), cortexa73, 0x41, 0xd4a, -1)
 
 /* Cavium ('C') cores. */
@@ -160,7 +160,7 @@ AARCH64_CORE("cortex-a73.cortex-a53",  

Re: Ping! [Patch, fortran] PR87477 - [meta-bug] [F03] issues concerning the ASSOCIATE statement

2023-04-07 Thread Paul Richard Thomas via Gcc-patches
PS Quite right about the allocation in PR93813 - consider it to be included.

Cheers and thanks

Paul


On Fri, 7 Apr 2023 at 22:35, Paul Richard Thomas <
paul.richard.tho...@gmail.com> wrote:

> Hi Harald,
>
> Well done on noticing the memory leak :-) I have a fix for it that I was
> going to post separately. Actually, it is a trivial one liner, which I
> could include with the patch.
> @@ -2554,23 +2559,25 @@ gfc_conv_string_length (gfc_charlen * cl, gfc_expr
> * expr, stmtblock_t * pblock)
>expr_flat = gfc_copy_expr (expr);
>flatten_array_ctors_without_strlen (expr_flat);
>gfc_resolve_expr (expr_flat);
> -
> -  gfc_conv_expr (, expr_flat);
> -  gfc_add_block_to_block (pblock, );
> -  cl->backend_decl = convert (gfc_charlen_type_node,
> se.string_length);
> -
> +  if (expr_flat->rank)
> +   gfc_conv_expr_descriptor (, expr_flat);
> +  else
> +   gfc_conv_expr (, expr_flat);
> +  if (expr_flat->expr_type != EXPR_VARIABLE)
> +   gfc_add_block_to_block (pblock, );
> +  se.expr = convert (gfc_charlen_type_node, se.string_length);
> +  gfc_add_block_to_block (pblock, );
> // <<>>
>gfc_free_expr (expr_flat);
> -  return;
>  }
> -
> -  /* Convert cl->length.  */
> -
> -  gcc_assert (cl->length);
> -
> -  gfc_conv_expr_type (, cl->length, gfc_charlen_type_node);
> -  se.expr = fold_build2_loc (input_location, MAX_EXPR,
> gfc_charlen_type_node,
> -se.expr, build_zero_cst (TREE_TYPE
> (se.expr)));
> -  gfc_add_block_to_block (pblock, );
> +  else
> +{
> +  /* Convert cl->length.  */
> +  gfc_conv_expr_type (, cl->length, gfc_charlen_type_node);
> +  se.expr = fold_build2_loc (input_location, MAX_EXPR,
> +gfc_charlen_type_node, se.expr,
> +build_zero_cst (TREE_TYPE (se.expr)));
> +  gfc_add_block_to_block (pblock, );
> +}
>
>if (cl->backend_decl && VAR_P (cl->backend_decl))
>  gfc_add_modify (pblock, cl->backend_decl, se.expr);
>
> Cheers
>
> Paul
>
>
> On Fri, 7 Apr 2023 at 20:28, Harald Anlauf  wrote:
>
>> Hi Paul,
>>
>> On 4/7/23 15:53, Paul Richard Thomas via Gcc-patches wrote:
>> > duuuh! Please find them attached.
>>
>> the patch LGTM.  Thanks!
>>
>> However, I have comments on the new testcase associate_60.f90:
>> subroutine pr93813 is missing an allocation of x, e.g.:
>>
>>  allocate (t :: x)
>>
>> otherwise it would be invalid.  Please check and fix.
>>
>> Interestingly, subroutine pr92779 exhibits a small memory leak
>> with memory allocated by the spread intrinsic.  I played a little
>> and found that the leak depends on the presence of trim(): omitting
>> trim() removes the leak.  But looking at the related pr, it seems
>> that trim() was essential, so omitting it is likely not an option.
>>
>> I think the best way is to proceed and to open a PR on the memory
>> leak rather than leaving pr92779 open.  What do you think?
>>
>> Cheers,
>> Harald
>>
>>
>> > Thanks
>> >
>> > Paul
>> >
>> >
>> > On Fri, 7 Apr 2023 at 10:41, Harald Anlauf  wrote:
>> >
>> >> Hi Paul,
>> >>
>> >> I don't see the new testcases.  Is this an issue on my side,
>> >> or did you forget to attach them?
>> >>
>> >> Thanks,
>> >> Harald
>> >>
>> >> On 4/7/23 09:07, Paul Richard Thomas via Gcc-patches wrote:
>> >>> Dear All,
>> >>>
>> >>> Please find attached a slightly updated version of the patch with a
>> >>> consolidated testcase. The three additional testcases are nothing to
>> do
>> >>> with associate and test fixes of character related bugs.
>> >>>
>> >>> OK for mainline?
>> >>>
>> >>> Cheers
>> >>>
>> >>> Paul
>> >>> Fortran: Fix some of the bugs in associate [PR87477]
>> >>>
>> >>> 2023-04-07  Paul Thomas  
>> >>>
>> >>> gcc/fortran
>> >>> PR fortran/87477
>> >>> * resolve.cc (resolve_assoc_var): Handle parentheses around the
>> >>> target expression.
>> >>> (resolve_block_construct): Remove unnecessary static decls.
>> >>> * trans-array.cc (gfc_conv_expr_descriptor): Guard string len
>> >>> expression in condition. Improve handling of string length and
>> >>> span, especially for substrings of the descriptor.
>> >>> (duplicate_allocatable): Make element type more explicit with
>> >>> 'eltype'.
>> >>> * trans-decl.cc (gfc_get_symbol_decl): Emit a fatal error with
>> >>> appropriate message instead of ICE if symbol type is unknown.
>> >>> * trans-expr.cc (gfc_get_expr_charlen): Retain last charlen in
>> >>> 'previous' and use if end expression in substring reference is
>> >>> null.
>> >>> (gfc_conv_string_length): Use gfc_conv_expr_descriptor if
>> >>> 'expr_flat' is an array.
>> >>> (gfc_trans_alloc_subarray_assign): If this is a deferred string
>> >>> length component, store the string length in the hidden comp.
>> >>> Update the typespec length accordingly. Generate a new type
>> >>> spec for the call to gfc_duplicate_allocatable in this case.
>> >>> * trans-io.cc (gfc_trans_transfer): Scalarize transfer of
>> 

Re: Ping! [Patch, fortran] PR87477 - [meta-bug] [F03] issues concerning the ASSOCIATE statement

2023-04-07 Thread Paul Richard Thomas via Gcc-patches
Hi Harald,

Well done on noticing the memory leak :-) I have a fix for it that I was
going to post separately. Actually, it is a trivial one liner, which I
could include with the patch.
@@ -2554,23 +2559,25 @@ gfc_conv_string_length (gfc_charlen * cl, gfc_expr
* expr, stmtblock_t * pblock)
   expr_flat = gfc_copy_expr (expr);
   flatten_array_ctors_without_strlen (expr_flat);
   gfc_resolve_expr (expr_flat);
-
-  gfc_conv_expr (, expr_flat);
-  gfc_add_block_to_block (pblock, );
-  cl->backend_decl = convert (gfc_charlen_type_node, se.string_length);
-
+  if (expr_flat->rank)
+   gfc_conv_expr_descriptor (, expr_flat);
+  else
+   gfc_conv_expr (, expr_flat);
+  if (expr_flat->expr_type != EXPR_VARIABLE)
+   gfc_add_block_to_block (pblock, );
+  se.expr = convert (gfc_charlen_type_node, se.string_length);
+  gfc_add_block_to_block (pblock, );
  // <<>>
   gfc_free_expr (expr_flat);
-  return;
 }
-
-  /* Convert cl->length.  */
-
-  gcc_assert (cl->length);
-
-  gfc_conv_expr_type (, cl->length, gfc_charlen_type_node);
-  se.expr = fold_build2_loc (input_location, MAX_EXPR,
gfc_charlen_type_node,
-se.expr, build_zero_cst (TREE_TYPE (se.expr)));
-  gfc_add_block_to_block (pblock, );
+  else
+{
+  /* Convert cl->length.  */
+  gfc_conv_expr_type (, cl->length, gfc_charlen_type_node);
+  se.expr = fold_build2_loc (input_location, MAX_EXPR,
+gfc_charlen_type_node, se.expr,
+build_zero_cst (TREE_TYPE (se.expr)));
+  gfc_add_block_to_block (pblock, );
+}

   if (cl->backend_decl && VAR_P (cl->backend_decl))
 gfc_add_modify (pblock, cl->backend_decl, se.expr);

Cheers

Paul


On Fri, 7 Apr 2023 at 20:28, Harald Anlauf  wrote:

> Hi Paul,
>
> On 4/7/23 15:53, Paul Richard Thomas via Gcc-patches wrote:
> > duuuh! Please find them attached.
>
> the patch LGTM.  Thanks!
>
> However, I have comments on the new testcase associate_60.f90:
> subroutine pr93813 is missing an allocation of x, e.g.:
>
>  allocate (t :: x)
>
> otherwise it would be invalid.  Please check and fix.
>
> Interestingly, subroutine pr92779 exhibits a small memory leak
> with memory allocated by the spread intrinsic.  I played a little
> and found that the leak depends on the presence of trim(): omitting
> trim() removes the leak.  But looking at the related pr, it seems
> that trim() was essential, so omitting it is likely not an option.
>
> I think the best way is to proceed and to open a PR on the memory
> leak rather than leaving pr92779 open.  What do you think?
>
> Cheers,
> Harald
>
>
> > Thanks
> >
> > Paul
> >
> >
> > On Fri, 7 Apr 2023 at 10:41, Harald Anlauf  wrote:
> >
> >> Hi Paul,
> >>
> >> I don't see the new testcases.  Is this an issue on my side,
> >> or did you forget to attach them?
> >>
> >> Thanks,
> >> Harald
> >>
> >> On 4/7/23 09:07, Paul Richard Thomas via Gcc-patches wrote:
> >>> Dear All,
> >>>
> >>> Please find attached a slightly updated version of the patch with a
> >>> consolidated testcase. The three additional testcases are nothing to do
> >>> with associate and test fixes of character related bugs.
> >>>
> >>> OK for mainline?
> >>>
> >>> Cheers
> >>>
> >>> Paul
> >>> Fortran: Fix some of the bugs in associate [PR87477]
> >>>
> >>> 2023-04-07  Paul Thomas  
> >>>
> >>> gcc/fortran
> >>> PR fortran/87477
> >>> * resolve.cc (resolve_assoc_var): Handle parentheses around the
> >>> target expression.
> >>> (resolve_block_construct): Remove unnecessary static decls.
> >>> * trans-array.cc (gfc_conv_expr_descriptor): Guard string len
> >>> expression in condition. Improve handling of string length and
> >>> span, especially for substrings of the descriptor.
> >>> (duplicate_allocatable): Make element type more explicit with
> >>> 'eltype'.
> >>> * trans-decl.cc (gfc_get_symbol_decl): Emit a fatal error with
> >>> appropriate message instead of ICE if symbol type is unknown.
> >>> * trans-expr.cc (gfc_get_expr_charlen): Retain last charlen in
> >>> 'previous' and use if end expression in substring reference is
> >>> null.
> >>> (gfc_conv_string_length): Use gfc_conv_expr_descriptor if
> >>> 'expr_flat' is an array.
> >>> (gfc_trans_alloc_subarray_assign): If this is a deferred string
> >>> length component, store the string length in the hidden comp.
> >>> Update the typespec length accordingly. Generate a new type
> >>> spec for the call to gfc_duplicate_allocatable in this case.
> >>> * trans-io.cc (gfc_trans_transfer): Scalarize transfer of
> >>> deferred character array components.
> >>>
> >>>
> >>> gcc/testsuite/
> >>> PR fortran/87477
> >>> * gfortran.dg/finalize_47.f90 : Enable substring test.
> >>> * gfortran.dg/finalize_51.f90 : Update an error message.
> >>>
> >>> PR fortran/85686
> >>> PR fortran/88247
> >>> PR fortran/91941
> >>> PR fortran/92779
> >>> PR fortran/93339
> >>> PR fortran/93813
> >>> PR fortran/100948
> 

libgo patch committed: Remove test ordering dependency in mime

2023-04-07 Thread Ian Lance Taylor via Gcc-patches
This libgo patch removes a test ordering dependency in the mime
package.  This is a backport of https://go.dev/cl/421442 from the
upstream repo.  This fixes https://go.dev/issue/51648.  Bootstrapped
and ran mime tests on x86_64-pc-linux-gnu.  Committed to mainline.

Ian
f22c12d7361d22d47cce73d342edf2e1ebf20520
diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index addef6f8f51..e133650ad91 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-63ba7dd52f2cc49dab4b70ac81309296a920d4dc
+0411a2733fd468e69f1998edd91e8fe3ba40ff9e
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/libgo/go/mime/type_test.go b/libgo/go/mime/type_test.go
index 5e4d25cc872..5769c6a55de 100644
--- a/libgo/go/mime/type_test.go
+++ b/libgo/go/mime/type_test.go
@@ -14,7 +14,10 @@ import (
 func setMimeInit(fn func()) (cleanup func()) {
once = sync.Once{}
testInitMime = fn
-   return func() { testInitMime = nil }
+   return func() {
+   testInitMime = nil
+   once = sync.Once{}
+   }
 }
 
 func clearMimeTypes() {
diff --git a/libgo/go/mime/type_unix_test.go b/libgo/go/mime/type_unix_test.go
index 4d109aa71a2..43db44b7aa1 100644
--- a/libgo/go/mime/type_unix_test.go
+++ b/libgo/go/mime/type_unix_test.go
@@ -11,6 +11,7 @@ import (
 )
 
 func initMimeUnixTest(t *testing.T) {
+   once.Do(initMime)
err := loadMimeGlobsFile("testdata/test.types.globs2")
if err != nil {
t.Fatal(err)


Re: Ping! [Patch, fortran] PR87477 - [meta-bug] [F03] issues concerning the ASSOCIATE statement

2023-04-07 Thread Harald Anlauf via Gcc-patches

Hi Paul,

On 4/7/23 15:53, Paul Richard Thomas via Gcc-patches wrote:

duuuh! Please find them attached.


the patch LGTM.  Thanks!

However, I have comments on the new testcase associate_60.f90:
subroutine pr93813 is missing an allocation of x, e.g.:

allocate (t :: x)

otherwise it would be invalid.  Please check and fix.

Interestingly, subroutine pr92779 exhibits a small memory leak
with memory allocated by the spread intrinsic.  I played a little
and found that the leak depends on the presence of trim(): omitting
trim() removes the leak.  But looking at the related pr, it seems
that trim() was essential, so omitting it is likely not an option.

I think the best way is to proceed and to open a PR on the memory
leak rather than leaving pr92779 open.  What do you think?

Cheers,
Harald



Thanks

Paul


On Fri, 7 Apr 2023 at 10:41, Harald Anlauf  wrote:


Hi Paul,

I don't see the new testcases.  Is this an issue on my side,
or did you forget to attach them?

Thanks,
Harald

On 4/7/23 09:07, Paul Richard Thomas via Gcc-patches wrote:

Dear All,

Please find attached a slightly updated version of the patch with a
consolidated testcase. The three additional testcases are nothing to do
with associate and test fixes of character related bugs.

OK for mainline?

Cheers

Paul
Fortran: Fix some of the bugs in associate [PR87477]

2023-04-07  Paul Thomas  

gcc/fortran
PR fortran/87477
* resolve.cc (resolve_assoc_var): Handle parentheses around the
target expression.
(resolve_block_construct): Remove unnecessary static decls.
* trans-array.cc (gfc_conv_expr_descriptor): Guard string len
expression in condition. Improve handling of string length and
span, especially for substrings of the descriptor.
(duplicate_allocatable): Make element type more explicit with
'eltype'.
* trans-decl.cc (gfc_get_symbol_decl): Emit a fatal error with
appropriate message instead of ICE if symbol type is unknown.
* trans-expr.cc (gfc_get_expr_charlen): Retain last charlen in
'previous' and use if end expression in substring reference is
null.
(gfc_conv_string_length): Use gfc_conv_expr_descriptor if
'expr_flat' is an array.
(gfc_trans_alloc_subarray_assign): If this is a deferred string
length component, store the string length in the hidden comp.
Update the typespec length accordingly. Generate a new type
spec for the call to gfc_duplicate_allocatable in this case.
* trans-io.cc (gfc_trans_transfer): Scalarize transfer of
deferred character array components.


gcc/testsuite/
PR fortran/87477
* gfortran.dg/finalize_47.f90 : Enable substring test.
* gfortran.dg/finalize_51.f90 : Update an error message.

PR fortran/85686
PR fortran/88247
PR fortran/91941
PR fortran/92779
PR fortran/93339
PR fortran/93813
PR fortran/100948
PR fortran/102106
* gfortran.dg/associate_60.f90 : New test

PR fortran/98408
* gfortran.dg/pr98408.f90 : New test

PR fortran/105205
* gfortran.dg/pr105205.f90 : New test

PR fortran/106918
* gfortran.dg/pr106918.f90 : New test









Re: [PATCH, V2] PR target/70243: Do not generate vmaddfp and vnmsubfp

2023-04-07 Thread Segher Boessenkool
Hi!

On Fri, Apr 07, 2023 at 02:34:01AM -0400, Michael Meissner wrote:
> As we discussed in a private chat room, I modified the code to generate 
> vmaddfp
> and vnmsubfp if -Ofast (-ffast-math) is used.

As I said, that is no good.

> This allows the compiler to
> eliminate the extra move if the user does not care about strict floating point
> code generation, but it generates only the VSX instructions in the normal
> case.

You should not generate *any* VMX computational insns unless the user
asked for that *explicitly*.  Not only the rounding mode matters (always
RN=00 for VMX), but also the NJ setting, and the default for NJ is
unusable for normal code (that is, code that is not low-precision
graphics code or the like; most code).

Please change *only* the two patterns I mentioned?  Just never generate
vmaddfp or vnmsubfp when not explicitly asked for it.


Segher


[PATCH, rs6000] Disable generation of scalar modulo instructions

2023-04-07 Thread Pat Haugen via Gcc-patches

Disable generation of scalar modulo instructions.

It was recently discovered that the scalar modulo instructions can suffer
noticeable performance issues for certain input values. This patch disables
their generation since the equivalent div/mul/sub sequence does not suffer
the same problem.

Bootstrapped and regression tested on powerpc64le (Power10).
Ok for master and backports after burn in?

-Pat


2023-04-07  Pat Haugen  

gcc/
* config/rs6000/rs6000.h (RS6000_DISABLE_SCALAR_MODULO): New.
* config/rs6000/rs6000.md (mod3, *mod3): Use it.
(define_expand umod3): New.
(define_insn umod3): Rename to *umod3 and disable.

gcc/testsuite/
* gcc.target/powerpc/clone1.c: Add xfails.
* gcc.target/powerpc/clone3.c: Likewise.
* gcc.target/powerpc/mod-1.c: Likewise.
* gcc.target/powerpc/mod-2.c: Likewise.



diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 3503614efbd..e4d9f357622 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2492,3 +2492,9 @@ while (0)
rs6000_asm_output_opcode (STREAM);  \
 }  \
   while (0)
+
+/* Disable generation of scalar modulo instructions for word/dword due to
+   performance issues with certain input values. This can be removed in the
+   future when the issues have been resolved.  */
+#define RS6000_DISABLE_SCALAR_MODULO 1
+
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 44f7dd509cb..a267b7ee2d0 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -3421,6 +3421,17 @@ (define_expand "mod3"
FAIL;

   operands[2] = force_reg (mode, operands[2]);
+
+  if (RS6000_DISABLE_SCALAR_MODULO)
+   {
+ temp1 = gen_reg_rtx (mode);
+ temp2 = gen_reg_rtx (mode);
+
+ emit_insn (gen_div3 (temp1, operands[1], operands[2]));
+ emit_insn (gen_mul3 (temp2, temp1, operands[2]));
+ emit_insn (gen_sub3 (operands[0], operands[1], temp2));
+ DONE;
+   }
 }
   else
 {
@@ -3440,17 +3451,42 @@ (define_insn "*mod3"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=,r")
 (mod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
 (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
-  "TARGET_MODULO"
+  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
   "mods %0,%1,%2"
   [(set_attr "type" "div")
(set_attr "size" "")])

+;; This define_expand can be removed when RS6000_DISABLE_SCALAR_MODULO is
+;; removed.
+(define_expand "umod3"
+  [(set (match_operand:GPR 0 "gpc_reg_operand")
+   (umod:GPR (match_operand:GPR 1 "gpc_reg_operand")
+ (match_operand:GPR 2 "gpc_reg_operand")))]
+  ""
+{
+  rtx temp1;
+  rtx temp2;
+
+  if (!TARGET_MODULO)
+   FAIL;
+
+  if (RS6000_DISABLE_SCALAR_MODULO)
+{
+  temp1 = gen_reg_rtx (mode);
+  temp2 = gen_reg_rtx (mode);
+
+  emit_insn (gen_udiv3 (temp1, operands[1], operands[2]));
+  emit_insn (gen_mul3 (temp2, temp1, operands[2]));
+  emit_insn (gen_sub3 (operands[0], operands[1], temp2));
+  DONE;
+}
+})

-(define_insn "umod3"
+(define_insn "*umod3"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=,r")
 (umod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
  (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
-  "TARGET_MODULO"
+  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
   "modu %0,%1,%2"
   [(set_attr "type" "div")
(set_attr "size" "")])
diff --git a/gcc/testsuite/gcc.target/powerpc/clone1.c 
b/gcc/testsuite/gcc.target/powerpc/clone1.c

index c69fd2aa1b8..74323ca0e8c 100644
--- a/gcc/testsuite/gcc.target/powerpc/clone1.c
+++ b/gcc/testsuite/gcc.target/powerpc/clone1.c
@@ -21,6 +21,7 @@ long mod_func_or (long a, long b, long c)
   return mod_func (a, b) | c;
 }

-/* { dg-final { scan-assembler-times {\mdivd\M}  1 } } */
-/* { dg-final { scan-assembler-times {\mmulld\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mmodsd\M} 1 } } */
+/* Fail due to RS6000_DISABLE_SCALAR_MODULO. */
+/* { dg-final { scan-assembler-times {\mdivd\M}  1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmulld\M} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmodsd\M} 1 { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/clone3.c 
b/gcc/testsuite/gcc.target/powerpc/clone3.c

index 911b88b781d..d3eb4dd2378 100644
--- a/gcc/testsuite/gcc.target/powerpc/clone3.c
+++ b/gcc/testsuite/gcc.target/powerpc/clone3.c
@@ -27,7 +27,8 @@ long mod_func_or (long a, long b, long c)
   return mod_func (a, b) | c;
 }

-/* { dg-final { scan-assembler-times {\mdivd\M}  1 } } */
-/* { dg-final { scan-assembler-times {\mmulld\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mmodsd\M} 2 } } */
+/* Fail due to RS6000_DISABLE_SCALAR_MODULO. */
+/* { dg-final { scan-assembler-times {\mdivd\M}  1 { xfail *-*-* } } } 

Re: Ping! [Patch, fortran] PR87477 - [meta-bug] [F03] issues concerning the ASSOCIATE statement

2023-04-07 Thread Paul Richard Thomas via Gcc-patches
duuuh! Please find them attached.

Thanks

Paul


On Fri, 7 Apr 2023 at 10:41, Harald Anlauf  wrote:

> Hi Paul,
>
> I don't see the new testcases.  Is this an issue on my side,
> or did you forget to attach them?
>
> Thanks,
> Harald
>
> On 4/7/23 09:07, Paul Richard Thomas via Gcc-patches wrote:
> > Dear All,
> >
> > Please find attached a slightly updated version of the patch with a
> > consolidated testcase. The three additional testcases are nothing to do
> > with associate and test fixes of character related bugs.
> >
> > OK for mainline?
> >
> > Cheers
> >
> > Paul
> > Fortran: Fix some of the bugs in associate [PR87477]
> >
> > 2023-04-07  Paul Thomas  
> >
> > gcc/fortran
> > PR fortran/87477
> > * resolve.cc (resolve_assoc_var): Handle parentheses around the
> > target expression.
> > (resolve_block_construct): Remove unnecessary static decls.
> > * trans-array.cc (gfc_conv_expr_descriptor): Guard string len
> > expression in condition. Improve handling of string length and
> > span, especially for substrings of the descriptor.
> > (duplicate_allocatable): Make element type more explicit with
> > 'eltype'.
> > * trans-decl.cc (gfc_get_symbol_decl): Emit a fatal error with
> > appropriate message instead of ICE if symbol type is unknown.
> > * trans-expr.cc (gfc_get_expr_charlen): Retain last charlen in
> > 'previous' and use if end expression in substring reference is
> > null.
> > (gfc_conv_string_length): Use gfc_conv_expr_descriptor if
> > 'expr_flat' is an array.
> > (gfc_trans_alloc_subarray_assign): If this is a deferred string
> > length component, store the string length in the hidden comp.
> > Update the typespec length accordingly. Generate a new type
> > spec for the call to gfc_duplicate_allocatable in this case.
> > * trans-io.cc (gfc_trans_transfer): Scalarize transfer of
> > deferred character array components.
> >
> >
> > gcc/testsuite/
> > PR fortran/87477
> > * gfortran.dg/finalize_47.f90 : Enable substring test.
> > * gfortran.dg/finalize_51.f90 : Update an error message.
> >
> > PR fortran/85686
> > PR fortran/88247
> > PR fortran/91941
> > PR fortran/92779
> > PR fortran/93339
> > PR fortran/93813
> > PR fortran/100948
> > PR fortran/102106
> > * gfortran.dg/associate_60.f90 : New test
> >
> > PR fortran/98408
> > * gfortran.dg/pr98408.f90 : New test
> >
> > PR fortran/105205
> > * gfortran.dg/pr105205.f90 : New test
> >
> > PR fortran/106918
> > * gfortran.dg/pr106918.f90 : New test
>
>

-- 
"If you can't explain it simply, you don't understand it well enough" -
Albert Einstein
! { dg-do run }
!
! Tests fixes for various pr87477 dependencies
!
! Contributed by Gerhard Steinmetz   except for pr102106:
! which was contributed by Brad Richardson  
!
program associate_60
  implicit none
  character(20) :: buffer

  call pr102106
  call pr100948
  call pr85686
  call pr88247
  call pr91941
  call pr92779
  call pr93339
  call pr93813

contains

  ! pr102106: ASSOCIATE whose target is a structure constructor of a derived
  ! type that has a polymorphic (class) allocatable component.
  subroutine pr102106
type :: sub_class_t
integer :: i
end type
type :: with_polymorphic_component_t
class(sub_class_t), allocatable :: sub_obj_
end type
! The value 42 given to the inner constructor must be readable through the
! associate name; otherwise the program stops with code 1.
associate(obj => with_polymorphic_component_t(sub_class_t(42)))
if (obj%sub_obj_%i .ne. 42) stop 1
end associate
  end

  subroutine pr100948
type t
  character(:), allocatable :: c(:)
end type
type(t), allocatable :: x
!
! Valid test in comment 1
!
x = t(['ab','cd'])
associate (y => x%c(:))
  if (any (y .ne. x%c)) stop 2
  if (any (y .ne. ['ab','cd'])) stop 3
end associate
deallocate (x)
!
! Allocation with source was found to only copy over one of the array elements
!
allocate (x, source = t(['ef','gh']))
associate (y => x%c(:))
  if (any (y .ne. x%c)) stop 4
  if (any (y .ne. ['ef','gh'])) stop 5
end associate
deallocate (x)
  end

  subroutine pr85686
call s85686([" g'day "," bye!! "])
if (trim (buffer) .ne. " a g'day a bye!!") stop 6
  end

  subroutine s85686(x)
character(*) :: x(:)
associate (y => 'a'//x)
  write (buffer, *) y ! Used to segfault at the write statement.
end associate
  end

  subroutine pr88247
  type t
 character(:), dimension(:), allocatable :: d
  end type t
  type(t), allocatable :: x
  character(5) :: buffer(3)
  allocate (x, source = t (['ab','cd'])) ! Didn't work
  write(buffer(1), *) x%d(2:1:-1)! Was found to be broken
  write(buffer(2), *) [x%d(2:1:-1)]  ! Was OK
  associate (y => [x%d(2:1:-1)])
write(buffer(3), *) y! Bug in comment 7
  end associate
  if (any (buffer .ne. " cdab")) stop 7
  end

  subroutine pr91941
character(:), allocatable :: x(:), z(:)
x = [' abc', ' xyz']
z = adjustl(x)
associate (y => adjustl(x))  ! Wrong character length was passed
  if (any(y .ne. ['abc ', 'xyz '])) stop 8
end associate
  end

  subroutine pr92779
character(3) :: a = 'abc'
associate (y => 

[PATCH v3] RISC-V: Fix regression of -fzero-call-used-regs=all

2023-04-07 Thread yanzhang.wang--- via Gcc-patches
From: Yanzhang Wang 

This patch registers a riscv specific function to
TARGET_ZERO_CALL_USED_REGS instead of default in targhooks.cc. It will
clean gpr and vector relevant registers.

PR 109104

gcc/ChangeLog:

* config/riscv/riscv-protos.h (emit_hard_vlmax_vsetvl):
* config/riscv/riscv-v.cc (emit_hard_vlmax_vsetvl):
(emit_vlmax_vsetvl):
* config/riscv/riscv.cc (vector_zero_call_used_regs):
(riscv_zero_call_used_regs):
(TARGET_ZERO_CALL_USED_REGS):

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zero-scratch-regs-1.c: New test.
* gcc.target/riscv/zero-scratch-regs-2.c: New test.
* gcc.target/riscv/zero-scratch-regs-3.c: New test.

Signed-off-by: Yanzhang Wang 
Co-authored-by: Pan Li 
Co-authored-by: Ju-Zhe Zhong 
Co-authored-by: Kito Cheng 
---
 gcc/config/riscv/riscv-protos.h   |  1 +
 gcc/config/riscv/riscv-v.cc   | 15 +++-
 gcc/config/riscv/riscv.cc | 71 +++
 .../gcc.target/riscv/zero-scratch-regs-1.c|  9 +++
 .../gcc.target/riscv/zero-scratch-regs-2.c| 24 +++
 .../gcc.target/riscv/zero-scratch-regs-3.c| 57 +++
 6 files changed, 174 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zero-scratch-regs-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zero-scratch-regs-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zero-scratch-regs-3.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 4611447ddde..5244e8dcbf0 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -159,6 +159,7 @@ bool check_builtin_call (location_t, vec, 
unsigned int,
 bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
 bool legitimize_move (rtx, rtx, machine_mode);
 void emit_vlmax_vsetvl (machine_mode, rtx);
+void emit_hard_vlmax_vsetvl (machine_mode, rtx);
 void emit_vlmax_op (unsigned, rtx, rtx, machine_mode);
 void emit_vlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
 void emit_nonvlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 2e91d019f6c..13dd6639c9f 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -118,6 +118,17 @@ const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT minval,
  && IN_RANGE (INTVAL (elt), minval, maxval));
 }
 
+/* Emit a vlmax vsetvl instruction with side effects.  This should only be used
+   when optimization is turned off or when emitting after the vsetvl pass.  */
+void
+emit_hard_vlmax_vsetvl (machine_mode vmode, rtx vl)
+{
+  unsigned int sew = get_sew (vmode);
+  emit_insn (gen_vsetvl (Pmode, vl, RVV_VLMAX, gen_int_mode (sew, Pmode),
+gen_int_mode (get_vlmul (vmode), Pmode), const0_rtx,
+const0_rtx));
+}
+
 void
 emit_vlmax_vsetvl (machine_mode vmode, rtx vl)
 {
@@ -126,9 +137,7 @@ emit_vlmax_vsetvl (machine_mode vmode, rtx vl)
   unsigned int ratio = calculate_ratio (sew, vlmul);
 
   if (!optimize)
-emit_insn (gen_vsetvl (Pmode, vl, RVV_VLMAX, gen_int_mode (sew, Pmode),
-  gen_int_mode (get_vlmul (vmode), Pmode), const0_rtx,
-  const0_rtx));
+emit_hard_vlmax_vsetvl (vmode, vl);
   else
 emit_insn (gen_vlmax_avl (Pmode, vl, gen_int_mode (ratio, Pmode)));
 }
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 5f542932d13..a6a610f5901 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -7066,6 +7066,74 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT 
mask)
   return shamt == ctz_hwi (mask);
 }
 
+HARD_REG_SET
+vector_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
+{
+  HARD_REG_SET zeroed_hardregs;
+  CLEAR_HARD_REG_SET (zeroed_hardregs);
+
+  /* Find a register to hold vl.  */
+  unsigned vl_regno = INVALID_REGNUM;
+  /* Skip the first GPR, otherwise the existing vl is kept due to the same
+ between vl and avl.  */
+  for (unsigned regno = GP_REG_FIRST + 1; regno <= GP_REG_LAST; regno++)
+{
+  if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
+   {
+ vl_regno = regno;
+ break;
+   }
+}
+
+  if (vl_regno > GP_REG_LAST)
+sorry ("cannot allocate vl register for %qs on this target",
+  "-fzero-call-used-regs");
+
+  bool emitted_vlmax_vsetvl = false;
+  rtx vl = gen_rtx_REG (Pmode, vl_regno); /* vl is VLMAX.  */
+  for (unsigned regno = V_REG_FIRST; regno <= V_REG_LAST; ++regno)
+{
+  if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
+   {
+ rtx target = regno_reg_rtx[regno];
+ machine_mode mode = GET_MODE (target);
+ poly_uint16 nunits = GET_MODE_NUNITS (mode);
+ machine_mode mask_mode = riscv_vector::get_vector_mode (BImode,
+ nunits)
+   .require ();
+
+ if 

Re: PR target/70243: Do not generate fmaddfp and fnmsubfp

2023-04-07 Thread Segher Boessenkool
Hi!

On Fri, Apr 07, 2023 at 02:32:04AM -0400, Michael Meissner wrote:
> On Thu, Apr 06, 2023 at 03:37:59PM -0500, Segher Boessenkool wrote:
> > > This patch eliminates the generation of the Altivec fmaddfp and fnmsubfp
> > > instructions as alternatives in the VSX instruction insn support, and in 
> > > the
> > > Altivec insns it adds a test to prevent the insn from being used if VSX is
> > > available.  I also added a test to the regression test suite.
> > 
> > Please leave the latter out, it does not belong in this patch.  If you
> > want a patch to do that deal with *all* VMX FP insns?  There also are
> > add, sub, mul, etc.  Well I think those (as well as madd and nmsub) are
> > the only ones that use the NJ bit or the RN bits, but please check.
> 
> After I posted the patch I refreshed my memory of the VECTOR_UNIT_ALTIVEC_P
> macro and it is not true if VSX code generation is enabled.  So I dropped the
> changes to altivec.md.

Right you are.  We still run into all the same problems for -maltivec
-mno-vsx compilations, but no one (knock on wood) does that except it is
the default on old systems.  We can live with that / we'll just have to
live with that, take your pick.

> In addition, as far as I know, the only AltiVec (VMX) floating point
> instructions generated when VSX is used are the vmaddfp and vnmsubfp
> instructions.

That is more likely given that VECTOR_UNIT_ALTIVEC_P means things match
if *only* VMX registers are allowed, right.  But it still sits very
uneasy with me, the way it is written is not very defensive.

> In the case of add and subtract, xvaddsp and xvsubsp is more
> general than vaddfp or vsubfp since it can access all VSX registers.  VMX does
> not have a stand-alone multiply (it generates FMA with a zero register) and it
> does not have a division operation.  And VMX does not have xvmsub{a,m}sp nor
> xvnadd{a,m}sp variations of the FMA instructions.

Yes, it has only the more frequent two variants.

> > >  (define_insn "*altivec_fmav4sf4"
> > >[(set (match_operand:V4SF 0 "register_operand" "=v")
> > >   (fma:V4SF (match_operand:V4SF 1 "register_operand" "v")
> > > (match_operand:V4SF 2 "register_operand" "v")
> > > (match_operand:V4SF 3 "register_operand" "v")))]
> > > -  "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
> > > +  "VECTOR_UNIT_ALTIVEC_P (V4SFmode) && !TARGET_VSX"
> > 
> > This is very error-prone.  Maybe add a test to the VECTOR_UNIT_ALTIVEC
> > macro instead?
> 
> As I said that part of the code is not in the next patch.

Excellent.

> > > -;; Fused vector multiply/add instructions. Support the classical Altivec
> > > -;; versions of fma, which allows the target to be a separate register 
> > > from the
> > > -;; 3 inputs.  Under VSX, the target must be either the addend or the 
> > > first
> > > -;; multiply.
> > > +;; Fused vector multiply/add instructions. Do not use the classical 
> > > Altivec
> 
> > (Two spaces after dot, and AltiVec is spelled with a capital V.  I don't
> > like it either, VMX is a much nicer and more regular name).
> 
> > While the name might be more regular, in terms of the instruction set, it
> does have holes that I mentioned above (no multiply that is not a FMA, two of
> the four FMA variants are not provided).

Yes.  And several new insns (like all QP insns) are VMX only, in terms
of what registers are allowed.  The mnemonics for most of those insns
(with noteworthy exception the QP insns) starts with a "v", too.

> > So this part looks okay, and it alone is safe for GCC 13 as well.
> 
> Well as we were discussing on a private channel, it is desirable to generate
> vmaddfp and vnmsubfp if -Ofast is used, so the next patch incorporates that
> change.

If only considering the RN effects.  But not when considering NJ screws
us over big time -- we should never generate VMX FP insns unless
explicitly asked for, its semantics are just too foreign.

We could add a separate flag for just this, but is there any demand?


Segher


Re: [PATCH] [testsuite] [ppc] expect vectorization in gen-vect-11c.c

2023-04-07 Thread Kewen.Lin via Gcc-patches
Hi Alexandre,

on 2023/4/7 12:37, Alexandre Oliva wrote:
> On Apr  6, 2023, "Kewen.Lin"  wrote:
> 
>> on 2023/4/6 13:20, Alexandre Oliva wrote:
>>> I confirm I observe the problem with gcc-12 targeting ppc64-vx7r2,
>>> containing the backported patch, and that the loop is vectorized,
>>> failing the test.
> 
> I take that back.  My notes indicate I looked into this failure on March
> 15th.  The patch you referenced was dated Feb 10, so I assumed it was
> already in when I looked into it: my confirmation amounted to checking
> what I had observed according to my notes, and when.
> 
> But now that you asked me to investigate it again, I used a far more
> recent tree, and I failed to duplicate it.  Digging further, I found out
> the patch, despite its commit date, was only merged into gcc-12 on March
> 16th.  What I was missing to get the intended effects of the fix was
> just a fresher tree that actually contained the fix.

aha, good to know it's not due to some differences between our ENVs or
some other mysteries. :) Thanks for checking.

> 
> I suppose this means we don't need the testsuite tweak, after all.

Yeah. :)

BR,
Kewen

> Patch withdrawn.
> 


Re: [PATCH] [PR99708] [rs6000] don't expect __ibm128 with 64-bit long double

2023-04-07 Thread Kewen.Lin via Gcc-patches
Hi Alexandre,

on 2023/4/7 09:48, Alexandre Oliva wrote:
> On Apr  6, 2023, "Kewen.Lin"  wrote:
> 
>> The reason why personally I preferred to fix it with xfail is that:
> 
> Got it.  I'm convinced, and I agree.
> 
> I tried an xfail in the initial dg-do, but that is no good for a compile
> error, so I went for a dg-bogus xfail.  I hope that will still have the
> intended effect when __ibm128 is defined when it currently isn't.
> 

Thanks for looking into it.

> There is a dg-skip-if in this test on the trunk, covering some targets,
> that IIRC are longdouble64, so maybe that's related and I could have
> dropped them, but I wasn't sure, so I left them alone.

I think it's due to that -mfloat128 isn't fully supported on them, so
yeah, just leave them alone.

> 
> Regstrapped on ppc64-linux-gnu (pass), also tested on ppc64-vx7r2/gcc-12
> (xfail).  Ok to install?
> 
> 
> [PR99708] [rs6000] don't expect __ibm128 with 64-bit long double
> 
> When long double is 64-bit wide, as on vxworks, the rs6000 backend
> defines neither the __ibm128 type nor the __SIZEOF_IBM128__ macro, but
> pr99708.c expected both to be always defined.  Adjust the test to
> match the implementation.
> 
> 
> for  gcc/testsuite/ChangeLog
> 
>   * gcc.target/powerpc/pr99708.c: Accept lack of
>   __SIZEOF_IBM128__ when long double is 64-bit wide.
> ---
>  gcc/testsuite/gcc.target/powerpc/pr99708.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr99708.c 
> b/gcc/testsuite/gcc.target/powerpc/pr99708.c
> index 02b40ebc40d3d..66a5f88479330 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr99708.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr99708.c
> @@ -14,7 +14,7 @@
>  int main (void)
>  {
>if (__SIZEOF_FLOAT128__ != sizeof (__float128)
> -  || __SIZEOF_IBM128__ != sizeof (__ibm128))
> +  || __SIZEOF_IBM128__ != sizeof (__ibm128)) /* { dg-bogus "undeclared" 
> "" { xfail longdouble64 } } */
>  abort ();
>  

This new version causes an unresolved record on my side; it's because the
compilation failed to produce an executable.

=== gcc Summary for unix/-m64 ===

# of expected passes1
# of expected failures  1
# of unresolved testcases   1

So I think we need to make the file be compiled well, how about something like:

--

diff --git a/gcc/testsuite/gcc.target/powerpc/pr99708.c 
b/gcc/testsuite/gcc.target/powerpc/pr99708.c
index 02b40ebc40d..c6aa0511b89 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr99708.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr99708.c
@@ -14,9 +14,17 @@
 int main (void)
 {
   if (__SIZEOF_FLOAT128__ != sizeof (__float128)
-  || __SIZEOF_IBM128__ != sizeof (__ibm128))
+  /* FIXME: Once type __ibm128 gets supported with long-double-64,
+ we shouldn't need this conditional #ifdef and xfail.  */
+#ifdef __SIZEOF_IBM128__
+  || __SIZEOF_IBM128__ != sizeof (__ibm128)
+#else
+  || 1
+#endif
+ )
 abort ();

   return 0;
 }

+/* { dg-xfail-run-if "unsupported type __ibm128 with long-double-64" { 
longdouble64 } } */

--  
   

?  OK if it looks reasonable to you and the testing goes well.  Thanks!

BR,
Kewen


Re: Ping! [Patch, fortran] PR87477 - [meta-bug] [F03] issues concerning the ASSOCIATE statement

2023-04-07 Thread Harald Anlauf via Gcc-patches

Hi Paul,

I don't see the new testcases.  Is this an issue on my side,
or did you forget to attach them?

Thanks,
Harald

On 4/7/23 09:07, Paul Richard Thomas via Gcc-patches wrote:

Dear All,

Please find attached a slightly updated version of the patch with a
consolidated testcase. The three additional testcases have nothing to do
with associate; they test fixes of character-related bugs.

OK for mainline?

Cheers

Paul
Fortran: Fix some of the bugs in associate [PR87477]

2023-04-07  Paul Thomas  

gcc/fortran
PR fortran/87477
* resolve.cc (resolve_assoc_var): Handle parentheses around the
target expression.
(resolve_block_construct): Remove unnecessary static decls.
* trans-array.cc (gfc_conv_expr_descriptor): Guard string len
expression in condition. Improve handling of string length and
span, especially for substrings of the descriptor.
(duplicate_allocatable): Make element type more explicit with
'eltype'.
* trans-decl.cc (gfc_get_symbol_decl): Emit a fatal error with
appropriate message instead of ICE if symbol type is unknown.
* trans-expr.cc (gfc_get_expr_charlen): Retain last charlen in
'previous' and use if end expression in substring reference is
null.
(gfc_conv_string_length): Use gfc_conv_expr_descriptor if
'expr_flat' is an array.
(gfc_trans_alloc_subarray_assign): If this is a deferred string
length component, store the string length in the hidden comp.
Update the typespec length accordingly. Generate a new type
spec for the call to gfc_duplicate_allocatable in this case.
* trans-io.cc (gfc_trans_transfer): Scalarize transfer of
deferred character array components.


gcc/testsuite/
PR fortran/87477
* gfortran.dg/finalize_47.f90 : Enable substring test.
* gfortran.dg/finalize_51.f90 : Update an error message.

PR fortran/85686
PR fortran/88247
PR fortran/91941
PR fortran/92779
PR fortran/93339
PR fortran/93813
PR fortran/100948
PR fortran/102106
* gfortran.dg/associate_60.f90 : New test

PR fortran/98408
* gfortran.dg/pr98408.f90 : New test

PR fortran/105205
* gfortran.dg/pr105205.f90 : New test

PR fortran/106918
* gfortran.dg/pr106918.f90 : New test




Re: [Patch, fortran] PR87477 - [meta-bug] [F03] issues concerning the ASSOCIATE statement

2023-04-07 Thread Harald Anlauf via Gcc-patches

Hi Paul,

On 4/7/23 09:02, Paul Richard Thomas via Gcc-patches wrote:

Hi All,

Please find attached the patch to fix the dg directives and remove a lot of
trailing white space.

Unless there are any objections, I will commit as obvious over the weekend.


this is OK.

Thanks for the patch!

Harald


Cheers

Paul

Fortran: Fix dg directives and remove trailing whitespaces in testsuite

2023-04-07  Paul Thomas  

* gfortran.dg/c-interop/allocatable-optional-pointer.f90 : Fix
dg directive and remove trailing whitespace.
* gfortran.dg/c-interop/c407a-1.f90 : ditto
* gfortran.dg/c-interop/c407b-1.f90 : ditto
* gfortran.dg/c-interop/c407b-2.f90 : ditto
* gfortran.dg/c-interop/c407c-1.f90 : ditto
* gfortran.dg/c-interop/c535a-1.f90 : ditto
* gfortran.dg/c-interop/c535a-2.f90 : ditto
* gfortran.dg/c-interop/c535b-1.f90 : ditto
* gfortran.dg/c-interop/c535b-2.f90 : ditto
* gfortran.dg/c-interop/c535b-3.f90 : ditto
* gfortran.dg/c-interop/c535c-1.f90 : ditto
* gfortran.dg/c-interop/c535c-2.f90 : ditto
* gfortran.dg/c-interop/deferred-character-1.f90 : ditto
* gfortran.dg/c-interop/removed-restrictions-1.f90 : ditto
* gfortran.dg/c-interop/removed-restrictions-2.f90 : ditto
* gfortran.dg/c-interop/removed-restrictions-4.f90 : ditto
* gfortran.dg/c-interop/tkr.f90 : ditto
* gfortran.dg/class_result_10.f90 : ditto
* gfortran.dg/dtio_35.f90 : ditto
* gfortran.dg/goacc/array-with-dt-2.f90 : ditto
* gfortran.dg/gomp/affinity-clause-1.f90 : ditto
* gfortran.dg/pr103258.f90 : ditto
* gfortran.dg/pr59107.f90 : ditto
* gfortran.dg/pr93835.f08 : ditto



On Wed, 29 Mar 2023 at 09:53, Paul Richard Thomas <
paul.richard.tho...@gmail.com> wrote:


Hi Manfred,

Indeed I do :-) Thanks for the spot. I have decided that it will be less
messy if I roll all the testcases into one or, perhaps two =>
associate_xx.f90

Forgetting the space before the final brace seems to be rife!

Cheers

Paul


On Wed, 29 Mar 2023 at 09:24, Manfred Schwarb  wrote:


Am 28.03.23 um 23:04 schrieb Paul Richard Thomas via Fortran:

Hi All,

I have made a start on ASSOCIATE issues. Some of the low(-ish) hanging
fruit are already fixed but I have yet to check that they are really fixed
and to close them:
pr102106, pr102111, pr104430, pr106048, pr85510, pr87460, pr92960 &

pr93338


The attached patch picks up those PRs involving deferred length

characters

in one guise or another. I believe that it is all pretty

straightforward.

Structure constructors with allocatable, deferred length, character

array

components just weren't implemented and so this is the biggest part of

the

patch. I found two other, non-associate PRs(106918 &  105205) that are
fixed and there are probably more.

The chunk in trans-io.cc is something of a kludge, which I will come

back

to. Some descriptors come through with a data pointer that looks as if

it

should be OK but

I thought to submit this now to get it out of the way. The ratio of PRs
fixed to the size of the patch warrants this. The next stage is going

to be

rather messy and so "I might take a little while" (cross talk between
associate and select type, in particular).

Regtests OK - good for mainline?



Paul, you have some "dg-do-run" and "dg-do-compile" statements in your
testcases,
could you change them into their single-minus-sign variants?

Cheers,
Manfred


BTW: I just ran my script again and found the following testsuite issues
(note that outer-most
braces need to be space-padded):

./c-interop/removed-restrictions-1.f90:! { dg-do compile}
./c-interop/removed-restrictions-2.f90:! { dg-do compile}
./c-interop/removed-restrictions-3.f90:! { dg-do compile}
./c-interop/removed-restrictions-4.f90:! { dg-do compile}
./c-interop/tkr.f90:! { dg-do compile}
./c-interop/c407c-1.f90:! { dg-do compile}
./c-interop/deferred-character-1.f90:! { dg-do compile}
./c-interop/allocatable-optional-pointer.f90:! { dg-do compile}
./c-interop/c407a-1.f90:! { dg-do compile}
./c-interop/c407b-1.f90:! { dg-do compile}
./c-interop/c407b-2.f90:! { dg-do compile}
./c-interop/c535a-1.f90:! { dg-do compile}
./c-interop/c535a-2.f90:! { dg-do compile}
./c-interop/c535b-1.f90:! { dg-do compile}
./c-interop/c535b-2.f90:! { dg-do compile}
./c-interop/c535b-3.f90:! { dg-do compile}
./c-interop/c535c-1.f90:! { dg-do compile}
./c-interop/c535c-2.f90:! { dg-do compile}
./gomp/affinity-clause-1.f90:! { dg final { scan-tree-dump-times "#pragma
omp task affinity\\(iterator\\(integer\\(kind=4\\)
i=D\\.\[0-9\]+:5:1\\):b\\\[\\(.* ? \\+ -1\\\]\\)
affinity\\(iterator\\(integer\\(kind=4\\)
i=D\\.\[0-9\]+:5:1\\):d\\\[\\(\\(integer\\(kind=8\\)\\) i \\+ -1\\) \\*
6\\\]\\)"  1 "original" } }
./class_result_10.f90:! { dg-do run}
./pr103258.f90:! { dg-do compile}
./dtio_35.f90:! { dg-compile }
./pr93835.f08:! {dg-do run }
./pr59107.f90:! { dg-compile }




Cheers

Paul

Fortran: Fix some of the bugs in associate [PR87477]

2023-03-28  Paul Thomas  

gcc/fortran
PR fortran/87477
* trans-array.cc (gfc_conv_expr_descriptor): Guard string len
expression in condition.

[PATCH v2] LoongArch: Add built-in functions description of LoongArch Base instruction set instructions.

2023-04-07 Thread Lulu Cheng
gcc/ChangeLog:

* doc/extend.texi: Add section for LoongArch Base Built-in functions.
---
 gcc/doc/extend.texi | 129 
 1 file changed, 129 insertions(+)

---
 v1 -> v2:
   (1) Does not use i8, u8, i16, u16 etc.
   (2) Add the description information of the built-in functions before 
encapsulation.

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 3adb67aa47a..9fbb33b370b 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -14669,6 +14669,7 @@ instructions, but allow the compiler to schedule those 
calls.
 * Blackfin Built-in Functions::
 * BPF Built-in Functions::
 * FR-V Built-in Functions::
+* LoongArch Base Built-in Functions::
 * MIPS DSP Built-in Functions::
 * MIPS Paired-Single Support::
 * MIPS Loongson Built-in Functions::
@@ -16197,6 +16198,134 @@ Use the @code{nldub} instruction to load the contents 
of address @var{x}
 into the data cache.  The instruction is issued in slot I1@.
 @end table

+@node LoongArch Base Built-in Functions
+@subsection LoongArch Base Built-in Functions
+
+These built-in functions are available for LoongArch.
+
+Data Type Description:
+@itemize
+@item @code{imm0_31}, a compile-time constant in range 0 to 31;
+@item @code{imm0_16383}, a compile-time constant in range 0 to 16383;
+@item @code{imm0_32767}, a compile-time constant in range 0 to 32767;
+@item @code{imm_n2048_2047}, a compile-time constant in range -2048 to 2047;
+@end itemize
+
+The intrinsics provided are listed below:
+@smallexample
+unsigned int __builtin_loongarch_movfcsr2gr (imm0_31)
+void __builtin_loongarch_movgr2fcsr (imm0_31, unsigned int)
+void __builtin_loongarch_cacop_d (imm0_31, unsigned long int, 
imm_n2048_2047)
+unsigned int __builtin_loongarch_cpucfg (unsigned int)
+void __builtin_loongarch_asrtle_d (long int, long int)
+void __builtin_loongarch_asrtgt_d (long int, long int)
+long int __builtin_loongarch_lddir_d (long int, imm0_31)
+void __builtin_loongarch_ldpte_d (long int, imm0_31)
+
+int __builtin_loongarch_crc_w_b_w (char, int)
+int __builtin_loongarch_crc_w_h_w (short, int)
+int __builtin_loongarch_crc_w_w_w (int, int)
+int __builtin_loongarch_crc_w_d_w (long int, int)
+int __builtin_loongarch_crcc_w_b_w (char, int)
+int __builtin_loongarch_crcc_w_h_w (short, int)
+int __builtin_loongarch_crcc_w_w_w (int, int)
+int __builtin_loongarch_crcc_w_d_w (long int, int)
+
+unsigned int __builtin_loongarch_csrrd_w (imm0_16383)
+unsigned int __builtin_loongarch_csrwr_w (unsigned int, imm0_16383)
+unsigned int __builtin_loongarch_csrxchg_w (unsigned int, unsigned int, 
imm0_16383)
+unsigned long int __builtin_loongarch_csrrd_d (imm0_16383)
+unsigned long int __builtin_loongarch_csrwr_d (unsigned long int, 
imm0_16383)
+unsigned long int __builtin_loongarch_csrxchg_d (unsigned long int, 
unsigned long int, imm0_16383)
+
+unsigned char __builtin_loongarch_iocsrrd_b (unsigned int)
+unsigned short __builtin_loongarch_iocsrrd_h (unsigned int)
+unsigned int __builtin_loongarch_iocsrrd_w (unsigned int)
+unsigned long int __builtin_loongarch_iocsrrd_d (unsigned int)
+void __builtin_loongarch_iocsrwr_b (unsigned char, unsigned int)
+void __builtin_loongarch_iocsrwr_h (unsigned short, unsigned int)
+void __builtin_loongarch_iocsrwr_w (unsigned int, unsigned int)
+void __builtin_loongarch_iocsrwr_d (unsigned long int, unsigned int)
+
+void __builtin_loongarch_dbar (imm0_32767)
+void __builtin_loongarch_ibar (imm0_32767)
+
+void __builtin_loongarch_syscall (imm0_32767)
+void __builtin_loongarch_break (imm0_32767)
+@end smallexample
+
+@emph{Note:} The control registers come in 32-bit and 64-bit widths, but the
+access instructions do not distinguish between them, so GCC renames the
+control instructions when implementing the intrinsics.
+
+Take the csrrd instruction as an example, built-in functions are implemented 
as follows:
+@smallexample
+  __builtin_loongarch_csrrd_w  // Use when reading a 32-bit control register.
+  __builtin_loongarch_csrrd_d  // Use when reading a 64-bit control register.
+@end smallexample
+
+For the convenience of use, the built-in functions are encapsulated,
+the encapsulated functions and @code{__drdtime_t, __rdtime_t} are
+defined in the @code{larchintrin.h}. So if you call the following
+function you need to include @code{larchintrin.h}.
+
+@smallexample
+ typedef struct drdtime@{
+unsigned long dvalue;
+unsigned long dtimeid;
+ @} __drdtime_t;
+
+ typedef struct rdtime@{
+unsigned int value;
+unsigned int timeid;
+ @} __rdtime_t;
+@end smallexample
+
+@smallexample
+__drdtime_t __rdtime_d (void)
+__rdtime_t  __rdtimel_w (void)
+__rdtime_t  __rdtimeh_w (void)
+unsigned int  __movfcsr2gr (imm0_31)
+void __movgr2fcsr (imm0_31, unsigned int)
+void __cacop_d (imm0_31, unsigned long, 

Re: [PATCH v2] RISC-V: Fix regression of -fzero-call-used-regs=all

2023-04-07 Thread Kito Cheng via Gcc-patches
Generally LGTM, just one more comment :)

> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 2e91d019f6c..aad046240ee 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -724,4 +735,54 @@ gen_avl_for_scalar_move (rtx avl)
>  }
>  }
>
> +HARD_REG_SET
> +vector_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)

I would prefer this should be moved to riscv.cc too, major concern is
we don't want to introduce `hard-reg-set.h` into riscv-protos.h.


Re: Ping! [Patch, fortran] PR87477 - [meta-bug] [F03] issues concerning the ASSOCIATE statement

2023-04-07 Thread Paul Richard Thomas via Gcc-patches
Dear All,

Please find attached a slightly updated version of the patch with a
consolidated testcase. The three additional testcases have nothing to do
with associate; they test fixes of character-related bugs.

OK for mainline?

Cheers

Paul
Fortran: Fix some of the bugs in associate [PR87477]

2023-04-07  Paul Thomas  

gcc/fortran
PR fortran/87477
* resolve.cc (resolve_assoc_var): Handle parentheses around the
target expression.
(resolve_block_construct): Remove unnecessary static decls.
* trans-array.cc (gfc_conv_expr_descriptor): Guard string len
expression in condition. Improve handling of string length and
span, especially for substrings of the descriptor.
(duplicate_allocatable): Make element type more explicit with
'eltype'.
* trans-decl.cc (gfc_get_symbol_decl): Emit a fatal error with
appropriate message instead of ICE if symbol type is unknown.
* trans-expr.cc (gfc_get_expr_charlen): Retain last charlen in
'previous' and use if end expression in substring reference is
null.
(gfc_conv_string_length): Use gfc_conv_expr_descriptor if
'expr_flat' is an array.
(gfc_trans_alloc_subarray_assign): If this is a deferred string
length component, store the string length in the hidden comp.
Update the typespec length accordingly. Generate a new type
spec for the call to gfc_duplicate_allocatable in this case.
* trans-io.cc (gfc_trans_transfer): Scalarize transfer of
deferred character array components.


gcc/testsuite/
PR fortran/87477
* gfortran.dg/finalize_47.f90 : Enable substring test.
* gfortran.dg/finalize_51.f90 : Update an error message.

PR fortran/85686
PR fortran/88247
PR fortran/91941
PR fortran/92779
PR fortran/93339
PR fortran/93813
PR fortran/100948
PR fortran/102106
* gfortran.dg/associate_60.f90 : New test

PR fortran/98408
* gfortran.dg/pr98408.f90 : New test

PR fortran/105205
* gfortran.dg/pr105205.f90 : New test

PR fortran/106918
* gfortran.dg/pr106918.f90 : New test
diff --git a/gcc/fortran/iresolve.cc b/gcc/fortran/iresolve.cc
index 33794f0a858..8acad60a02b 100644
--- a/gcc/fortran/iresolve.cc
+++ b/gcc/fortran/iresolve.cc
@@ -230,7 +230,9 @@ gfc_resolve_adjustl (gfc_expr *f, gfc_expr *string)
 {
   f->ts.type = BT_CHARACTER;
   f->ts.kind = string->ts.kind;
-  if (string->ts.u.cl)
+  if (string->ts.deferred)
+f->ts = string->ts;
+  else if (string->ts.u.cl)
 f->ts.u.cl = gfc_new_charlen (gfc_current_ns, string->ts.u.cl);

   f->value.function.name = gfc_get_string ("__adjustl_s%d", f->ts.kind);
@@ -242,7 +244,9 @@ gfc_resolve_adjustr (gfc_expr *f, gfc_expr *string)
 {
   f->ts.type = BT_CHARACTER;
   f->ts.kind = string->ts.kind;
-  if (string->ts.u.cl)
+  if (string->ts.deferred)
+f->ts = string->ts;
+  else if (string->ts.u.cl)
 f->ts.u.cl = gfc_new_charlen (gfc_current_ns, string->ts.u.cl);

   f->value.function.name = gfc_get_string ("__adjustr_s%d", f->ts.kind);
@@ -3361,7 +3365,7 @@ gfc_resolve_mvbits (gfc_code *c)
 }


-/* Set up the call to RANDOM_INIT.  */
+/* Set up the call to RANDOM_INIT.  */

 void
 gfc_resolve_random_init (gfc_code *c)
diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc
index f6ec76acb0b..6e42397c2ea 100644
--- a/gcc/fortran/resolve.cc
+++ b/gcc/fortran/resolve.cc
@@ -9084,6 +9084,7 @@ static void
 resolve_assoc_var (gfc_symbol* sym, bool resolve_target)
 {
   gfc_expr* target;
+  bool parentheses = false;

   gcc_assert (sym->assoc);
   gcc_assert (sym->attr.flavor == FL_VARIABLE);
@@ -9096,6 +9097,16 @@ resolve_assoc_var (gfc_symbol* sym, bool resolve_target)
 return;
   gcc_assert (!sym->assoc->dangling);

+  if (target->expr_type == EXPR_OP
+  && target->value.op.op == INTRINSIC_PARENTHESES
+  && target->value.op.op1->expr_type == EXPR_VARIABLE)
+{
+  sym->assoc->target = gfc_copy_expr (target->value.op.op1);
+  gfc_free_expr (target);
+  target = sym->assoc->target;
+  parentheses = true;
+}
+
   if (resolve_target && !gfc_resolve_expr (target))
 return;

@@ -9177,6 +9188,7 @@ resolve_assoc_var (gfc_symbol* sym, bool resolve_target)

   /* See if this is a valid association-to-variable.  */
   sym->assoc->variable = (target->expr_type == EXPR_VARIABLE
+			  && !parentheses
 			  && !gfc_has_vector_subscript (target));

   /* Finally resolve if this is an array or not.  */
@@ -9191,7 +9203,6 @@ resolve_assoc_var (gfc_symbol* sym, bool resolve_target)
   return;
 }

-
   /* We cannot deal with class selectors that need temporaries.  */
   if (target->ts.type == BT_CLASS
 	&& gfc_ref_needs_temporary_p (target->ref))
@@ -10885,11 +10896,6 @@ gfc_resolve_forall (gfc_code *code, gfc_namespace *ns, int forall_save)


 /* Resolve a BLOCK construct statement.  */
-static gfc_expr*
-get_temp_from_expr (gfc_expr *, gfc_namespace *);
-static gfc_code *
-build_assignment (gfc_exec_op, gfc_expr *, gfc_expr *,
-		  gfc_component *, gfc_component *, locus);

 static void
 resolve_block_construct (gfc_code* code)
diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 

Re: [Patch, fortran] PR87477 - [meta-bug] [F03] issues concerning the ASSOCIATE statement

2023-04-07 Thread Paul Richard Thomas via Gcc-patches
Hi All,

Please find attached the patch to fix the dg directives and remove a lot of
trailing white space.

Unless there are any objections, I will commit as obvious over the weekend.

Cheers

Paul

Fortran: Fix dg directives and remove trailing whitespaces in testsuite

2023-04-07  Paul Thomas  

* gfortran.dg/c-interop/allocatable-optional-pointer.f90 : Fix
dg directive and remove trailing whitespace.
* gfortran.dg/c-interop/c407a-1.f90 : ditto
* gfortran.dg/c-interop/c407b-1.f90 : ditto
* gfortran.dg/c-interop/c407b-2.f90 : ditto
* gfortran.dg/c-interop/c407c-1.f90 : ditto
* gfortran.dg/c-interop/c535a-1.f90 : ditto
* gfortran.dg/c-interop/c535a-2.f90 : ditto
* gfortran.dg/c-interop/c535b-1.f90 : ditto
* gfortran.dg/c-interop/c535b-2.f90 : ditto
* gfortran.dg/c-interop/c535b-3.f90 : ditto
* gfortran.dg/c-interop/c535c-1.f90 : ditto
* gfortran.dg/c-interop/c535c-2.f90 : ditto
* gfortran.dg/c-interop/deferred-character-1.f90 : ditto
* gfortran.dg/c-interop/removed-restrictions-1.f90 : ditto
* gfortran.dg/c-interop/removed-restrictions-2.f90 : ditto
* gfortran.dg/c-interop/removed-restrictions-4.f90 : ditto
* gfortran.dg/c-interop/tkr.f90 : ditto
* gfortran.dg/class_result_10.f90 : ditto
* gfortran.dg/dtio_35.f90 : ditto
* gfortran.dg/goacc/array-with-dt-2.f90 : ditto
* gfortran.dg/gomp/affinity-clause-1.f90 : ditto
* gfortran.dg/pr103258.f90 : ditto
* gfortran.dg/pr59107.f90 : ditto
* gfortran.dg/pr93835.f08 : ditto



On Wed, 29 Mar 2023 at 09:53, Paul Richard Thomas <
paul.richard.tho...@gmail.com> wrote:

> Hi Manfred,
>
> Indeed I do :-) Thanks for the spot. I have decided that it will be less
> messy if I roll all the testcases into one or, perhaps two =>
> associate_xx.f90
>
> Forgetting the space before the final brace seems to be rife!
>
> Cheers
>
> Paul
>
>
> On Wed, 29 Mar 2023 at 09:24, Manfred Schwarb  wrote:
>
>> Am 28.03.23 um 23:04 schrieb Paul Richard Thomas via Fortran:
>> > Hi All,
>> >
>> > I have made a start on ASSOCIATE issues. Some of the low(-ish) hanging
>> > fruit are already fixed but I have yet to check that they are really fixed
>> > and to close them:
>> > pr102106, pr102111, pr104430, pr106048, pr85510, pr87460, pr92960 &
>> pr93338
>> >
>> > The attached patch picks up those PRs involving deferred length
>> characters
>> > in one guise or another. I believe that it is all pretty
>> straightforward.
>> > Structure constructors with allocatable, deferred length, character
>> array
>> > components just weren't implemented and so this is the biggest part of
>> the
>> > patch. I found two other, non-associate PRs(106918 &  105205) that are
>> > fixed and there are probably more.
>> >
>> > The chunk in trans-io.cc is something of a kludge, which I will come
>> back
>> > to. Some descriptors come through with a data pointer that looks as if
>> it
>> > should be OK but
>> >
>> > I thought to submit this now to get it out of the way. The ratio of PRs
>> > fixed to the size of the patch warrants this. The next stage is going
>> to be
>> > rather messy and so "I might take a little while" (cross talk between
>> > associate and select type, in particular).
>> >
>> > Regtests OK - good for mainline?
>> >
>>
>> Paul, you have some "dg-do-run" and "dg-do-compile" statements in your
>> testcases,
>> could you change them into their single-minus-sign variants?
>>
>> Cheers,
>> Manfred
>>
>>
>> BTW: I just ran my script again and found the following testsuite issues
>> (note that outer-most
>> braces need to be space-padded):
>>
>> ./c-interop/removed-restrictions-1.f90:! { dg-do compile}
>> ./c-interop/removed-restrictions-2.f90:! { dg-do compile}
>> ./c-interop/removed-restrictions-3.f90:! { dg-do compile}
>> ./c-interop/removed-restrictions-4.f90:! { dg-do compile}
>> ./c-interop/tkr.f90:! { dg-do compile}
>> ./c-interop/c407c-1.f90:! { dg-do compile}
>> ./c-interop/deferred-character-1.f90:! { dg-do compile}
>> ./c-interop/allocatable-optional-pointer.f90:! { dg-do compile}
>> ./c-interop/c407a-1.f90:! { dg-do compile}
>> ./c-interop/c407b-1.f90:! { dg-do compile}
>> ./c-interop/c407b-2.f90:! { dg-do compile}
>> ./c-interop/c535a-1.f90:! { dg-do compile}
>> ./c-interop/c535a-2.f90:! { dg-do compile}
>> ./c-interop/c535b-1.f90:! { dg-do compile}
>> ./c-interop/c535b-2.f90:! { dg-do compile}
>> ./c-interop/c535b-3.f90:! { dg-do compile}
>> ./c-interop/c535c-1.f90:! { dg-do compile}
>> ./c-interop/c535c-2.f90:! { dg-do compile}
>> ./gomp/affinity-clause-1.f90:! { dg final { scan-tree-dump-times "#pragma
>> omp task affinity\\(iterator\\(integer\\(kind=4\\)
>> i=D\\.\[0-9\]+:5:1\\):b\\\[\\(.* ? \\+ -1\\\]\\)
>> affinity\\(iterator\\(integer\\(kind=4\\)
>> i=D\\.\[0-9\]+:5:1\\):d\\\[\\(\\(integer\\(kind=8\\)\\) i \\+ -1\\) \\*
>> 6\\\]\\)"  1 "original" } }
>> ./class_result_10.f90:! { dg-do run}
>> ./pr103258.f90:! { dg-do compile}
>> ./dtio_35.f90:! { dg-compile }
>> ./pr93835.f08:! {dg-do run }
>> ./pr59107.f90:! { dg-compile }
>>
>>
>>
>> > Cheers
>> >
>> > Paul
>> >
>> > Fortran: Fix 

[PATCH v2] RISC-V: Fix regression of -fzero-call-used-regs=all

2023-04-07 Thread yanzhang.wang--- via Gcc-patches
From: Yanzhang Wang 

This patch registers a RISC-V specific function for
TARGET_ZERO_CALL_USED_REGS instead of the default one in targhooks.cc.  It
will clear the GPRs and the vector-related registers.

PR 109104

gcc/ChangeLog:

* config/riscv/riscv-protos.h (GCC_RISCV_PROTOS_H):
(emit_hard_vlmax_vsetvl):
(vector_zero_call_used_regs):
* config/riscv/riscv-v.cc (emit_hard_vlmax_vsetvl):
(emit_vlmax_vsetvl):
(vector_zero_call_used_regs):
* config/riscv/riscv.cc (riscv_zero_call_used_regs):
(TARGET_ZERO_CALL_USED_REGS):

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zero-scratch-regs-1.c: New test.
* gcc.target/riscv/zero-scratch-regs-2.c: New test.
* gcc.target/riscv/zero-scratch-regs-3.c: New test.

Signed-off-by: Yanzhang Wang 
Co-authored-by: Pan Li 
Co-authored-by: Ju-Zhe Zhong 
Co-authored-by: Kito Cheng 
---
 gcc/config/riscv/riscv-protos.h   |  5 ++
 gcc/config/riscv/riscv-v.cc   | 67 ++-
 gcc/config/riscv/riscv.cc | 21 ++
 .../gcc.target/riscv/zero-scratch-regs-1.c|  9 +++
 .../gcc.target/riscv/zero-scratch-regs-2.c| 24 +++
 .../gcc.target/riscv/zero-scratch-regs-3.c| 57 
 6 files changed, 180 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zero-scratch-regs-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zero-scratch-regs-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zero-scratch-regs-3.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 4611447ddde..7ab0ec4b8be 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -22,6 +22,8 @@ along with GCC; see the file COPYING3.  If not see
 #ifndef GCC_RISCV_PROTOS_H
 #define GCC_RISCV_PROTOS_H
 
+#include "hard-reg-set.h"
+
 /* Symbol types we understand.  The order of this list must match that of
the unspec enum in riscv.md, subsequent to UNSPEC_ADDRESS_FIRST.  */
 enum riscv_symbol_type {
@@ -159,6 +161,7 @@ bool check_builtin_call (location_t, vec, 
unsigned int,
 bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
 bool legitimize_move (rtx, rtx, machine_mode);
 void emit_vlmax_vsetvl (machine_mode, rtx);
+void emit_hard_vlmax_vsetvl (machine_mode, rtx);
 void emit_vlmax_op (unsigned, rtx, rtx, machine_mode);
 void emit_vlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
 void emit_nonvlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
@@ -206,6 +209,8 @@ enum vlen_enum
 bool slide1_sew64_helper (int, machine_mode, machine_mode,
  machine_mode, rtx *);
 rtx gen_avl_for_scalar_move (rtx);
+
+HARD_REG_SET vector_zero_call_used_regs (HARD_REG_SET);
 }
 
 /* We classify builtin types into two classes:
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 2e91d019f6c..aad046240ee 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -43,6 +43,8 @@
 #include "optabs.h"
 #include "tm-constrs.h"
 #include "rtx-vector-builder.h"
+#include "diagnostic-core.h"
+#include "targhooks.h"
 
 using namespace riscv_vector;
 
@@ -118,6 +120,17 @@ const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT minval,
  && IN_RANGE (INTVAL (elt), minval, maxval));
 }
 
+/* Emit a vlmax vsetvl instruction with side effect, this should be only used
+   when optimization is tune off or emit after vsetvl insertion pass.  */
+void
+emit_hard_vlmax_vsetvl (machine_mode vmode, rtx vl)
+{
+  unsigned int sew = get_sew (vmode);
+  emit_insn (gen_vsetvl (Pmode, vl, RVV_VLMAX, gen_int_mode (sew, Pmode),
+gen_int_mode (get_vlmul (vmode), Pmode), const0_rtx,
+const0_rtx));
+}
+
 void
 emit_vlmax_vsetvl (machine_mode vmode, rtx vl)
 {
@@ -126,9 +139,7 @@ emit_vlmax_vsetvl (machine_mode vmode, rtx vl)
   unsigned int ratio = calculate_ratio (sew, vlmul);
 
   if (!optimize)
-emit_insn (gen_vsetvl (Pmode, vl, RVV_VLMAX, gen_int_mode (sew, Pmode),
-  gen_int_mode (get_vlmul (vmode), Pmode), const0_rtx,
-  const0_rtx));
+emit_hard_vlmax_vsetvl (vmode, vl);
   else
 emit_insn (gen_vlmax_avl (Pmode, vl, gen_int_mode (ratio, Pmode)));
 }
@@ -724,4 +735,54 @@ gen_avl_for_scalar_move (rtx avl)
 }
 }
 
+HARD_REG_SET
+vector_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
+{
+  HARD_REG_SET zeroed_hardregs;
+  CLEAR_HARD_REG_SET (zeroed_hardregs);
+
+  /* Find a register to hold vl.  */
+  unsigned vl_regno = INVALID_REGNUM;
+  /* Skip the first GPR, otherwise the existing vl is kept due to the same
+ between vl and avl.  */
+  for (unsigned regno = GP_REG_FIRST + 1; regno <= GP_REG_LAST; regno++)
+{
+  if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
+   {
+ vl_regno = regno;
+ break;
+   }
+}
+
+  if (vl_regno > GP_REG_LAST)
+sorry ("can't allocate vl 

[PATCH, V2] PR target/70243: Do not generate vmaddfp and vnmsubfp

2023-04-07 Thread Michael Meissner via Gcc-patches
This is version 2 of the patch.  The first version was posted on April 6th.

In this version, I eliminated the changes to Altivec.md that added checks to
altivec_fmav4sf4 and altivec_vnmsubfp.  After writing the code, I remembered
that VECTOR_UNIT_ALTIVEC_P that is used by those insns will not be true if the
VSX instruction set is enabled, so no additional test is needed.

As we discussed in a private chat room, I modified the code to generate vmaddfp
and vnmsubfp if -Ofast (-ffast-math) is used.  This allows the compiler to
eliminate the extra move if the user does not care about strict floating point
code generation, but it generates only the VSX instructions in the normal
case.

I reworked the examples and split them into two tests to test both the normal
case when -Ofast is not used and when it is used.

I also fixed the instructions mentioned in the comments to be the actual
instructions (vmaddfp and vnmsubfp) instead of fmaddfp and fnmsubfp.  Sorry
about that.

The AltiVec (VMX) instructions vmaddfp and vnmsubfp have different rounding
behaviors than the VSX xvmadd{a,m}sp and xvnmsub{a,m}sp instructions.  In
particular, generating these instructions seems to break Eigen.

The bug is that GCC has generated the VMX vmaddfp and vnmsubfp instructions on
VSX systems as an alternative to the xvmadd{a,m}sp and xvnmsub{a,m}sp
instructions.  The advantage of the VMX instructions is that they are 4 operand
instructions (i.e. the target register does not have to overlap with one of the
input registers).  This can mean that the compiler can eliminate an extra move
instruction.  The disadvantage of generating these instructions is that they do
not round the same way as the VSX instructions.

This patch will only generate the VMX vmaddfp and vnmsubfp instructions as
alternatives in the VSX instruction insn support if -Ofast (-ffast-math) is
used.  I also added 2 tests to the regression suite.

I have done bootstrap builds on power9 little endian (with both IEEE long
double and IBM long double).  I have also done the builds and test on a power8
big endian system (testing both 32-bit and 64-bit code generation).  Chip has
verified that it fixes the problem that Eigen encountered.  Can I check this
into the master GCC branch?  After a burn-in period, can I check this patch
into the active GCC branches?

Thanks in advance.

2023-04-07   Michael Meissner  

gcc/

PR target/70243
* config/rs6000/rs6000.md (isa attribute): Add fastmath.
(enabled attribute): Add support for fastmath.
* config/rs6000/vsx.md (vsx_fmav4sf4): Set the isa attribute to
fastmath to disable Altivec instruction generation normally.
(vsx_nfmsv4sf4): Likewise.

gcc/testsuite/

PR target/70243
* gcc.target/powerpc/pr70243.c: New test.
* gcc.target/powerpc/pr70243-2.c: New test.
---
 gcc/config/rs6000/rs6000.md  |  6 ++-
 gcc/config/rs6000/vsx.md | 17 
 gcc/testsuite/gcc.target/powerpc/pr70243-2.c | 41 
 gcc/testsuite/gcc.target/powerpc/pr70243.c   | 41 
 4 files changed, 97 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr70243-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr70243.c

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 44f7dd509cb..7fea6a40e0c 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -354,7 +354,7 @@ (define_attr "cpu"
   (const (symbol_ref "(enum attr_cpu) rs6000_tune")))
 
 ;; The ISA we implement.
-(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10"
+(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10,fastmath"
   (const_string "any"))
 
 ;; Is this alternative enabled for the current CPU/ISA/etc.?
@@ -402,6 +402,10 @@ (define_attr "enabled" ""
  (and (eq_attr "isa" "p10")
  (match_test "TARGET_POWER10"))
  (const_int 1)
+
+ (and (eq_attr "isa" "fastmath")
+ (match_test "flag_unsafe_math_optimizations"))
+ (const_int 1)
 ] (const_int 0)))
 
 ;; If this instruction is microcoded on the CELL processor
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0865608f94a..7f64a2dd356 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -2009,11 +2009,12 @@ (define_insn "*vsx_tsqrt2_internal"
   "xtsqrtp %0,%x1"
   [(set_attr "type" "")])
 
-;; Fused vector multiply/add instructions. Support the classical Altivec
-;; versions of fma, which allows the target to be a separate register from the
-;; 3 inputs.  Under VSX, the target must be either the addend or the first
-;; multiply.
-
+;; Fused vector multiply/add instructions. Under VSX, the target must be either
+;; the addend or the first multiply.  If the user used -Ofast, also support the
+;; classical VMX versions of fma (vmaddfp and vnmsubfp), which allows the
+;; target to be a separate register from the 3 inputs.  This restriction is due

Re: PR target/70243: Do not generate fmaddfp and fnmsubfp

2023-04-07 Thread Michael Meissner via Gcc-patches
On Thu, Apr 06, 2023 at 03:37:59PM -0500, Segher Boessenkool wrote:
> Hi!
> 
> On Thu, Apr 06, 2023 at 11:12:11AM -0400, Michael Meissner wrote:
> > The Altivec instructions fmaddfp and fnmsubfp have different rounding 
> > behaviors
> 
> Those are not existing instructions.  You mean "vmaddfp" etc.

Yes, sorry about that.  I guess I was thinking about the scalar instructions.

> > than the VSX xvmaddsp and xvnmsubsp instructions.  In particular, generating
> > these instructions seems to break Eigen.
> 
> Those instructions use round-to-nearest-ties-to-even, like all other
> VMX FP insns.  A proper patch has to deal with all VMX FP insns.  But,
> almost all programs expect that rounding mode anyway, so this is not a
> problem in practice.  What happened on Eigen is that the Linux kernel
> starts every new process with VSCR[NJ]=1, breaking pretty much
> everything that wants floating point for non-toy purposes.  (There
> currently is a bug on LE that sets the wrong bit, hiding the problem in
> that configuration, but it is intended there as well).
> 
> > GCC has generated the Altivec fmaddfp and fnmsubfp instructions on VSX 
> > systems
> > as an alternative to the xsmadd{a,m}sp and xsnmsub{a,m}sp instructions.  The
> > advantage  of the Altivec instructions is that they are 4 operand 
> > instructions
> > (i.e. the target register does not have to overlap with one of the input
> > registers).  The advantage is it can eliminate an extra move instruction.  
> > The
> > disadvantage is it does not round the same way as the VSX instructions.
> 
> And it gets the VSCR[NJ] setting applied.  Yup.
> 
> > This patch eliminates the generation of the Altivec fmaddfp and fnmsubfp
> > instructions as alternatives in the VSX instruction insn support, and in the
> > Altivec insns it adds a test to prevent the insn from being used if VSX is
> > available.  I also added a test to the regression test suite.
> 
> Please leave the latter out, it does not belong in this patch.  If you
> want a patch to do that deal with *all* VMX FP insns?  There also are
> add, sub, mul, etc.  Well I think those (as well as madd and nmsub) are
> the only ones that use the NJ bit or the RN bits, but please check.

After I posted the patch I refreshed my memory of the VECTOR_UNIT_ALTIVEC_P
macro and it is not true if VSX code generation is enabled.  So I dropped the
changes to altivec.md.

In addition, as far as I know, the only AltiVec (VMX) floating point
instructions generated when VSX is used are the vmaddfp and vnmsubfp
instructions.  In the case of add and subtract, xvaddsp and xvsubsp are more
general than vaddfp or vsubfp since they can access all VSX registers.  VMX does
not have a stand-alone multiply (it generates FMA with a zero register) and it
does not have a division operation.  And VMX does not have xvmsub{a,m}sp nor
xvnadd{a,m}sp variations of the FMA instructions.

> > --- a/gcc/config/rs6000/altivec.md
> > +++ b/gcc/config/rs6000/altivec.md
> > @@ -750,12 +750,15 @@ (define_insn "altivec_vsel4"
> >  
> >  ;; Fused multiply add.
> >  
> > +;; If we are using VSX instructions, do not generate the vmaddfp 
> > instruction
> > +;; since is has different rounding behavior than the xvmaddsp instruction.
> > +
> 
> No blank lines please.

Ok.

> >  (define_insn "*altivec_fmav4sf4"
> >[(set (match_operand:V4SF 0 "register_operand" "=v")
> > (fma:V4SF (match_operand:V4SF 1 "register_operand" "v")
> >   (match_operand:V4SF 2 "register_operand" "v")
> >   (match_operand:V4SF 3 "register_operand" "v")))]
> > -  "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
> > +  "VECTOR_UNIT_ALTIVEC_P (V4SFmode) && !TARGET_VSX"
> 
> This is very error-prone.  Maybe add a test to the VECTOR_UNIT_ALTIVEC
> macro instead?

As I said that part of the code is not in the next patch.

> > -;; Fused vector multiply/add instructions. Support the classical Altivec
> > -;; versions of fma, which allows the target to be a separate register from 
> > the
> > -;; 3 inputs.  Under VSX, the target must be either the addend or the first
> > -;; multiply.
> > +;; Fused vector multiply/add instructions. Do not use the classical Altivec

> (Two spaces after dot, and AltiVec is spelled with a capital V.  I don't
> like it either, VMX is a much nicer and more regular name).

While the name might be more regular, in terms of the instruction set it
does have the holes that I mentioned above (no multiply that is not an FMA, and
two of the four FMA variants are not provided).

> > +;; versions of fma.  Those instructions allows the target to be a separate
> > +;; register from the 3 inputs, but they have different rounding behaviors.
> >  
> >  (define_insn "*vsx_fmav4sf4"
> > -  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
> > +  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
> > (fma:V4SF
> > - (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
> > - (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
> > -