[gcc r15-1383] vshuf-mem.C: Make -march=z14 depend on s390_vxe

2024-06-17 Thread Andreas Krebbel via Gcc-cvs
https://gcc.gnu.org/g:7e59f0c05da840ca13ba73d25947df8a4eaf199e

commit r15-1383-g7e59f0c05da840ca13ba73d25947df8a4eaf199e
Author: Andreas Krebbel 
Date:   Mon Jun 17 21:50:27 2024 +0200

vshuf-mem.C: Make -march=z14 depend on s390_vxe

gcc/testsuite/ChangeLog:

* g++.dg/torture/vshuf-mem.C: Use -march=z14 only, if the we are
on a machine which can actually run it.

Diff:
---
 gcc/testsuite/g++.dg/torture/vshuf-mem.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/torture/vshuf-mem.C 
b/gcc/testsuite/g++.dg/torture/vshuf-mem.C
index 6d892f876be5..1d828e33a140 100644
--- a/gcc/testsuite/g++.dg/torture/vshuf-mem.C
+++ b/gcc/testsuite/g++.dg/torture/vshuf-mem.C
@@ -1,6 +1,6 @@
 // { dg-options "-std=c++11 -Wno-psabi" }
 // { dg-do run }
-// { dg-additional-options "-march=z14" { target s390*-*-* } }
+// { dg-additional-options "-march=z14" { target s390_vxe } }
 
 /* This used to trigger (2024-05-28) the vectorize_vec_perm_const
backend hook to be invoked with a MEM source operand.  Extracted


[Committed] IBM Z: Fix ICE in expand_perm_as_replicate

2024-06-10 Thread Andreas Krebbel via Gcc
The current implementation assumes to always be invoked with register
operands. For memory operands we even have an instruction
though (vlrep). With the patch we try this first and only if it fails
force the input into a register and continue.

vec_splats generation fails for single element 128bit types which are
allowed for vec_splat. This is something to sort out with another
patch I guess.

Bootstrapped and regtested on IBM Z. Committed to mainline. Needs to
be committed to GCC 14 branch as well.

gcc/ChangeLog:

* config/s390/s390.cc (expand_perm_as_replicate): Handle memory
operands.
* config/s390/vx-builtins.md (vec_splats): Turn into 
parameterized expander.
(@vec_splats): New expander.

gcc/testsuite/ChangeLog:

* g++.dg/torture/vshuf-mem.C: New test.
---
 gcc/config/s390/s390.cc  | 17 +--
 gcc/config/s390/vx-builtins.md   |  2 +-
 gcc/testsuite/g++.dg/torture/vshuf-mem.C | 27 
 3 files changed, 43 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/torture/vshuf-mem.C

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index fa517bd3e77..ec836ec3cd4 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -17940,7 +17940,8 @@ expand_perm_as_replicate (const struct 
expand_vec_perm_d )
   unsigned char i;
   unsigned char elem;
   rtx base = d.op0;
-  rtx insn;
+  rtx insn = NULL_RTX;
+
   /* Needed to silence maybe-uninitialized warning.  */
   gcc_assert (d.nelt > 0);
   elem = d.perm[0];
@@ -17954,7 +17955,19 @@ expand_perm_as_replicate (const struct 
expand_vec_perm_d )
  base = d.op1;
  elem -= d.nelt;
}
-  insn = maybe_gen_vec_splat (d.vmode, d.target, base, GEN_INT (elem));
+  if (memory_operand (base, d.vmode))
+   {
+ /* Try to use vector load and replicate.  */
+ rtx new_base = adjust_address (base, GET_MODE_INNER (d.vmode),
+elem * GET_MODE_UNIT_SIZE (d.vmode));
+ insn = maybe_gen_vec_splats (d.vmode, d.target, new_base);
+   }
+  if (insn == NULL_RTX)
+   {
+ base = force_reg (d.vmode, base);
+ insn = maybe_gen_vec_splat (d.vmode, d.target, base, GEN_INT (elem));
+   }
+
   if (insn == NULL_RTX)
return false;
   emit_insn (insn);
diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
index 93c0d408a43..bb271c09a7d 100644
--- a/gcc/config/s390/vx-builtins.md
+++ b/gcc/config/s390/vx-builtins.md
@@ -145,7 +145,7 @@
   DONE;
 })
 
-(define_expand "vec_splats"
+(define_expand "@vec_splats"
   [(set (match_operand:VEC_HW  0 "register_operand" "")
(vec_duplicate:VEC_HW (match_operand: 1 "general_operand"  
"")))]
   "TARGET_VX")
diff --git a/gcc/testsuite/g++.dg/torture/vshuf-mem.C 
b/gcc/testsuite/g++.dg/torture/vshuf-mem.C
new file mode 100644
index 000..5f1ebf65665
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/vshuf-mem.C
@@ -0,0 +1,27 @@
+// { dg-options "-std=c++11" }
+// { dg-do run }
+// { dg-additional-options "-march=z14" { target s390*-*-* } }
+
+/* This used to trigger (2024-05-28) the vectorize_vec_perm_const
+   backend hook to be invoked with a MEM source operand.  Extracted
+   from onnxruntime's mlas library.  */
+
+typedef float V4SF __attribute__((vector_size (16)));
+typedef int V4SI __attribute__((vector_size (16)));
+
+template < unsigned I0, unsigned I1, unsigned I2, unsigned I3 > V4SF
+MlasShuffleFloat32x4 (V4SF Vector)
+{
+  return __builtin_shuffle (Vector, Vector, V4SI{I0, I1, I2, I3});
+}
+
+int
+main ()
+{
+  V4SF f = { 1.0f, 2.0f, 3.0f, 4.0f };
+  if (MlasShuffleFloat32x4 < 1, 1, 1, 1 > (f)[3] != 2.0f)
+__builtin_abort ();
+  if (MlasShuffleFloat32x4 < 3, 3, 3, 3 > (f)[1] != 4.0f)
+__builtin_abort ();
+  return 0;
+}
-- 
2.45.1



[gcc r15-1129] IBM Z: Fix ICE in expand_perm_as_replicate

2024-06-10 Thread Andreas Krebbel via Gcc-cvs
https://gcc.gnu.org/g:21fd8c67ad297212e3cb885883cc8df8611f3040

commit r15-1129-g21fd8c67ad297212e3cb885883cc8df8611f3040
Author: Andreas Krebbel 
Date:   Mon Jun 10 09:09:10 2024 +0200

IBM Z: Fix ICE in expand_perm_as_replicate

The current implementation assumes to always be invoked with register
operands. For memory operands we even have an instruction
though (vlrep). With the patch we try this first and only if it fails
force the input into a register and continue.

vec_splats generation fails for single element 128bit types which are
allowed for vec_splat. This is something to sort out with another
patch I guess.

gcc/ChangeLog:

* config/s390/s390.cc (expand_perm_as_replicate): Handle memory
operands.
* config/s390/vx-builtins.md (vec_splats): Turn into 
parameterized expander.
(@vec_splats): New expander.

gcc/testsuite/ChangeLog:

* g++.dg/torture/vshuf-mem.C: New test.

Diff:
---
 gcc/config/s390/s390.cc  | 17 +++--
 gcc/config/s390/vx-builtins.md   |  2 +-
 gcc/testsuite/g++.dg/torture/vshuf-mem.C | 27 +++
 3 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index fa517bd3e77..ec836ec3cd4 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -17940,7 +17940,8 @@ expand_perm_as_replicate (const struct 
expand_vec_perm_d )
   unsigned char i;
   unsigned char elem;
   rtx base = d.op0;
-  rtx insn;
+  rtx insn = NULL_RTX;
+
   /* Needed to silence maybe-uninitialized warning.  */
   gcc_assert (d.nelt > 0);
   elem = d.perm[0];
@@ -17954,7 +17955,19 @@ expand_perm_as_replicate (const struct 
expand_vec_perm_d )
  base = d.op1;
  elem -= d.nelt;
}
-  insn = maybe_gen_vec_splat (d.vmode, d.target, base, GEN_INT (elem));
+  if (memory_operand (base, d.vmode))
+   {
+ /* Try to use vector load and replicate.  */
+ rtx new_base = adjust_address (base, GET_MODE_INNER (d.vmode),
+elem * GET_MODE_UNIT_SIZE (d.vmode));
+ insn = maybe_gen_vec_splats (d.vmode, d.target, new_base);
+   }
+  if (insn == NULL_RTX)
+   {
+ base = force_reg (d.vmode, base);
+ insn = maybe_gen_vec_splat (d.vmode, d.target, base, GEN_INT (elem));
+   }
+
   if (insn == NULL_RTX)
return false;
   emit_insn (insn);
diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
index 93c0d408a43..bb271c09a7d 100644
--- a/gcc/config/s390/vx-builtins.md
+++ b/gcc/config/s390/vx-builtins.md
@@ -145,7 +145,7 @@
   DONE;
 })
 
-(define_expand "vec_splats"
+(define_expand "@vec_splats"
   [(set (match_operand:VEC_HW  0 "register_operand" "")
(vec_duplicate:VEC_HW (match_operand: 1 "general_operand"  
"")))]
   "TARGET_VX")
diff --git a/gcc/testsuite/g++.dg/torture/vshuf-mem.C 
b/gcc/testsuite/g++.dg/torture/vshuf-mem.C
new file mode 100644
index 000..5f1ebf65665
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/vshuf-mem.C
@@ -0,0 +1,27 @@
+// { dg-options "-std=c++11" }
+// { dg-do run }
+// { dg-additional-options "-march=z14" { target s390*-*-* } }
+
+/* This used to trigger (2024-05-28) the vectorize_vec_perm_const
+   backend hook to be invoked with a MEM source operand.  Extracted
+   from onnxruntime's mlas library.  */
+
+typedef float V4SF __attribute__((vector_size (16)));
+typedef int V4SI __attribute__((vector_size (16)));
+
+template < unsigned I0, unsigned I1, unsigned I2, unsigned I3 > V4SF
+MlasShuffleFloat32x4 (V4SF Vector)
+{
+  return __builtin_shuffle (Vector, Vector, V4SI{I0, I1, I2, I3});
+}
+
+int
+main ()
+{
+  V4SF f = { 1.0f, 2.0f, 3.0f, 4.0f };
+  if (MlasShuffleFloat32x4 < 1, 1, 1, 1 > (f)[3] != 2.0f)
+__builtin_abort ();
+  if (MlasShuffleFloat32x4 < 3, 3, 3, 3 > (f)[1] != 4.0f)
+__builtin_abort ();
+  return 0;
+}


[gcc r14-10087] s390x: Fix vec_xl/vec_xst type aliasing [PR114676]

2024-04-23 Thread Andreas Krebbel via Gcc-cvs
https://gcc.gnu.org/g:42189f21b22c43ac8ab46edf5f6a7b4d99bc86a5

commit r14-10087-g42189f21b22c43ac8ab46edf5f6a7b4d99bc86a5
Author: Andreas Krebbel 
Date:   Tue Apr 23 10:05:46 2024 +0200

s390x: Fix vec_xl/vec_xst type aliasing [PR114676]

The requirements of the vec_xl/vec_xst intrinsincs wrt aliasing of the
pointer argument are not really documented.  As it turns out, users
are likely to get it wrong.  With this patch we let the pointer
argument alias everything in order to make it more robust for users.

gcc/ChangeLog:

PR target/114676
* config/s390/s390-c.cc (s390_expand_overloaded_builtin): Use a
MEM_REF with an addend of type ptr_type_node.

gcc/testsuite/ChangeLog:

PR target/114676
* gcc.target/s390/zvector/pr114676.c: New test.

Suggested-by: Jakub Jelinek 

Diff:
---
 gcc/config/s390/s390-c.cc| 16 +---
 gcc/testsuite/gcc.target/s390/zvector/pr114676.c | 19 +++
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/gcc/config/s390/s390-c.cc b/gcc/config/s390/s390-c.cc
index 8d3d1a467a8..1bb6e810766 100644
--- a/gcc/config/s390/s390-c.cc
+++ b/gcc/config/s390/s390-c.cc
@@ -498,11 +498,11 @@ s390_expand_overloaded_builtin (location_t loc,
/* Build a vector type with the alignment of the source
   location in order to enable correct alignment hints to be
   generated for vl.  */
-   tree mem_type = build_aligned_type (return_type,
-   TYPE_ALIGN (TREE_TYPE (TREE_TYPE 
((*arglist)[1];
+   unsigned align = TYPE_ALIGN (TREE_TYPE (TREE_TYPE ((*arglist)[1])));
+   tree mem_type = build_aligned_type (return_type, align);
return build2 (MEM_REF, mem_type,
   fold_build_pointer_plus ((*arglist)[1], (*arglist)[0]),
-  build_int_cst (TREE_TYPE ((*arglist)[1]), 0));
+  build_int_cst (ptr_type_node, 0));
   }
 case S390_OVERLOADED_BUILTIN_s390_vec_xst:
 case S390_OVERLOADED_BUILTIN_s390_vec_xstd2:
@@ -511,11 +511,13 @@ s390_expand_overloaded_builtin (location_t loc,
/* Build a vector type with the alignment of the target
   location in order to enable correct alignment hints to be
   generated for vst.  */
-   tree mem_type = build_aligned_type (TREE_TYPE((*arglist)[0]),
-   TYPE_ALIGN (TREE_TYPE (TREE_TYPE 
((*arglist)[2];
+   unsigned align = TYPE_ALIGN (TREE_TYPE (TREE_TYPE ((*arglist)[2])));
+   tree mem_type = build_aligned_type (TREE_TYPE ((*arglist)[0]), align);
return build2 (MODIFY_EXPR, mem_type,
-  build1 (INDIRECT_REF, mem_type,
-  fold_build_pointer_plus ((*arglist)[2], 
(*arglist)[1])),
+  build2 (MEM_REF, mem_type,
+  fold_build_pointer_plus ((*arglist)[2],
+   (*arglist)[1]),
+  build_int_cst (ptr_type_node, 0)),
   (*arglist)[0]);
   }
 case S390_OVERLOADED_BUILTIN_s390_vec_load_pair:
diff --git a/gcc/testsuite/gcc.target/s390/zvector/pr114676.c 
b/gcc/testsuite/gcc.target/s390/zvector/pr114676.c
new file mode 100644
index 000..bdc66b2920a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/zvector/pr114676.c
@@ -0,0 +1,19 @@
+/* { dg-do run { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z14 -mzvector" } */
+
+#include 
+
+void __attribute__((noinline)) foo (int *mem)
+{
+  vec_xst ((vector float){ 1.0f, 2.0f, 3.0f, 4.0f }, 0, (float*)mem);
+}
+
+int
+main ()
+{
+  int m[4] = { 0 };
+  foo (m);
+  if (m[3] == 0)
+__builtin_abort ();
+  return 0;
+}


[gcc r14-10071] s390x: Do not default to -mvx for -mesa

2024-04-22 Thread Andreas Krebbel via Gcc-cvs
https://gcc.gnu.org/g:1b7785fdf95d179209f7277dd0ef912562130a39

commit r14-10071-g1b7785fdf95d179209f7277dd0ef912562130a39
Author: Andreas Krebbel 
Date:   Mon Apr 22 11:07:43 2024 +0200

s390x: Do not default to -mvx for -mesa

We currently enable the vector extensions also for -march=z13 -m31
-mesa which is very wrong.

gcc/ChangeLog:

* config/s390/s390.cc (s390_option_override_internal): Check zarch
flag before enabling -mvx.

Diff:
---
 gcc/config/s390/s390.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index bf46eab2d63..5968808fcb6 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -16104,7 +16104,7 @@ s390_option_override_internal (struct gcc_options *opts,
 }
   else
 {
-  if (TARGET_CPU_VX_P (opts))
+  if (TARGET_CPU_VX_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
/* Enable vector support if available and not explicitly disabled
   by user.  E.g. with -m31 -march=z13 -mzarch */
opts->x_target_flags |= MASK_OPT_VX;


[gcc r14-9461] IBM Z: Fix -munaligned-symbols

2024-03-14 Thread Andreas Krebbel via Gcc-cvs
https://gcc.gnu.org/g:90a7da695284da49182446ba45fbcddb9eb7fc91

commit r14-9461-g90a7da695284da49182446ba45fbcddb9eb7fc91
Author: Andreas Krebbel 
Date:   Thu Mar 14 09:54:31 2024 +0100

IBM Z: Fix -munaligned-symbols

With this fix we make sure that only symbols with a natural alignment
smaller than 2 are considered misaligned with
-munaligned-symbols. Background is that -munaligned-symbols is only
supposed to affect symbols whose natural alignment wouldn't be enough
to fulfill our ABI requirement of having all symbols at even
addresses. Because only these are the cases where we differ from other
architectures.

gcc/ChangeLog:

* config/s390/s390.cc (s390_encode_section_info): Adjust the check
for misaligned symbols.
* config/s390/s390.opt: Improve documentation.

gcc/testsuite/ChangeLog:

* gcc.target/s390/aligned-1.c: Add weak and void variables
incorporating the cases from unaligned-2.c.
* gcc.target/s390/unaligned-1.c: Likewise.
* gcc.target/s390/unaligned-2.c: Removed.

Diff:
---
 gcc/config/s390/s390.cc |  15 +++-
 gcc/config/s390/s390.opt|   7 +-
 gcc/testsuite/gcc.target/s390/aligned-1.c   | 101 ---
 gcc/testsuite/gcc.target/s390/unaligned-1.c | 103 +---
 gcc/testsuite/gcc.target/s390/unaligned-2.c |  16 -
 5 files changed, 201 insertions(+), 41 deletions(-)

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index e63965578f1..372a2324403 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -13802,10 +13802,19 @@ s390_encode_section_info (tree decl, rtx rtl, int 
first)
 that can go wrong (i.e. no FUNC_DECLs).
 All symbols without an explicit alignment are assumed to be 2
 byte aligned as mandated by our ABI.  This behavior can be
-overridden for external symbols with the -munaligned-symbols
-switch.  */
+overridden for external and weak symbols with the
+-munaligned-symbols switch.
+For all external symbols without explicit alignment
+DECL_ALIGN is already trimmed down to 8, however for weak
+symbols this does not happen.  These cases are catched by the
+type size check.  */
+  const_tree size = TYPE_SIZE (TREE_TYPE (decl));
+  unsigned HOST_WIDE_INT size_num = (tree_fits_uhwi_p (size)
+? tree_to_uhwi (size) : 0);
   if ((DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) % 16)
- || (s390_unaligned_symbols_p && !decl_binds_to_current_def_p (decl)))
+ || (s390_unaligned_symbols_p
+ && !decl_binds_to_current_def_p (decl)
+ && (DECL_USER_ALIGN (decl) ? DECL_ALIGN (decl) % 16 : size_num < 
16)))
SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
   else if (DECL_ALIGN (decl) % 32)
SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
diff --git a/gcc/config/s390/s390.opt b/gcc/config/s390/s390.opt
index 901ae4beb01..a5b5aa95a12 100644
--- a/gcc/config/s390/s390.opt
+++ b/gcc/config/s390/s390.opt
@@ -332,7 +332,8 @@ Store all argument registers on the stack.
 
 munaligned-symbols
 Target Var(s390_unaligned_symbols_p) Init(0)
-Assume external symbols to be potentially unaligned.  By default all
-symbols without explicit alignment are assumed to reside on a 2 byte
-boundary as mandated by the IBM Z ABI.
+Assume external symbols, whose natural alignment would be 1, to be
+potentially unaligned.  By default all symbols without explicit
+alignment are assumed to reside on a 2 byte boundary as mandated by
+the IBM Z ABI.
 
diff --git a/gcc/testsuite/gcc.target/s390/aligned-1.c 
b/gcc/testsuite/gcc.target/s390/aligned-1.c
index 2dc99cf66bd..3f5a2611ef1 100644
--- a/gcc/testsuite/gcc.target/s390/aligned-1.c
+++ b/gcc/testsuite/gcc.target/s390/aligned-1.c
@@ -1,20 +1,103 @@
-/* Even symbols without explicite alignment are assumed to reside on a
+/* Even symbols without explicit alignment are assumed to reside on a
2 byte boundary, as mandated by the IBM Z ELF ABI, and therefore
can be accessed using the larl instruction.  */
 
 /* { dg-do compile } */
 /* { dg-options "-O3 -march=z900 -fno-section-anchors" } */
 
-extern unsigned char extern_implicitly_aligned;
-extern unsigned char extern_explicitly_aligned __attribute__((aligned(2)));
-unsigned char aligned;
+extern unsigned char extern_char;
+extern unsigned char extern_explicitly_aligned_char 
__attribute__((aligned(2)));
+extern unsigned char extern_explicitly_unaligned_char 
__attribute__((aligned(1)));
+extern unsigned char __attribute__((weak)) extern_weak_char;
+extern unsigned char extern_explicitly_aligned_weak_char 
__attribute__((weak,aligned(2)));
+extern unsigned char extern_explicitly_unaligned_weak_char 
__attribute__((weak,aligned(1)));
 
-unsigned char
+unsigned char normal_char;
+unsigned char 

Re: [PATCH] s390: Fix builtins vec_rli and verll

2023-09-11 Thread Andreas Krebbel via Gcc-patches
On 9/11/23 08:56, Stefan Schulze Frielinghaus wrote:
> On Mon, Aug 28, 2023 at 11:33:37AM +0200, Andreas Krebbel wrote:
>> Hi Stefan,
>>
>> do you really need to introduce a new flag for U64 given that the type of 
>> the builtin is unsigned long?
> 
> In function s390_const_operand_ok the immediate is checked whether it is
> valide w.r.t. the flag:
> 
>   tree_to_uhwi (arg) > ((HOST_WIDE_INT_1U << (bitwidth - 1) << 1) - 1)
> 
> Here bitwidth is derived from the flag.

I see, it is about enabling the constant check at all.

Ok, thanks!

Andreas

> 
> Cheers,
> Stefan
> 
>>
>> Andreas
>>
>> On 8/21/23 17:56, Stefan Schulze Frielinghaus wrote:
>>> The second argument of these builtins is an unsigned immediate.  For
>>> vec_rli the API allows immediates up to 64 bits whereas the instruction
>>> verll only allows immediates up to 32 bits.  Since the shift count
>>> equals the immediate modulo vector element size, truncating those
>>> immediates is fine.
>>>
>>> Bootstrapped and regtested on s390.  Ok for mainline?
>>>
>>> gcc/ChangeLog:
>>>
>>> * config/s390/s390-builtins.def (O_U64): New.
>>> (O1_U64): Ditto.
>>> (O2_U64): Ditto.
>>> (O3_U64): Ditto.
>>> (O4_U64): Ditto.
>>> (O_M12): Change bit position.
>>> (O_S2): Ditto.
>>> (O_S3): Ditto.
>>> (O_S4): Ditto.
>>> (O_S5): Ditto.
>>> (O_S8): Ditto.
>>> (O_S12): Ditto.
>>> (O_S16): Ditto.
>>> (O_S32): Ditto.
>>> (O_ELEM): Ditto.
>>> (O_LIT): Ditto.
>>> (OB_DEF_VAR): Add operand constraints.
>>> (B_DEF): Ditto.
>>> * config/s390/s390.cc (s390_const_operand_ok): Honour 64 bit
>>> operands.
>>> ---
>>>  gcc/config/s390/s390-builtins.def | 60 ++-
>>>  gcc/config/s390/s390.cc   |  6 ++--
>>>  2 files changed, 39 insertions(+), 27 deletions(-)
>>>
>>> diff --git a/gcc/config/s390/s390-builtins.def 
>>> b/gcc/config/s390/s390-builtins.def
>>> index a16983b18bd..c829f445a11 100644
>>> --- a/gcc/config/s390/s390-builtins.def
>>> +++ b/gcc/config/s390/s390-builtins.def
>>> @@ -28,6 +28,7 @@
>>>  #undef O_U12
>>>  #undef O_U16
>>>  #undef O_U32
>>> +#undef O_U64
>>>  
>>>  #undef O_M12
>>>  
>>> @@ -88,6 +89,11 @@
>>>  #undef O3_U32
>>>  #undef O4_U32
>>>  
>>> +#undef O1_U64
>>> +#undef O2_U64
>>> +#undef O3_U64
>>> +#undef O4_U64
>>> +
>>>  #undef O1_M12
>>>  #undef O2_M12
>>>  #undef O3_M12
>>> @@ -157,20 +163,21 @@
>>>  #define O_U127 /* unsigned 16 bit literal */
>>>  #define O_U168 /* unsigned 16 bit literal */
>>>  #define O_U329 /* unsigned 32 bit literal */
>>> +#define O_U64   10 /* unsigned 64 bit literal */
>>>  
>>> -#define O_M12   10 /* matches bitmask of 12 */
>>> +#define O_M12   11 /* matches bitmask of 12 */
>>>  
>>> -#define O_S211 /* signed  2 bit literal */
>>> -#define O_S312 /* signed  3 bit literal */
>>> -#define O_S413 /* signed  4 bit literal */
>>> -#define O_S514 /* signed  5 bit literal */
>>> -#define O_S815 /* signed  8 bit literal */
>>> -#define O_S12   16 /* signed 12 bit literal */
>>> -#define O_S16   17 /* signed 16 bit literal */
>>> -#define O_S32   18 /* signed 32 bit literal */
>>> +#define O_S212 /* signed  2 bit literal */
>>> +#define O_S313 /* signed  3 bit literal */
>>> +#define O_S414 /* signed  4 bit literal */
>>> +#define O_S515 /* signed  5 bit literal */
>>> +#define O_S816 /* signed  8 bit literal */
>>> +#define O_S12   17 /* signed 12 bit literal */
>>> +#define O_S16   18 /* signed 16 bit literal */
>>> +#define O_S32   19 /* signed 32 bit literal */
>>>  
>>> -#define O_ELEM  19 /* Element selector requiring modulo arithmetic. */
>>> -#define O_LIT   20 /* Operand must be a literal fitting the target type.  
>>> */
>>> +#define O_ELEM  20 /* Element selector requiring modulo arithmetic. */
>>> +#define O_LIT   21 /* Operand must be a literal fitting the target type.  
>>> */
>>>  
>>>  #define O_SHIFT 5
>>>  
>>> @@ -223,6 +230,11 @@
>>>  #define O3_U32 (O_U32 << (2 * O_SHIFT))
>>>  #define O4_U32 (O_U32 << (3 * O_SHIFT))
>>>  
>>> +#define O1_U64 O_U64
>>> +#define O2_U64 (O_U64 << O_SHIFT)
>>> +#define O3_U64 (O_U64 << (2 * O_SHIFT))
>>> +#define O4_U64 (O_U64 << (3 * O_SHIFT))
>>> +
>>>  #define O1_M12 O_M12
>>>  #define O2_M12 (O_M12 << O_SHIFT)
>>>  #define O3_M12 (O_M12 << (2 * O_SHIFT))
>>> @@ -1989,19 +2001,19 @@ B_DEF  (s390_verllvf,   vrotlv4si3, 
>>> 0,
>>>  B_DEF  (s390_verllvg,   vrotlv2di3, 0, 
>>>  B_VX,   0,  BT_FN_UV2DI_UV2DI_UV2DI)
>>>  
>>>  OB_DEF (s390_vec_rli,   s390_vec_rli_u8,
>>> s390_vec_rli_s64,   B_VX,   BT_FN_OV4SI_OV4SI_ULONG)
>>> -OB_DEF_VAR (s390_vec_rli_u8,s390_verllb,0, 
>>>  0,  BT_OV_UV16QI_UV16QI_ULONG)
>>> -OB_DEF_VAR (s390_vec_rli_s8,s390_verllb,0, 
>>>  0,  BT_OV_V16QI_V16QI_ULONG)

Re: [PATCH] s390: Fix builtins vec_rli and verll

2023-08-28 Thread Andreas Krebbel via Gcc-patches
Hi Stefan,

do you really need to introduce a new flag for U64 given that the type of the 
builtin is unsigned long?

Andreas

On 8/21/23 17:56, Stefan Schulze Frielinghaus wrote:
> The second argument of these builtins is an unsigned immediate.  For
> vec_rli the API allows immediates up to 64 bits whereas the instruction
> verll only allows immediates up to 32 bits.  Since the shift count
> equals the immediate modulo vector element size, truncating those
> immediates is fine.
> 
> Bootstrapped and regtested on s390.  Ok for mainline?
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390-builtins.def (O_U64): New.
>   (O1_U64): Ditto.
>   (O2_U64): Ditto.
>   (O3_U64): Ditto.
>   (O4_U64): Ditto.
>   (O_M12): Change bit position.
>   (O_S2): Ditto.
>   (O_S3): Ditto.
>   (O_S4): Ditto.
>   (O_S5): Ditto.
>   (O_S8): Ditto.
>   (O_S12): Ditto.
>   (O_S16): Ditto.
>   (O_S32): Ditto.
>   (O_ELEM): Ditto.
>   (O_LIT): Ditto.
>   (OB_DEF_VAR): Add operand constraints.
>   (B_DEF): Ditto.
>   * config/s390/s390.cc (s390_const_operand_ok): Honour 64 bit
>   operands.
> ---
>  gcc/config/s390/s390-builtins.def | 60 ++-
>  gcc/config/s390/s390.cc   |  6 ++--
>  2 files changed, 39 insertions(+), 27 deletions(-)
> 
> diff --git a/gcc/config/s390/s390-builtins.def 
> b/gcc/config/s390/s390-builtins.def
> index a16983b18bd..c829f445a11 100644
> --- a/gcc/config/s390/s390-builtins.def
> +++ b/gcc/config/s390/s390-builtins.def
> @@ -28,6 +28,7 @@
>  #undef O_U12
>  #undef O_U16
>  #undef O_U32
> +#undef O_U64
>  
>  #undef O_M12
>  
> @@ -88,6 +89,11 @@
>  #undef O3_U32
>  #undef O4_U32
>  
> +#undef O1_U64
> +#undef O2_U64
> +#undef O3_U64
> +#undef O4_U64
> +
>  #undef O1_M12
>  #undef O2_M12
>  #undef O3_M12
> @@ -157,20 +163,21 @@
>  #define O_U127 /* unsigned 16 bit literal */
>  #define O_U168 /* unsigned 16 bit literal */
>  #define O_U329 /* unsigned 32 bit literal */
> +#define O_U64   10 /* unsigned 64 bit literal */
>  
> -#define O_M12   10 /* matches bitmask of 12 */
> +#define O_M12   11 /* matches bitmask of 12 */
>  
> -#define O_S211 /* signed  2 bit literal */
> -#define O_S312 /* signed  3 bit literal */
> -#define O_S413 /* signed  4 bit literal */
> -#define O_S514 /* signed  5 bit literal */
> -#define O_S815 /* signed  8 bit literal */
> -#define O_S12   16 /* signed 12 bit literal */
> -#define O_S16   17 /* signed 16 bit literal */
> -#define O_S32   18 /* signed 32 bit literal */
> +#define O_S212 /* signed  2 bit literal */
> +#define O_S313 /* signed  3 bit literal */
> +#define O_S414 /* signed  4 bit literal */
> +#define O_S515 /* signed  5 bit literal */
> +#define O_S816 /* signed  8 bit literal */
> +#define O_S12   17 /* signed 12 bit literal */
> +#define O_S16   18 /* signed 16 bit literal */
> +#define O_S32   19 /* signed 32 bit literal */
>  
> -#define O_ELEM  19 /* Element selector requiring modulo arithmetic. */
> -#define O_LIT   20 /* Operand must be a literal fitting the target type.  */
> +#define O_ELEM  20 /* Element selector requiring modulo arithmetic. */
> +#define O_LIT   21 /* Operand must be a literal fitting the target type.  */
>  
>  #define O_SHIFT 5
>  
> @@ -223,6 +230,11 @@
>  #define O3_U32 (O_U32 << (2 * O_SHIFT))
>  #define O4_U32 (O_U32 << (3 * O_SHIFT))
>  
> +#define O1_U64 O_U64
> +#define O2_U64 (O_U64 << O_SHIFT)
> +#define O3_U64 (O_U64 << (2 * O_SHIFT))
> +#define O4_U64 (O_U64 << (3 * O_SHIFT))
> +
>  #define O1_M12 O_M12
>  #define O2_M12 (O_M12 << O_SHIFT)
>  #define O3_M12 (O_M12 << (2 * O_SHIFT))
> @@ -1989,19 +2001,19 @@ B_DEF  (s390_verllvf,   vrotlv4si3,   
>   0,
>  B_DEF  (s390_verllvg,   vrotlv2di3, 0,   
>B_VX,   0,  BT_FN_UV2DI_UV2DI_UV2DI)
>  
>  OB_DEF (s390_vec_rli,   s390_vec_rli_u8,
> s390_vec_rli_s64,   B_VX,   BT_FN_OV4SI_OV4SI_ULONG)
> -OB_DEF_VAR (s390_vec_rli_u8,s390_verllb,0,   
>0,  BT_OV_UV16QI_UV16QI_ULONG)
> -OB_DEF_VAR (s390_vec_rli_s8,s390_verllb,0,   
>0,  BT_OV_V16QI_V16QI_ULONG)
> -OB_DEF_VAR (s390_vec_rli_u16,   s390_verllh,0,   
>0,  BT_OV_UV8HI_UV8HI_ULONG)
> -OB_DEF_VAR (s390_vec_rli_s16,   s390_verllh,0,   
>0,  BT_OV_V8HI_V8HI_ULONG)
> -OB_DEF_VAR (s390_vec_rli_u32,   s390_verllf,0,   
>0,  BT_OV_UV4SI_UV4SI_ULONG)
> -OB_DEF_VAR (s390_vec_rli_s32,   s390_verllf,0,   
>0,  BT_OV_V4SI_V4SI_ULONG)
> -OB_DEF_VAR (s390_vec_rli_u64,   s390_verllg,0,   
>0,  BT_OV_UV2DI_UV2DI_ULONG)
> -OB_DEF_VAR 

Re: [PATCH] s390: Fix some builtin definitions

2023-08-28 Thread Andreas Krebbel via Gcc-patches
On 8/21/23 17:58, Stefan Schulze Frielinghaus wrote:
> Bootstrapped and regtested on s390.  Ok for mainline?
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390-builtins.def (s390_vec_signed_flt): Fix
>   builtin flag.
>   (s390_vec_unsigned_flt): Ditto.
>   (s390_vec_revb_flt): Ditto.
>   (s390_vec_reve_flt): Ditto.
>   (s390_vclfnhs): Fix operand flags.
>   (s390_vclfnls): Ditto.
>   (s390_vcrnfs): Ditto.
>   (s390_vcfn): Ditto.
>   (s390_vcnf): Ditto.

Ok. Thanks!

Andreas


> ---
>  gcc/config/s390/s390-builtins.def | 20 ++--
>  1 file changed, 10 insertions(+), 10 deletions(-)
> 
> diff --git a/gcc/config/s390/s390-builtins.def 
> b/gcc/config/s390/s390-builtins.def
> index c829f445a11..964d86c74a0 100644
> --- a/gcc/config/s390/s390-builtins.def
> +++ b/gcc/config/s390/s390-builtins.def
> @@ -2846,12 +2846,12 @@ B_DEF  (s390_vcelfb,
> floatunsv4siv4sf2,  0,
>  B_DEF  (s390_vcdlgb,floatunsv2div2df2,  0,   
>B_VX,   O2_U4 | O3_U3,  BT_FN_V2DF_UV2DI)
>  
>  OB_DEF (s390_vec_signed,
> s390_vec_signed_flt,s390_vec_signed_dbl,B_VX,   BT_FN_OV4SI_OV4SI)
> -OB_DEF_VAR (s390_vec_signed_flt,s390_vcfeb, 0,   
>B_VXE2, BT_OV_V4SI_V4SF)
> +OB_DEF_VAR (s390_vec_signed_flt,s390_vcfeb, B_VXE2,  
>0,  BT_OV_V4SI_V4SF)
>  OB_DEF_VAR (s390_vec_signed_dbl,s390_vcgdb, 0,   
>0,  BT_OV_V2DI_V2DF)
>  
>  OB_DEF (s390_vec_unsigned,  
> s390_vec_unsigned_flt,s390_vec_unsigned_dbl,B_VX,   BT_FN_OV4SI_OV4SI)
> -OB_DEF_VAR (s390_vec_unsigned_flt,  s390_vclfeb,0,   
>  B_VXE2, BT_OV_UV4SI_V4SF)
> -OB_DEF_VAR (s390_vec_unsigned_dbl,  s390_vclgdb,0,   
>  0,  BT_OV_UV2DI_V2DF)
> +OB_DEF_VAR (s390_vec_unsigned_flt,  s390_vclfeb,B_VXE2,  
>0,  BT_OV_UV4SI_V4SF)
> +OB_DEF_VAR (s390_vec_unsigned_dbl,  s390_vclgdb,0,   
>0,  BT_OV_UV2DI_V2DF)
>  
>  B_DEF  (s390_vcfeb, fix_truncv4sfv4si2, 0,   
>B_VXE2, O2_U4 | O3_U3,  BT_FN_V4SI_V4SF)
>  B_DEF  (s390_vcgdb, fix_truncv2dfv2di2, 0,   
>B_VX,   O2_U4 | O3_U3,  BT_FN_V2DI_V2DF)
> @@ -2929,7 +2929,7 @@ OB_DEF_VAR (s390_vec_revb_s32,  s390_vlbrf, 
> 0,
>  OB_DEF_VAR (s390_vec_revb_u32,  s390_vlbrf, 0,   
>0,  BT_OV_UV4SI_UV4SI)
>  OB_DEF_VAR (s390_vec_revb_s64,  s390_vlbrg, 0,   
>0,  BT_OV_V2DI_V2DI)
>  OB_DEF_VAR (s390_vec_revb_u64,  s390_vlbrg, 0,   
>0,  BT_OV_UV2DI_UV2DI)
> -OB_DEF_VAR (s390_vec_revb_flt,  s390_vlbrf_flt, 0,   
>B_VXE,  BT_OV_V4SF_V4SF)
> +OB_DEF_VAR (s390_vec_revb_flt,  s390_vlbrf_flt, B_VXE,   
>0,  BT_OV_V4SF_V4SF)
>  OB_DEF_VAR (s390_vec_revb_dbl,  s390_vlbrg_dbl, 0,   
>0,  BT_OV_V2DF_V2DF)
>  
>  B_DEF  (s390_vlbrh, bswapv8hi,  0,   
>B_VX,   0,   BT_FN_V8HI_V8HI)
> @@ -2960,7 +2960,7 @@ OB_DEF_VAR (s390_vec_reve_u32,  s390_vlerf, 
> 0,
>  OB_DEF_VAR (s390_vec_reve_b64,  s390_vlerg, 0,   
>0,  BT_OV_BV2DI_BV2DI)
>  OB_DEF_VAR (s390_vec_reve_s64,  s390_vlerg, 0,   
>0,  BT_OV_V2DI_V2DI)
>  OB_DEF_VAR (s390_vec_reve_u64,  s390_vlerg, 0,   
>0,  BT_OV_UV2DI_UV2DI)
> -OB_DEF_VAR (s390_vec_reve_flt,  s390_vlerf_flt, 0,   
>B_VXE,  BT_OV_V4SF_V4SF)
> +OB_DEF_VAR (s390_vec_reve_flt,  s390_vlerf_flt, B_VXE,   
>0,  BT_OV_V4SF_V4SF)
>  OB_DEF_VAR (s390_vec_reve_dbl,  s390_vlerg_dbl, 0,   
>0,  BT_OV_V2DF_V2DF)
>  
>  B_DEF  (s390_vlerb, eltswapv16qi,   0,   
>B_VX,   0,   BT_FN_V16QI_V16QI)
> @@ -3037,10 +3037,10 @@ B_DEF  (s390_vstrszf,vstrszv4si,  
>   0,
>  
>  /* arch 14 builtins */
>  
> -B_DEF  (s390_vclfnhs,vclfnhs_v8hi,  0,   
>B_NNPA, O3_U4,  BT_FN_V4SF_V8HI_UINT)
> -B_DEF  (s390_vclfnls,vclfnls_v8hi,  0,   
>B_NNPA, O3_U4,  BT_FN_V4SF_V8HI_UINT)
> +B_DEF  (s390_vclfnhs,vclfnhs_v8hi,  0,   
>B_NNPA, O2_U4,

Re: [PATCH] s390: Try to emit vlbr/vstbr instead of vperm et al.

2023-08-03 Thread Andreas Krebbel via Gcc-patches
On 8/3/23 08:51, Stefan Schulze Frielinghaus wrote:
> Bootstrapped and regtested on s390x.  Ok for mainline?
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (expand_perm_as_a_vlbr_vstbr_candidate):
>   New function which handles bswap patterns for vec_perm_const.
>   (vectorize_vec_perm_const_1): Call new function.
>   * config/s390/vector.md (*bswap): Fix operands in output
>   template.
>   (*vstbr): New insn.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/s390.exp: Add subdirectory vxe2.
>   * gcc.target/s390/vxe2/vlbr-1.c: New test.
>   * gcc.target/s390/vxe2/vstbr-1.c: New test.
>   * gcc.target/s390/vxe2/vstbr-2.c: New test.

Ok. Thanks!

Andreas


> ---
>  gcc/config/s390/s390.cc  | 55 
>  gcc/config/s390/vector.md| 16 --
>  gcc/testsuite/gcc.target/s390/s390.exp   |  3 ++
>  gcc/testsuite/gcc.target/s390/vxe2/vlbr-1.c  | 29 +++
>  gcc/testsuite/gcc.target/s390/vxe2/vstbr-1.c | 29 +++
>  gcc/testsuite/gcc.target/s390/vxe2/vstbr-2.c | 42 +++
>  6 files changed, 170 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/s390/vxe2/vlbr-1.c
>  create mode 100644 gcc/testsuite/gcc.target/s390/vxe2/vstbr-1.c
>  create mode 100644 gcc/testsuite/gcc.target/s390/vxe2/vstbr-2.c
> 
> diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
> index d9f10542473..91eb9232b10 100644
> --- a/gcc/config/s390/s390.cc
> +++ b/gcc/config/s390/s390.cc
> @@ -17698,6 +17698,58 @@ expand_perm_with_vstbrq (const struct 
> expand_vec_perm_d )
>return false;
>  }
>  
> +/* Try to emit vlbr/vstbr.  Note, this is only a candidate insn since
> +   TARGET_VECTORIZE_VEC_PERM_CONST operates on vector registers only.  Thus,
> +   either fwprop, combine et al. "fixes" one of the input/output operands 
> into
> +   a memory operand or a splitter has to reverse this into a general vperm
> +   operation.  */
> +
> +static bool
> +expand_perm_as_a_vlbr_vstbr_candidate (const struct expand_vec_perm_d )
> +{
> +  static const char perm[4][MAX_VECT_LEN]
> += { { 1,  0,  3,  2,  5,  4,  7, 6, 9,  8,  11, 10, 13, 12, 15, 14 },
> + { 3,  2,  1,  0,  7,  6,  5, 4, 11, 10, 9,  8,  15, 14, 13, 12 },
> + { 7,  6,  5,  4,  3,  2,  1, 0, 15, 14, 13, 12, 11, 10, 9,  8  },
> + { 15, 14, 13, 12, 11, 10, 9, 8, 7,  6,  5,  4,  3,  2,  1,  0  } };
> +
> +  if (!TARGET_VXE2 || d.vmode != V16QImode || d.op0 != d.op1)
> +return false;
> +
> +  if (memcmp (d.perm, perm[0], MAX_VECT_LEN) == 0)
> +{
> +  rtx target = gen_rtx_SUBREG (V8HImode, d.target, 0);
> +  rtx op0 = gen_rtx_SUBREG (V8HImode, d.op0, 0);
> +  emit_insn (gen_bswapv8hi (target, op0));
> +  return true;
> +}
> +
> +  if (memcmp (d.perm, perm[1], MAX_VECT_LEN) == 0)
> +{
> +  rtx target = gen_rtx_SUBREG (V4SImode, d.target, 0);
> +  rtx op0 = gen_rtx_SUBREG (V4SImode, d.op0, 0);
> +  emit_insn (gen_bswapv4si (target, op0));
> +  return true;
> +}
> +
> +  if (memcmp (d.perm, perm[2], MAX_VECT_LEN) == 0)
> +{
> +  rtx target = gen_rtx_SUBREG (V2DImode, d.target, 0);
> +  rtx op0 = gen_rtx_SUBREG (V2DImode, d.op0, 0);
> +  emit_insn (gen_bswapv2di (target, op0));
> +  return true;
> +}
> +
> +  if (memcmp (d.perm, perm[3], MAX_VECT_LEN) == 0)
> +{
> +  rtx target = gen_rtx_SUBREG (V1TImode, d.target, 0);
> +  rtx op0 = gen_rtx_SUBREG (V1TImode, d.op0, 0);
> +  emit_insn (gen_bswapv1ti (target, op0));
> +  return true;
> +}
> +
> +  return false;
> +}
>  
>  /* Try to find the best sequence for the vector permute operation
> described by D.  Return true if the operation could be
> @@ -17720,6 +17772,9 @@ vectorize_vec_perm_const_1 (const struct 
> expand_vec_perm_d )
>if (expand_perm_with_rot (d))
>  return true;
>  
> +  if (expand_perm_as_a_vlbr_vstbr_candidate (d))
> +return true;
> +
>return false;
>  }
>  
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index 21bec729efa..f0e9ed3d263 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -47,6 +47,7 @@
>  (define_mode_iterator VI_HW [V16QI V8HI V4SI V2DI])
>  (define_mode_iterator VI_HW_QHS [V16QI V8HI V4SI])
>  (define_mode_iterator VI_HW_HSD [V8HI  V4SI V2DI])
> +(define_mode_iterator VI_HW_HSDT [V8HI V4SI V2DI V1TI TI])
>  (define_mode_iterator VI_HW_HS  [V8HI  V4SI])
>  (define_mode_iterator VI_HW_QH  [V16QI V8HI])
>  
> @@ -2876,12 +2877,12 @@
>   (use (match_dup 2))])]
>"TARGET_VX"
>  {
> -  static char p[4][16] =
> +  static const char p[4][16] =
>  { { 1,  0,  3,  2,  5,  4,  7, 6, 9,  8,  11, 10, 13, 12, 15, 14 },   /* 
> H */
>{ 3,  2,  1,  0,  7,  6,  5, 4, 11, 10, 9,  8,  15, 14, 13, 12 },   /* 
> S */
>{ 7,  6,  5,  4,  3,  2,  1, 0, 15, 14, 13, 12, 11, 10, 9,  8  },   /* 
> D */
>{ 15, 14, 13, 12, 11, 10, 9, 8, 7,  6,  5,  4,  3,  2,  

Re: [PATCH] s390: Enable vect_bswap test cases

2023-08-03 Thread Andreas Krebbel via Gcc-patches
On 8/3/23 08:48, Stefan Schulze Frielinghaus wrote:
> This enables the following tests which rely on instruction vperm which
> is available since z13 with the initial vector support.
> 
> testsuite/gcc.dg/vect/vect-bswap16.c
> 42:/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { 
> target { vect_bswap || sse4_runtime } } } } */
> 
> testsuite/gcc.dg/vect/vect-bswap32.c
> 42:/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { 
> target { vect_bswap || sse4_runtime } } } } */
> 
> testsuite/gcc.dg/vect/vect-bswap64.c
> 42:/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { 
> target { vect_bswap || sse4_runtime } } } } */
> 
> Ok for mainline?

Ok. Thanks!

Andreas

> 
> gcc/testsuite/ChangeLog:
> 
>   * lib/target-supports.exp (check_effective_target_vect_bswap):
>   Add s390.
> ---
>  gcc/testsuite/lib/target-supports.exp | 8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/gcc/testsuite/lib/target-supports.exp 
> b/gcc/testsuite/lib/target-supports.exp
> index 4d04df2a709..2ccc0291442 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -7087,9 +7087,11 @@ proc check_effective_target_whole_vector_shift { } {
>  
>  proc check_effective_target_vect_bswap { } {
>  return [check_cached_effective_target_indexed vect_bswap {
> -  expr { [istarget aarch64*-*-*]
> -  || [is-effective-target arm_neon]
> -  || [istarget amdgcn-*-*] }}]
> +  expr { ([istarget aarch64*-*-*]
> +   || [is-effective-target arm_neon]
> +   || [istarget amdgcn-*-*])
> +  || ([istarget s390*-*-*]
> +  && [check_effective_target_s390_vx]) }}]
>  }
>  
>  # Return 1 if the target supports comparison of bool vectors for at



[Committed] IBM Z: Handle unaligned symbols

2023-08-01 Thread Andreas Krebbel via Gcc-patches
The IBM Z ELF ABI mandates every symbol to reside on a 2 byte boundary
in order to be able to use the larl instruction. However, in some
situations it is difficult to enforce this, e.g. for common linker
scripts as used in the Linux kernel. This patch introduces the
-munaligned-symbols option. When that option is used, external symbols
without an explicit alignment are considered unaligned and its address
will be pushed into GOT or the literal pool.

If the symbol in the final linker step turns out end up on a 2 byte
boundary the linker is able to take this back and replace the indirect
reference with larl again. This should minimize the effect to symbols
which are actually unaligned in the end.

Bootstrapped and regression tested on s390x. Committed to mainline.

Backports to stable branches will follow.

gcc/ChangeLog:

* config/s390/s390.cc (s390_encode_section_info): Assume external
symbols without explicit alignment to be unaligned if
-munaligned-symbols has been specified.
* config/s390/s390.opt (-munaligned-symbols): New option.

gcc/testsuite/ChangeLog:

* gcc.target/s390/aligned-1.c: New test.
* gcc.target/s390/unaligned-1.c: New test.
---
 gcc/config/s390/s390.cc |  9 +++--
 gcc/config/s390/s390.opt|  7 +++
 gcc/testsuite/gcc.target/s390/aligned-1.c   | 20 
 gcc/testsuite/gcc.target/s390/unaligned-1.c | 20 
 4 files changed, 54 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/aligned-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/unaligned-1.c

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 13970edcb5e..89474fd487a 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -13709,8 +13709,13 @@ s390_encode_section_info (tree decl, rtx rtl, int 
first)
 a larl/load-relative instruction.  We only handle the cases
 that can go wrong (i.e. no FUNC_DECLs).
 All symbols without an explicit alignment are assumed to be 2
-byte aligned as mandated by our ABI.  */
-  if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) % 16)
+byte aligned as mandated by our ABI.  This behavior can be
+overridden for external symbols with the -munaligned-symbols
+switch.  */
+  if (DECL_ALIGN (decl) % 16
+ && (DECL_USER_ALIGN (decl)
+ || (!SYMBOL_REF_LOCAL_P (XEXP (rtl, 0))
+ && s390_unaligned_symbols_p)))
SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
   else if (DECL_ALIGN (decl) % 32)
SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
diff --git a/gcc/config/s390/s390.opt b/gcc/config/s390/s390.opt
index 344aa551f44..496572046f7 100644
--- a/gcc/config/s390/s390.opt
+++ b/gcc/config/s390/s390.opt
@@ -329,3 +329,10 @@ Target Undocumented Var(unroll_only_small_loops) Init(0) 
Save
 mpreserve-args
 Target Var(s390_preserve_args_p) Init(0)
 Store all argument registers on the stack.
+
+munaligned-symbols
+Target Var(s390_unaligned_symbols_p) Init(0)
+Assume external symbols to be potentially unaligned.  By default all
+symbols without explicit alignment are assumed to reside on a 2 byte
+boundary as mandated by the IBM Z ABI.
+
diff --git a/gcc/testsuite/gcc.target/s390/aligned-1.c 
b/gcc/testsuite/gcc.target/s390/aligned-1.c
new file mode 100644
index 000..2dc99cf66bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/aligned-1.c
@@ -0,0 +1,20 @@
+/* Even symbols without explicite alignment are assumed to reside on a
+   2 byte boundary, as mandated by the IBM Z ELF ABI, and therefore
+   can be accessed using the larl instruction.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=z900 -fno-section-anchors" } */
+
+extern unsigned char extern_implicitly_aligned;
+extern unsigned char extern_explicitly_aligned __attribute__((aligned(2)));
+unsigned char aligned;
+
+unsigned char
+foo ()
+{
+  return extern_implicitly_aligned + extern_explicitly_aligned + aligned;
+}
+
+/* { dg-final { scan-assembler-times 
"larl\t%r\[0-9\]*,extern_implicitly_aligned\n" 1 } } */
+/* { dg-final { scan-assembler-times 
"larl\t%r\[0-9\]*,extern_explicitly_aligned\n" 1 } } */
+/* { dg-final { scan-assembler-times "larl\t%r\[0-9\]*,aligned\n" 1 } } */
diff --git a/gcc/testsuite/gcc.target/s390/unaligned-1.c 
b/gcc/testsuite/gcc.target/s390/unaligned-1.c
new file mode 100644
index 000..421330aded1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/unaligned-1.c
@@ -0,0 +1,20 @@
+/* With the -munaligned-symbols option all external symbols without
+   explicite alignment are assumed to be potentially unaligned and
+   therefore cannot be accessed with larl.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=z900 -fno-section-anchors -munaligned-symbols" } */
+
+extern unsigned char extern_unaligned;
+extern unsigned char extern_explicitly_aligned __attribute__((aligned(2)));
+unsigned char aligned;
+
+unsigned 

Re: [PATCH] s390: Optimize vec_cmpge followed by vec_sel

2023-07-18 Thread Andreas Krebbel via Gcc-patches
On 7/17/23 17:09, Juergen Christ wrote:
> A vec_cmpge produces a negation.  Replace this negation by swapping the two
> selection choices of a vec_sel based on the result of the vec_cmpge.
> 
> Bootstrapped and regression tested on s390x.
> 
> gcc/ChangeLog:
> 
>   * config/s390/vx-builtins.md: New vsel pattern.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/vector/vec-cmpge.c: New test.
> 
> Signed-off-by: Juergen Christ 

Committed to mainline. Thanks!

Bye,

Andreas



Re: [PATCH] s390: Fix vec_init default expander

2023-07-07 Thread Andreas Krebbel via Gcc-patches
On 7/7/23 15:51, Juergen Christ wrote:
> Do not reinitialize vector lanes to zero since they are already initialized to
> zero.
> 
> Bootstrapped and regression tested on s390x.
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (vec_init): Fix default case
> 
> gcc/Testsuite/ChangeLog:
> 
>   * gcc.target/s390/vector/vec-init-3.c: New test.

Ok. Pushed to mainline. Thanks!

Andreas



[Committed] IBM zSystems: Assume symbols without explicit alignment to be ok

2023-06-26 Thread Andreas Krebbel via Gcc-patches
A change we have committed back in 2015 relies on the backend
requested ABI alignment to be applied to ALL symbols by the
middle-end. However, this does not appear to be the case for external
symbols. With this commit we assume all symbols without explicit
alignment to be aligned according to the ABI. That's the behavior we
had before.
This fixes a performance regression caused by the 2015 patch. Since
then the address of external char type symbols have been pushed to the
literal pool, although it is safe to access them with larl (which
requires symbols to reside at even addresses).

Bootstrapped and regression tested on s390x.

gcc/
* config/s390/s390.cc (s390_encode_section_info): Set
SYMBOL_FLAG_SET_NOTALIGN2 only if the symbol has explicitely been
misaligned.

gcc/testsuite/
* gcc.target/s390/larl-1.c: New test.
---
 gcc/config/s390/s390.cc|  6 +++--
 gcc/testsuite/gcc.target/s390/larl-1.c | 32 ++
 2 files changed, 36 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/larl-1.c

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 9284477396d..d9f10542473 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -13706,8 +13706,10 @@ s390_encode_section_info (tree decl, rtx rtl, int 
first)
 {
   /* Store the alignment to be able to check if we can use
 a larl/load-relative instruction.  We only handle the cases
-that can go wrong (i.e. no FUNC_DECLs).  */
-  if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
+that can go wrong (i.e. no FUNC_DECLs).
+All symbols without an explicit alignment are assumed to be 2
+byte aligned as mandated by our ABI.  */
+  if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) % 16)
SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
   else if (DECL_ALIGN (decl) % 32)
SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
diff --git a/gcc/testsuite/gcc.target/s390/larl-1.c 
b/gcc/testsuite/gcc.target/s390/larl-1.c
new file mode 100644
index 000..5ef2ef63f82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/larl-1.c
@@ -0,0 +1,32 @@
+/* Check if load-address-relative instructions are created */
+
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O2 -march=z10 -mzarch -fno-section-anchors" } */
+
+/* An explicitely misaligned symbol.  This symbol is NOT aligned as
+   mandated by our ABI.  However, the back-end needs to handle that in
+   order to make things like __attribute__((packed)) work.  The symbol
+   address is expected to be loaded from literal pool.  */
+/* { dg-final { scan-assembler "lgrl\t%r2," { target { lp64 } } } } */
+/* { dg-final { scan-assembler "lrl\t%r2," { target { ! lp64 } } } } */
+extern char align1 __attribute__((aligned(1)));
+
+/* { dg-final { scan-assembler "larl\t%r2,align2" } } */
+extern char align2 __attribute__((aligned(2)));
+
+/* { dg-final { scan-assembler "larl\t%r2,align4" } } */
+extern char align4 __attribute__((aligned(4)));
+
+/* An external char symbol without explicit alignment has a DECL_ALIGN
+   of just 8. In contrast to local definitions DATA_ABI_ALIGNMENT is
+   NOT applied to DECL_ALIGN in that case.  Make sure the backend
+   still assumes this symbol to be aligned according to ABI
+   requirements.  */
+/* { dg-final { scan-assembler "larl\t%r2,align_default" } } */
+extern char align_default;
+
+char * foo1 () { return  }
+char * foo2 () { return  }
+char * foo3 () { return  }
+char * foo4 () { return _default; }
+
-- 
2.41.0



Re: [PATCH] libgcc: Use initarray section type for .init_stack

2023-05-25 Thread Andreas Krebbel via Gcc-patches
On 3/20/23 07:33, Kewen.Lin wrote:
> Hi,
> 
> One of my workmates found there is a warning like:
> 
>   libgcc/config/rs6000/morestack.S:402: Warning: ignoring
> incorrect section type for .init_array.0
> 
> when compiling libgcc/config/rs6000/morestack.S.
> 
> Since commit r13-6545 touched that file recently, which was
> suspected to be responsible for this warning, I did some
> investigation and found this is a warning staying for a long
> time.  For section .init_stack*, it's preferred to use
> section type SHT_INIT_ARRAY.  So this patch is use
> "@init_array" to replace "@progbits".
> 
> Although the warning is trivial, Segher suggested me to
> post this to fix it, in order to avoid any possible
> misunderstanding/confusion on the warning.
> 
> As Alan confirmed, this doesn't require a premise check
> on if the existing binutils supports "@init_array" or not,
> "because if you want split-stack to work, you must link
> with gold, any version of binutils that has gold has an
> assembler that understands @init_array". (Thanks Alan!)
> 
> Bootstrapped and regtested on x86_64-redhat-linux
> and powerpc64{,le}-linux-gnu.
> 
> Is it ok for trunk when next stage 1 comes?
> 
> BR,
> Kewen
> -
> libgcc/ChangeLog:
> 
>   * config/i386/morestack.S: Use @init_array rather than
>   @progbits for section type of section .init_array.
>   * config/rs6000/morestack.S: Likewise.
>   * config/s390/morestack.S: Likewise.

s390 parts are ok. I did run a bootstrap and regression. Looks all good. Thanks!

Andreas



Re: [PATCH] s390: Implement TARGET_ATOMIC_ALIGN_FOR_MODE

2023-05-16 Thread Andreas Krebbel via Gcc-patches
On 5/16/23 08:43, Stefan Schulze Frielinghaus wrote:
> So far atomic objects are aligned according to their default alignment.
> For 128 bit scalar types like int128 or long double this results in an
> 8 byte alignment which is wrong and must be 16 byte.
> 
> libstdc++ already computes a correct alignment, though, still adding a
> test case in order to make sure that both implementations are
> compatible.
> 
> Bootstrapped and regtested.  Ok for mainline?  Since this is an ABI
> break, is a backport to GCC 13 reasonable?

Ok for mainline.

I would also like to have it in GCC 13. It is an ABI breakage but on the other 
hand it also fixes an
ABI inconsistency between C and C++ which we should fix asap I think.

Andreas


> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (TARGET_ATOMIC_ALIGN_FOR_MODE):
>   New.
>   (s390_atomic_align_for_mode): New.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.target/s390/atomic-align-1.C: New test.
>   * gcc.target/s390/atomic-align-1.c: New test.
>   * gcc.target/s390/atomic-align-2.c: New test.
> ---
>  gcc/config/s390/s390.cc   |  8 ++
>  .../g++.target/s390/atomic-align-1.C  | 25 +++
>  .../gcc.target/s390/atomic-align-1.c  | 23 +
>  .../gcc.target/s390/atomic-align-2.c  | 18 +
>  4 files changed, 74 insertions(+)
>  create mode 100644 gcc/testsuite/g++.target/s390/atomic-align-1.C
>  create mode 100644 gcc/testsuite/gcc.target/s390/atomic-align-1.c
>  create mode 100644 gcc/testsuite/gcc.target/s390/atomic-align-2.c
> 
> diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
> index 505de995da8..4813bf91dc4 100644
> --- a/gcc/config/s390/s390.cc
> +++ b/gcc/config/s390/s390.cc
> @@ -450,6 +450,14 @@ s390_preserve_fpr_arg_p (int regno)
> && regno >= FPR0_REGNUM);
>  }
>  
> +#undef TARGET_ATOMIC_ALIGN_FOR_MODE
> +#define TARGET_ATOMIC_ALIGN_FOR_MODE s390_atomic_align_for_mode
> +static unsigned int
> +s390_atomic_align_for_mode (machine_mode mode)
> +{
> +  return GET_MODE_BITSIZE (mode);
> +}
> +
>  /* A couple of shortcuts.  */
>  #define CONST_OK_FOR_J(x) \
>   CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
> diff --git a/gcc/testsuite/g++.target/s390/atomic-align-1.C 
> b/gcc/testsuite/g++.target/s390/atomic-align-1.C
> new file mode 100644
> index 000..43aa0bc39ed
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/s390/atomic-align-1.C
> @@ -0,0 +1,25 @@
> +/* { dg-do compile { target int128 } } */
> +/* { dg-options "-std=c++11" } */
> +/* { dg-final { scan-assembler-times {\.align\t2} 2 } } */
> +/* { dg-final { scan-assembler-times {\.align\t4} 2 } } */
> +/* { dg-final { scan-assembler-times {\.align\t8} 3 } } */
> +/* { dg-final { scan-assembler-times {\.align\t16} 2 } } */
> +
> +#include 
> +
> +// 2
> +std::atomic var_char;
> +std::atomic var_short;
> +// 4
> +std::atomic var_int;
> +// 8
> +std::atomic var_long;
> +std::atomic var_long_long;
> +// 16
> +std::atomic<__int128> var_int128;
> +// 4
> +std::atomic var_float;
> +// 8
> +std::atomic var_double;
> +// 16
> +std::atomic var_long_double;
> diff --git a/gcc/testsuite/gcc.target/s390/atomic-align-1.c 
> b/gcc/testsuite/gcc.target/s390/atomic-align-1.c
> new file mode 100644
> index 000..b2e1233e3ee
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/atomic-align-1.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile { target int128 } } */
> +/* { dg-options "-std=c11" } */
> +/* { dg-final { scan-assembler-times {\.align\t2} 2 } } */
> +/* { dg-final { scan-assembler-times {\.align\t4} 2 } } */
> +/* { dg-final { scan-assembler-times {\.align\t8} 3 } } */
> +/* { dg-final { scan-assembler-times {\.align\t16} 2 } } */
> +
> +// 2
> +_Atomic char var_char;
> +_Atomic short var_short;
> +// 4
> +_Atomic int var_int;
> +// 8
> +_Atomic long var_long;
> +_Atomic long long var_long_long;
> +// 16
> +_Atomic __int128 var_int128;
> +// 4
> +_Atomic float var_float;
> +// 8
> +_Atomic double var_double;
> +// 16
> +_Atomic long double var_long_double;
> diff --git a/gcc/testsuite/gcc.target/s390/atomic-align-2.c 
> b/gcc/testsuite/gcc.target/s390/atomic-align-2.c
> new file mode 100644
> index 000..0bf17341bf8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/atomic-align-2.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile { target int128 } } */
> +/* { dg-options "-O -std=c11" } */
> +/* { dg-final { scan-assembler-not {abort} } } */
> +
> +/* The stack is 8 byte aligned which means GCC has to manually align a 16 
> byte
> +   aligned object.  This is done by allocating not 16 but rather 24 bytes for
> +   variable X and then manually aligning a pointer inside the memory block.
> +   Validate this by ensuring that the if-statement is optimized out.  */
> +
> +void bar (_Atomic unsigned __int128 *ptr);
> +
> +void foo (void) {
> +  _Atomic unsigned __int128 x;
> +  unsigned long n = (unsigned long)
> +  if (n % 16 != 0)
> +__builtin_abort ();
> +  bar ();
> +}



Re: [PATCH 0/3] Refactor memory block operations

2023-05-15 Thread Andreas Krebbel via Gcc-patches
On 5/15/23 09:17, Stefan Schulze Frielinghaus wrote:
> Bootstrapped and regtested.  Ok for mainline?
> 
> Stefan Schulze Frielinghaus (3):
>   s390: Refactor block operation cpymem
>   s390: Add block operation movmem
>   s390: Refactor block operation setmem
> 
>  gcc/config/s390/s390-protos.h|   5 +-
>  gcc/config/s390/s390.cc  | 301 ---
>  gcc/config/s390/s390.md  |  61 -
>  gcc/testsuite/gcc.target/s390/memset-1.c |   7 +-
>  4 files changed, 331 insertions(+), 43 deletions(-)
> 

Ok. Thanks!

Andreas



Re: [PATCH] s390: Fix ifcvt test cases

2023-03-03 Thread Andreas Krebbel via Gcc-patches
On 3/2/23 19:13, Robin Dapp wrote:
> Hi,
> 
> we seem to flip flop between the "high" and "not low" variants of load on
> condition.  Accept both in the affected test cases.
> 
> Going to commit this as obvious.
> 
> Regards
>  Robin
> 
> --
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/ifcvt-two-insns-bool.c: Allow "high" and
>   "not low or equal" load on condition variant.
>   * gcc.target/s390/ifcvt-two-insns-int.c: Dito.
>   * gcc.target/s390/ifcvt-two-insns-long.c: Dito.

Ok. Thanks!

Andreas

> ---
>  gcc/testsuite/gcc.target/s390/ifcvt-two-insns-bool.c | 4 ++--
>  gcc/testsuite/gcc.target/s390/ifcvt-two-insns-int.c  | 4 ++--
>  gcc/testsuite/gcc.target/s390/ifcvt-two-insns-long.c | 4 ++--
>  3 files changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-bool.c 
> b/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-bool.c
> index 1027ddceb935..a56bc4676143 100644
> --- a/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-bool.c
> +++ b/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-bool.c
> @@ -3,8 +3,8 @@
>  /* { dg-do run } */
>  /* { dg-options "-O2 -march=z13 -mzarch --save-temps" } */
>  
> -/* { dg-final { scan-assembler "lochih\t%r.?,1" } } */
> -/* { dg-final { scan-assembler "locrh\t.*" } } */
> +/* { dg-final { scan-assembler "lochi(?:h|nle)\t%r.?,1" } } */
> +/* { dg-final { scan-assembler "locr(?:h|nle)\t.*" } } */
>  #include 
>  #include 
>  #include 
> diff --git a/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-int.c 
> b/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-int.c
> index fc6946f2466d..64b8a732290e 100644
> --- a/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-int.c
> +++ b/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-int.c
> @@ -3,8 +3,8 @@
>  /* { dg-do run } */
>  /* { dg-options "-O2 -march=z13 -mzarch --save-temps" } */
>  
> -/* { dg-final { scan-assembler "lochih\t%r.?,1" } } */
> -/* { dg-final { scan-assembler "locrh\t.*" } } */
> +/* { dg-final { scan-assembler "lochi(h|nle)\t%r.?,1" } } */
> +/* { dg-final { scan-assembler "locr(?:h|nle)\t.*" } } */
>  #include 
>  #include 
>  #include 
> diff --git a/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-long.c 
> b/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-long.c
> index 51af4985247a..f2d784e762a8 100644
> --- a/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-long.c
> +++ b/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-long.c
> @@ -3,8 +3,8 @@
>  /* { dg-do run } */
>  /* { dg-options "-O2 -march=z13 -mzarch --save-temps" } */
>  
> -/* { dg-final { scan-assembler "locghih\t%r.?,1" } } */
> -/* { dg-final { scan-assembler "locgrh\t.*" } } */
> +/* { dg-final { scan-assembler "locghi(?:h|nle)\t%r.?,1" } } */
> +/* { dg-final { scan-assembler "locgr(?:h|nle)\t.*" } } */
>  
>  #include 
>  #include 



Re: [PATCH] s390: libatomic: Fix 16 byte atomic {cas,load,store}

2023-03-03 Thread Andreas Krebbel via Gcc-patches
On 3/2/23 16:24, Stefan Schulze Frielinghaus wrote:
> This is a follow-up to commit a4c6bd0821099f6b8c0f64a96ffd9d01a025c413
> introducing a runtime check for alignment for 16 byte atomic
> compare-exchange, load, and store.
> 
> Bootstrapped and regtested on s390.
> Ok for mainline and gcc-{12,11,10}?
> 
> libatomic/ChangeLog:
> 
>   * config/s390/cas_n.c: New file.
>   * config/s390/load_n.c: New file.
>   * config/s390/store_n.c: New file.

Ok. Thanks!

Andreas

> ---
>  libatomic/config/s390/cas_n.c   | 65 +
>  libatomic/config/s390/load_n.c  | 57 +
>  libatomic/config/s390/store_n.c | 54 +++
>  3 files changed, 176 insertions(+)
>  create mode 100644 libatomic/config/s390/cas_n.c
>  create mode 100644 libatomic/config/s390/load_n.c
>  create mode 100644 libatomic/config/s390/store_n.c
> 
> diff --git a/libatomic/config/s390/cas_n.c b/libatomic/config/s390/cas_n.c
> new file mode 100644
> index 000..44b7152ca5d
> --- /dev/null
> +++ b/libatomic/config/s390/cas_n.c
> @@ -0,0 +1,65 @@
> +/* Copyright (C) 2018-2023 Free Software Foundation, Inc.
> +
> +   This file is part of the GNU Atomic Library (libatomic).
> +
> +   Libatomic is free software; you can redistribute it and/or modify it
> +   under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3 of the License, or
> +   (at your option) any later version.
> +
> +   Libatomic is distributed in the hope that it will be useful, but WITHOUT 
> ANY
> +   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
> +   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> +   more details.
> +
> +   Under Section 7 of GPL version 3, you are granted additional
> +   permissions described in the GCC Runtime Library Exception, version
> +   3.1, as published by the Free Software Foundation.
> +
> +   You should have received a copy of the GNU General Public License and
> +   a copy of the GCC Runtime Library Exception along with this program;
> +   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
> +   .  */
> +
> +#include 
> +
> +
> +/* Analog to config/s390/exch_n.c.  */
> +
> +#if !DONE && N == 16
> +bool
> +SIZE(libat_compare_exchange) (UTYPE *mptr, UTYPE *eptr, UTYPE newval,
> +   int smodel, int fmodel UNUSED)
> +{
> +  if (!((uintptr_t)mptr & 0xf))
> +{
> +  return __atomic_compare_exchange_n (
> + (UTYPE *)__builtin_assume_aligned (mptr, 16), eptr, newval, false,
> + __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
> +}
> +  else
> +{
> +  UTYPE oldval;
> +  UWORD magic;
> +  bool ret;
> +
> +  pre_seq_barrier (smodel);
> +  magic = protect_start (mptr);
> +
> +  oldval = *mptr;
> +  ret = (oldval == *eptr);
> +  if (ret)
> + *mptr = newval;
> +  else
> + *eptr = oldval;
> +
> +  protect_end (mptr, magic);
> +  post_seq_barrier (smodel);
> +
> +  return ret;
> +}
> +}
> +#define DONE 1
> +#endif /* N == 16 */
> +
> +#include "../../cas_n.c"
> diff --git a/libatomic/config/s390/load_n.c b/libatomic/config/s390/load_n.c
> new file mode 100644
> index 000..335d2f8b2c3
> --- /dev/null
> +++ b/libatomic/config/s390/load_n.c
> @@ -0,0 +1,57 @@
> +/* Copyright (C) 2018-2023 Free Software Foundation, Inc.
> +
> +   This file is part of the GNU Atomic Library (libatomic).
> +
> +   Libatomic is free software; you can redistribute it and/or modify it
> +   under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3 of the License, or
> +   (at your option) any later version.
> +
> +   Libatomic is distributed in the hope that it will be useful, but WITHOUT 
> ANY
> +   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
> +   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> +   more details.
> +
> +   Under Section 7 of GPL version 3, you are granted additional
> +   permissions described in the GCC Runtime Library Exception, version
> +   3.1, as published by the Free Software Foundation.
> +
> +   You should have received a copy of the GNU General Public License and
> +   a copy of the GCC Runtime Library Exception along with this program;
> +   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
> +   .  */
> +
> +#include 
> +
> +
> +/* Analog to config/s390/exch_n.c.  */
> +
> +#if !DONE && N == 16
> +UTYPE
> +SIZE(libat_load) (UTYPE *mptr, int smodel)
> +{
> +  if (!((uintptr_t)mptr & 0xf))
> +{
> +  return __atomic_load_n ((UTYPE *)__builtin_assume_aligned (mptr, 16),
> +   __ATOMIC_SEQ_CST);
> +}
> +  else
> +{
> +  UTYPE ret;
> +  UWORD magic;
> +
> +  pre_seq_barrier (smodel);
> +  magic = protect_start 

Re: [PATCH] s390: Use arch14 instead of z16 for -march=native.

2023-03-03 Thread Andreas Krebbel via Gcc-patches
On 3/2/23 19:17, Robin Dapp wrote:
> Hi,
> 
> When compiling on a system where binutils do not yet support the 'z16'
> name assembling fails with -march=native which we currently interpret
> as -march=z16 (on a z16 machine).  This patch uses -march=arch14
> instead.
> 
> Is it OK?

Ok. Thanks!

Andreas


> 
> Regards
>  Robin
> 
> --
> 
> gcc/ChangeLog:
> 
>   * config/s390/driver-native.cc (s390_host_detect_local_cpu): Use
>   arch14 instead of z16.
> ---
>  gcc/config/s390/driver-native.cc | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/config/s390/driver-native.cc 
> b/gcc/config/s390/driver-native.cc
> index 563da45c7f6e..3b9c1e1ca5df 100644
> --- a/gcc/config/s390/driver-native.cc
> +++ b/gcc/config/s390/driver-native.cc
> @@ -125,10 +125,10 @@ s390_host_detect_local_cpu (int argc, const char **argv)
> break;
>   case 0x3931:
>   case 0x3932:
> -   cpu = "z16";
> +   cpu = "arch14";
> break;
>   default:
> -   cpu = "z16";
> +   cpu = "arch14";
> break;
>   }
>   }



Re: [PATCH] s390: Add LEN_LOAD/LEN_STORE support.

2023-02-27 Thread Andreas Krebbel via Gcc-patches
On 2/27/23 11:13, Robin Dapp wrote:
>> Do you really need a copy of the address register? Couldn't you just do a
>> src = adjust_address (operands[1], BLKmode, 0);
>> You create a paradoxical subreg of the QImode input but vll actually
>> uses the whole 32 bit value. Couldn't we end up with uninitialized
>> bytes being used as part of the length then? Do we need a zero-extend
>> here?
> 
> v2 attached with these problems addressed.
> 
> Testsuite and bootstrap as before.

Ok. Thanks!

Andreas




Re: [PATCH] IBM zSystems: Do not propagate scheduler state across basic blocks [PR108102]

2023-02-13 Thread Andreas Krebbel via Gcc-patches
On 2/11/23 16:59, Stefan Schulze Frielinghaus wrote:
> So far we propagate scheduler state across basic blocks within EBBs and
> reset the state otherwise.  In certain circumstances the entry block of
> an EBB might be empty, i.e., no_real_insns_p is true.  In those cases
> scheduler state is not reset and subsequently wrong state is propagated
> to following blocks of the same EBB.
> 
> Since the performance benefit of tracking state across basic blocks is
> questionable on modern hardware, simply reset the state for each basic
> block.
> 
> Fix also resetting f{p,x}d_longrunning.
> 
> Bootstrapped and regtested on IBM zSystems.  Ok for mainline?
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (s390_bb_fallthru_entry_likely): Remove.
>   (struct s390_sched_state): Initialise to zero.
>   (s390_sched_variable_issue): For better debuggability also emit
>   the current side.
>   (s390_sched_init): Unconditionally reset scheduler state.

Ok. Thanks!

Andreas




Re: [PATCH] IBM zSystems: Fix predicate execute_operation

2023-02-13 Thread Andreas Krebbel via Gcc-patches
On 2/11/23 17:10, Stefan Schulze Frielinghaus wrote:
> Use constrain_operands in order to check whether there exists a valid
> alternative instead of extract_constrain_insn which ICEs in case no
> alternative is found.
> 
> Bootstrapped and regtested on IBM zSystems.  Ok for mainline?
> 
> gcc/ChangeLog:
> 
>   * config/s390/predicates.md (execute_operation): Use
>   constrain_operands instead of extract_constrain_insn in order to
>   determine wheter there exists a valid alternative.

Ok. Thanks!

Andreas



Re: [PATCH] s390: Add LEN_LOAD/LEN_STORE support.

2023-02-13 Thread Andreas Krebbel via Gcc-patches
On 2/2/23 09:43, Robin Dapp wrote:
> Hi,
> 
> this patch adds LEN_LOAD/LEN_STORE support for z14 and newer.
> It defines a bias value of -1 and implements the LEN_LOAD and LEN_STORE
> optabs.
> 
> It also includes various vll/vstl testcases adapted from Kewen Lin's patch
> for Power.
> 
> Bootstrapped and regtested on z13-z16.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> gcc/ChangeLog:
> 
>   * config/s390/predicates.md (vll_bias_operand): Add -1 bias.
>   * config/s390/s390.cc (s390_option_override_internal): Make
>   partial vector usage the default from z13 on.
>   * config/s390/vector.md (len_load_v16qi): Add.
>   (len_store_v16qi): Add.

...

> +;
> +; Implement len_load/len_store optabs with vll/vstl.
> +(define_expand "len_load_v16qi"
> +  [(match_operand:V16QI 0 "register_operand")
> +   (match_operand:V16QI 1 "memory_operand")
> +   (match_operand:QI 2 "register_operand")
> +   (match_operand:QI 3 "vll_bias_operand")
> +  ]
> +  "TARGET_VX && TARGET_64BIT"
> +{
> +  rtx src1 = XEXP (operands[1], 0);
> +  rtx src = gen_reg_rtx (Pmode);
> +  emit_move_insn (src, src1);
> +  rtx mem = gen_rtx_MEM (BLKmode, src);

Do you really need a copy of the address register? Couldn't you just do a
src = adjust_address (operands[1], BLKmode, 0);

> +
> +  rtx len = gen_lowpart (SImode, operands[2]);
> +  emit_insn (gen_vllv16qi (operands[0], len, mem));

You create a paradoxical subreg of the QImode input but vll actually uses the 
whole 32 bit value.
Couldn't we end up with uninitialized bytes being used as part of the length 
then? Do we need a
zero-extend here?

Bye,

Andreas



[PATCH 2/3] IBM zSystems: Make stack_tie to work with hard frame-pointer

2023-02-01 Thread Andreas Krebbel via Gcc-patches
With this patch a scheduling barrier is created to prevent the insn
setting up the frame-pointer and instructions which save GPRs to the
stack to be swapped.  Otherwise broken CFI information would be
generated since the stack save insns would use a base register which
is not currently declared as holding the CFA.

Without -mpreserve-args this did not happen because the store multiple
we used for saving the GPRs would also cover the frame-pointer
register and therefore creates a dependency on the frame-pointer
hardreg. However, with this patch the stack_tie is emitted regardless
of -mpreserve-args since this in general appears to be the safer
approach.

* config/s390/s390.cc (save_gprs): Use gen_frame_mem.
(restore_gprs): Likewise.
(s390_emit_stack_tie): Make the stack_tie to be dependent on the
frame pointer if a frame-pointer is used.
(s390_emit_prologue): Emit stack_tie when frame-pointer is needed.
* config/s390/s390.md (stack_tie): Add a register operand and
rename to ...
(@stack_tie): ... this.
---
 gcc/config/s390/s390.cc | 17 -
 gcc/config/s390/s390.md |  5 +++--
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index a9bb610385b..4db5677ce29 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -10898,9 +10898,7 @@ save_gprs (rtx base, int offset, int first, int last)
   int i;
 
   addr = plus_constant (Pmode, base, offset);
-  addr = gen_rtx_MEM (Pmode, addr);
-
-  set_mem_alias_set (addr, get_frame_alias_set ());
+  addr = gen_frame_mem (Pmode, addr);
 
   /* Special-case single register.  */
   if (first == last)
@@ -11012,8 +11010,7 @@ restore_gprs (rtx base, int offset, int first, int last)
   rtx addr, insn;
 
   addr = plus_constant (Pmode, base, offset);
-  addr = gen_rtx_MEM (Pmode, addr);
-  set_mem_alias_set (addr, get_frame_alias_set ());
+  addr = gen_frame_mem (Pmode, addr);
 
   /* Special-case single register.  */
   if (first == last)
@@ -11062,10 +11059,11 @@ s390_load_got (void)
 static void
 s390_emit_stack_tie (void)
 {
-  rtx mem = gen_frame_mem (BLKmode,
-  gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
-
-  emit_insn (gen_stack_tie (mem));
+  rtx mem = gen_frame_mem (BLKmode, stack_pointer_rtx);
+  if (frame_pointer_needed)
+emit_insn (gen_stack_tie (Pmode, mem, hard_frame_pointer_rtx));
+  else
+emit_insn (gen_stack_tie (Pmode, mem, stack_pointer_rtx));
 }
 
 /* Copy GPRS into FPR save slots.  */
@@ -11676,6 +11674,7 @@ s390_emit_prologue (void)
 
   if (frame_pointer_needed)
 {
+  s390_emit_stack_tie ();
   insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
   RTX_FRAME_RELATED_P (insn) = 1;
 }
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 4828aa08be6..00d39608e1d 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -11590,9 +11590,10 @@
 ; This is used in s390_emit_prologue in order to prevent insns
 ; adjusting the stack pointer to be moved over insns writing stack
 ; slots using a copy of the stack pointer in a different register.
-(define_insn "stack_tie"
+(define_insn "@stack_tie"
   [(set (match_operand:BLK 0 "memory_operand" "+m")
-(unspec:BLK [(match_dup 0)] UNSPEC_TIE))]
+(unspec:BLK [(match_dup 0)
+(match_operand:P 1 "register_operand" "r")] UNSPEC_TIE))]
   ""
   ""
   [(set_attr "length" "0")])
-- 
2.39.1



[PATCH 3/3] IBM zSystems: Save argument registers to the stack -mpreserve-args

2023-02-01 Thread Andreas Krebbel via Gcc-patches
This adds support for preserving the content of parameter registers to
the stack and emit CFI for it. This useful for applications which want
to implement their own stack unwinding and need access to function
arguments without having to rely on debug information.

With the -mpreserve-args option GPRs and FPRs are save to the stack
slots which are reserved for stdargs in the register save area.

gcc/ChangeLog:

* config/s390/s390.cc (s390_restore_gpr_p): New function.
(s390_preserve_gpr_arg_in_range_p): New function.
(s390_preserve_gpr_arg_p): New function.
(s390_preserve_fpr_arg_p): New function.
(s390_register_info_stdarg_fpr): Rename to ...
(s390_register_info_arg_fpr): ... this. Add -mpreserve-args handling.
(s390_register_info_stdarg_gpr): Rename to ...
(s390_register_info_arg_gpr): ... this. Add -mpreserve-args handling.
(s390_register_info): Use the renamed functions above.
(s390_optimize_register_info): Likewise.
(save_fpr): Generate CFI for -mpreserve-args.
(save_gprs): Generate CFI for -mpreserve-args. Drop return value.
(s390_emit_prologue): Adjust to changed calling convention of save_gprs.
(s390_optimize_prologue): Likewise.
* config/s390/s390.opt: New option -mpreserve-args

gcc/testsuite/ChangeLog:

* gcc.target/s390/preserve-args-1.c: New test.
* gcc.target/s390/preserve-args-2.c: New test.
---
 gcc/config/s390/s390.cc   | 254 +-
 gcc/config/s390/s390.opt  |   4 +
 .../gcc.target/s390/preserve-args-1.c |  17 ++
 .../gcc.target/s390/preserve-args-2.c |  19 ++
 .../gcc.target/s390/preserve-args-3.c |  19 ++
 5 files changed, 239 insertions(+), 74 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/preserve-args-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/preserve-args-2.c
 create mode 100644 gcc/testsuite/gcc.target/s390/preserve-args-3.c

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 4db5677ce29..708b48b5ab6 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -411,6 +411,45 @@ struct s390_address
 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
 #define VEC_ARG_NUM_REG 8
 
+/* Return TRUE if GPR REGNO is supposed to be restored in the function
+   epilogue.  */
+static inline bool
+s390_restore_gpr_p (int regno)
+{
+  return (cfun_frame_layout.first_restore_gpr != -1
+ && regno >= cfun_frame_layout.first_restore_gpr
+ && regno <= cfun_frame_layout.last_restore_gpr);
+}
+
+/* Return TRUE if any of the registers in range [FIRST, LAST] is saved
+   because of -mpreserve-args.  */
+static inline bool
+s390_preserve_gpr_arg_in_range_p (int first, int last)
+{
+  int num_arg_regs = MIN (crtl->args.info.gprs + cfun->va_list_gpr_size,
+ GP_ARG_NUM_REG);
+  return (num_arg_regs
+ && s390_preserve_args_p
+ && first <= GPR2_REGNUM + num_arg_regs - 1
+ && last >= GPR2_REGNUM);
+}
+
+static inline bool
+s390_preserve_gpr_arg_p (int regno)
+{
+  return s390_preserve_gpr_arg_in_range_p (regno, regno);
+}
+
+static inline bool
+s390_preserve_fpr_arg_p (int regno)
+{
+  int num_arg_regs = MIN (crtl->args.info.fprs + cfun->va_list_fpr_size,
+ FP_ARG_NUM_REG);
+  return (s390_preserve_args_p
+ && regno <= FPR0_REGNUM + num_arg_regs - 1
+ && regno >= FPR0_REGNUM);
+}
+
 /* A couple of shortcuts.  */
 #define CONST_OK_FOR_J(x) \
CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
@@ -9893,61 +9932,89 @@ s390_register_info_gprtofpr ()
 }
 
 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
-   stdarg.
+   stdarg or -mpreserve-args.
This is a helper routine for s390_register_info.  */
-
 static void
-s390_register_info_stdarg_fpr ()
+s390_register_info_arg_fpr ()
 {
   int i;
-  int min_fpr;
-  int max_fpr;
+  int min_stdarg_fpr = INT_MAX, max_stdarg_fpr = -1;
+  int min_preserve_fpr = INT_MAX, max_preserve_fpr = -1;
+  int min_fpr, max_fpr;
 
   /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
  f0-f4 for 64 bit.  */
-  if (!cfun->stdarg
-  || !TARGET_HARD_FLOAT
-  || !cfun->va_list_fpr_size
-  || crtl->args.info.fprs >= FP_ARG_NUM_REG)
-return;
+  if (cfun->stdarg
+  && TARGET_HARD_FLOAT
+  && cfun->va_list_fpr_size
+  && crtl->args.info.fprs < FP_ARG_NUM_REG)
+{
+  min_stdarg_fpr = crtl->args.info.fprs;
+  max_stdarg_fpr = min_stdarg_fpr + cfun->va_list_fpr_size - 1;
+  if (max_stdarg_fpr >= FP_ARG_NUM_REG)
+   max_stdarg_fpr = FP_ARG_NUM_REG - 1;
+
+  /* FPR argument regs start at f0.  */
+  min_stdarg_fpr += FPR0_REGNUM;
+  max_stdarg_fpr += FPR0_REGNUM;
+}
 
-  min_fpr = crtl->args.info.fprs;
-  max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
-  if (max_fpr >= FP_ARG_NUM_REG)
-max_fpr = FP_ARG_NUM_REG - 1;
+  if 

[PATCH 1/3] New reg note REG_CFA_NORESTORE

2023-02-01 Thread Andreas Krebbel via Gcc-patches
This patch introduces a new reg note which can be used to tell the CFI
verification in dwarf2cfi that a register is stored without intending
to restore from it.

This is useful when storing e.g. register contents to the stack and
generate CFI for it although the register is not really supposed to be
restored.

gcc/ChangeLog:

* dwarf2cfi.cc (dwarf2out_frame_debug_cfa_restore): Add
EMIT_CFI parameter.
(dwarf2out_frame_debug): Add case for REG_CFA_NORESTORE.
* reg-notes.def (REG_CFA_NOTE): New reg note definition.
---
 gcc/dwarf2cfi.cc  | 15 ++-
 gcc/reg-notes.def |  5 +
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/gcc/dwarf2cfi.cc b/gcc/dwarf2cfi.cc
index 1c70bd83f28..57283c10a29 100644
--- a/gcc/dwarf2cfi.cc
+++ b/gcc/dwarf2cfi.cc
@@ -1496,10 +1496,12 @@ dwarf2out_frame_debug_cfa_val_expression (rtx set)
   update_row_reg_save (cur_row, dwf_regno (dest), cfi);
 }
 
-/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_RESTORE note.  */
+/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_RESTORE
+   note. When called with EMIT_CFI set to false emitting a CFI
+   statement is suppressed.  */
 
 static void
-dwarf2out_frame_debug_cfa_restore (rtx reg)
+dwarf2out_frame_debug_cfa_restore (rtx reg, bool emit_cfi)
 {
   gcc_assert (REG_P (reg));
 
@@ -1507,7 +1509,8 @@ dwarf2out_frame_debug_cfa_restore (rtx reg)
   if (!span)
 {
   unsigned int regno = dwf_regno (reg);
-  add_cfi_restore (regno);
+  if (emit_cfi)
+   add_cfi_restore (regno);
   update_row_reg_save (cur_row, regno, NULL);
 }
   else
@@ -1522,7 +1525,8 @@ dwarf2out_frame_debug_cfa_restore (rtx reg)
  reg = XVECEXP (span, 0, par_index);
  gcc_assert (REG_P (reg));
  unsigned int regno = dwf_regno (reg);
- add_cfi_restore (regno);
+ if (emit_cfi)
+   add_cfi_restore (regno);
  update_row_reg_save (cur_row, regno, NULL);
}
 }
@@ -2309,6 +2313,7 @@ dwarf2out_frame_debug (rtx_insn *insn)
break;
 
   case REG_CFA_RESTORE:
+  case REG_CFA_NO_RESTORE:
n = XEXP (note, 0);
if (n == NULL)
  {
@@ -2317,7 +2322,7 @@ dwarf2out_frame_debug (rtx_insn *insn)
  n = XVECEXP (n, 0, 0);
n = XEXP (n, 0);
  }
-   dwarf2out_frame_debug_cfa_restore (n);
+   dwarf2out_frame_debug_cfa_restore (n, REG_NOTE_KIND (note) == 
REG_CFA_RESTORE);
handled_one = true;
break;
 
diff --git a/gcc/reg-notes.def b/gcc/reg-notes.def
index 23de1f13ee9..1f74a605b3e 100644
--- a/gcc/reg-notes.def
+++ b/gcc/reg-notes.def
@@ -157,6 +157,11 @@ REG_CFA_NOTE (CFA_VAL_EXPRESSION)
first pattern is the register to be restored.  */
 REG_CFA_NOTE (CFA_RESTORE)
 
+/* Like CFA_RESTORE but without actually emitting CFI.  This can be
+   used to tell the verification infrastructure that a register is
+   saved without intending to restore it.  */
+REG_CFA_NOTE (CFA_NO_RESTORE)
+
 /* Attached to insns that are RTX_FRAME_RELATED_P, marks insn that sets
vDRAP from DRAP.  If vDRAP is a register, vdrap_reg is initalized
to the argument, if it is a MEM, it is ignored.  */
-- 
2.39.1



[Committed 0/3] IBM zSystems: Add -mpreserve-args option

2023-02-01 Thread Andreas Krebbel via Gcc-patches
This adds support for preserving the content of parameter registers to
the stack and emit CFI for it. This useful for applications which want
to implement their own stack unwinding and need access to function
arguments without having to rely on debug information.

With the -mpreserve-args option GPRs and FPRs are save to the stack
slots which are reserved for stdargs in the register save area.

The introduction of REG_CFA_NORESTORE is a common code change which
has been approved last year already.

Bootstrapped and regtested on s390x. Committed to mainline. 

Andreas Krebbel (3):
  New reg note REG_CFA_NORESTORE
  IBM zSystems: Make stack_tie to work with hard frame pointer
  IBM zSystems: Save argument registers to the stack -mpreserve-args

 gcc/config/s390/s390.cc   | 271 --
 gcc/config/s390/s390.md   |   5 +-
 gcc/config/s390/s390.opt  |   4 +
 gcc/dwarf2cfi.cc  |  15 +-
 gcc/reg-notes.def |   5 +
 .../gcc.target/s390/preserve-args-1.c |  17 ++
 .../gcc.target/s390/preserve-args-2.c |  19 ++
 .../gcc.target/s390/preserve-args-3.c |  19 ++
 8 files changed, 265 insertions(+), 90 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/preserve-args-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/preserve-args-2.c
 create mode 100644 gcc/testsuite/gcc.target/s390/preserve-args-3.c

-- 
2.39.1



Re: [PATCH v2] IBM zSystems: Fix TARGET_D_CPU_VERSIONS

2023-01-24 Thread Andreas Krebbel via Gcc-patches
On 1/24/23 09:47, Stefan Schulze Frielinghaus wrote:
> In the context of D the interpretation of S390, S390X, and SystemZ is a
> bit fuzzy.  The wording S390X was wrongly deprecated in favour of
> SystemZ by commit
> https://github.com/dlang/dlang.org/commit/3b50a4c3faf01c32234d0ef8be5f82915a61c23f
> Thus, SystemZ is used for 64-bit targets, now, and S390 for 31-bit
> targets.  However, in TARGET_D_CPU_VERSIONS depending on TARGET_ZARCH we
> set the CPU version to SystemZ.  This is also the case if compiled for
> 31-bit targets leading to the following error:
> 
> libphobos/libdruntime/core/sys/posix/sys/stat.d:967:13: error: static assert: 
>  '96u == 144u' is false
>   967 | static assert(stat_t.sizeof == 144);
>   | ^
> 
> Thus in order to keep this patch simple I went for keeping SystemZ for
> 64-bit targets and S390, as usual, for 31-bit targets and dropped the
> distinction between ESA and z/Architecture.
> 
> Bootstrapped and regtested on IBM zSystems.  Ok for mainline?
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390-d.cc (s390_d_target_versions): Fix detection
>   of CPU version.

Ok, thanks!

Andreas



Re: PING: New reg note REG_CFA_NORESTORE

2023-01-11 Thread Andreas Krebbel via Gcc-patches
On 12/27/22 19:23, Jeff Law wrote:
> 
> 
> On 12/13/22 01:55, Andreas Krebbel via Gcc-patches wrote:
>> Hi,
>>
>> I need a way to save registers on the stack and generate proper CFI for it. 
>> Since I do not intend to
>> restore them I needed a way to tell the CFI generation step about it:
>>
>> https://gcc.gnu.org/pipermail/gcc-patches/2022-November/606128.html
>>
>> Is this ok for mainline?
> Presumably there's validation bits that want to validate that everything 
> saved eventually gets restored?
> 
> There's only one call to dwarf2out_frame_debug_cfa_restore, so ISTM that 
> providing an initializer for the argument isn't needed and just creates 
> an overload (and associated code) that isn't needed.  Why not just 
> remove the default initializer?
> 
> Ok with that change or a good reason why you need to keep the initializer.

Right. I'll remove it. Thanks for having a look!

Bye,

Andreas



[Committed] IBM zSystems: Use NAND instruction to implement bit not

2023-01-11 Thread Andreas Krebbel via Gcc-patches
Bootstrapped and regression tested on s390x.

Committed to mainline.

gcc/ChangeLog:

* config/s390/s390.md (*not): New pattern.

gcc/testsuite/ChangeLog:

* gcc.target/s390/not.c: New test.
---
 gcc/config/s390/s390.md |  8 
 gcc/testsuite/gcc.target/s390/not.c | 11 +++
 2 files changed, 19 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/s390/not.c

diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 0e56fbad44d..4828aa08be6 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -8302,6 +8302,14 @@
   "nrk\t%0,%1,%2"
   [(set_attr "op_type" "RRF")])
 
+; Use NAND for bit inversion
+(define_insn "*not"
+  [(set (match_operand:GPR  0 "register_operand" "=d")
+   (not:GPR (match_operand:GPR 1 "register_operand"  "d")))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_Z15"
+  "nnrk\t%0,%1,%1"
+  [(set_attr "op_type" "RRF")])
 
 ;
 ; Block inclusive or (OC) patterns.
diff --git a/gcc/testsuite/gcc.target/s390/not.c 
b/gcc/testsuite/gcc.target/s390/not.c
new file mode 100644
index 000..dae95f7d8a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/not.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=z15 -mzarch" } */
+
+unsigned long
+foo (unsigned long a)
+{
+  return ~a;
+}
+
+/* { dg-final { scan-assembler-times "\tnngrk\t" 1 { target { lp64 } } } } */
+/* { dg-final { scan-assembler-times "\tnnrk\t" 1 { target { ! lp64 } } } } */
-- 
2.39.0



[Committed] IBM zSystems: Make -fcall-saved-... work.

2023-01-10 Thread Andreas Krebbel via Gcc-patches
Committed to mainline. Bootstrap and regression tests are clean.

gcc/ChangeLog:

* config/s390/s390.cc (s390_register_info): Check call_used_regs
instead of hard-coding the register numbers for call saved
registers.
(s390_optimize_register_info): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/s390/fcall-saved.c: New test.
---
 gcc/config/s390/s390.cc | 10 --
 gcc/testsuite/gcc.target/s390/fcall-saved.c | 11 +++
 2 files changed, 15 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/fcall-saved.c

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 42177c204f6..a9bb610385b 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -10075,8 +10075,8 @@ s390_register_info ()
 
   memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
 
-  for (i = 6; i < 16; i++)
-if (clobbered_regs[i])
+  for (i = 0; i < 16; i++)
+if (clobbered_regs[i] && !call_used_regs[i])
   cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
 
   s390_register_info_stdarg_fpr ();
@@ -10136,10 +10136,8 @@ s390_optimize_register_info ()
|| cfun_frame_layout.save_return_addr_p
|| crtl->calls_eh_return);
 
-  memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
-
-  for (i = 6; i < 16; i++)
-if (!clobbered_regs[i])
+  for (i = 0; i < 16; i++)
+if (!clobbered_regs[i] || call_used_regs[i])
   cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
 
   s390_register_info_set_ranges ();
diff --git a/gcc/testsuite/gcc.target/s390/fcall-saved.c 
b/gcc/testsuite/gcc.target/s390/fcall-saved.c
new file mode 100644
index 000..a08155372f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/fcall-saved.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -fcall-saved-r4" } */
+
+void test(void) {
+asm volatile("nop" ::: "r4");
+}
+
+/* { dg-final { scan-assembler-times "\tstg\t" 1 { target { lp64 } } } } */
+/* { dg-final { scan-assembler-times "\tlg\t" 1 { target { lp64 } } } } */
+/* { dg-final { scan-assembler-times "\tst\t" 1 { target { ! lp64 } } } } */
+/* { dg-final { scan-assembler-times "\tl\t" 1 { target { ! lp64 } } } } */
-- 
2.39.0



PING: New reg note REG_CFA_NORESTORE

2022-12-13 Thread Andreas Krebbel via Gcc-patches
Hi,

I need a way to save registers on the stack and generate proper CFI for it. 
Since I do not intend to
restore them I needed a way to tell the CFI generation step about it:

https://gcc.gnu.org/pipermail/gcc-patches/2022-November/606128.html

Is this ok for mainline?

Bye,

Andreas


[PATCH 2/2] IBM zSystems: Save argument registers to the stack -mpreserve-args

2022-11-14 Thread Andreas Krebbel via Gcc-patches
This adds support for preserving the content of parameter registers to
the stack and emit CFI for it. This useful for applications which want
to implement their own stack unwinding and need access to function
arguments.

With the -mpreserve-args option GPRs and FPRs are save to the stack
slots which are reserved for stdargs in the register save area.

gcc/ChangeLog:

* config/s390/s390.cc (s390_restore_gpr_p): New function.
(s390_preserve_gpr_arg_in_range_p): New function.
(s390_preserve_gpr_arg_p): New function.
(s390_preserve_fpr_args_p): New function.
(s390_preserve_fpr_arg_p): New function.
(s390_register_info_stdarg_fpr): Rename to ...
(s390_register_info_arg_fpr): ... this. Add -mpreserve-args handling.
(s390_register_info_stdarg_gpr): Rename to ...
(s390_register_info_arg_gpr): ... this. Add -mpreserve-args handling.
(s390_register_info): Use the renamed functions above.
(s390_optimize_register_info): Likewise.
(save_fpr): Generate CFI for -mpreserve-args.
(save_gprs): Generate CFI for -mpreserve-args. Drop return value.
(s390_emit_prologue): Adjust to changed calling convention of save_gprs.
(s390_optimize_prologue): Likewise.
* config/s390/s390.opt: New option -mpreserve-args

gcc/testsuite/ChangeLog:

* gcc.target/s390/preserve-args-1.c: New test.
* gcc.target/s390/preserve-args-2.c: New test.
---
 gcc/config/s390/s390.cc   | 263 +-
 gcc/config/s390/s390.opt  |   4 +
 .../gcc.target/s390/preserve-args-1.c |  17 ++
 .../gcc.target/s390/preserve-args-2.c |  19 ++
 4 files changed, 229 insertions(+), 74 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/preserve-args-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/preserve-args-2.c

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index f5c75395cf3..5e197b5314b 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -411,6 +411,53 @@ struct s390_address
 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
 #define VEC_ARG_NUM_REG 8
 
+/* Return TRUE if GPR REGNO is supposed to be restored in the function
+   epilogue.  */
+static inline bool
+s390_restore_gpr_p (int regno)
+{
+  return (cfun_frame_layout.first_restore_gpr != -1
+ && regno >= cfun_frame_layout.first_restore_gpr
+ && regno <= cfun_frame_layout.last_restore_gpr);
+}
+
+/* Return TRUE if any of the registers in range [FIRST, LAST] is saved
+   because of -mpreserve-args.  */
+static inline bool
+s390_preserve_gpr_arg_in_range_p (int first, int last)
+{
+  int num_arg_regs = MIN (crtl->args.info.gprs + cfun->va_list_gpr_size,
+ GP_ARG_NUM_REG);
+  return (num_arg_regs
+ && s390_preserve_args_p
+ && first <= GPR2_REGNUM + num_arg_regs - 1
+ && last >= GPR2_REGNUM);
+}
+
+static inline bool
+s390_preserve_gpr_arg_p (int regno)
+{
+  return s390_preserve_gpr_arg_in_range_p (regno, regno);
+}
+
+/* Return TRUE if FPR arguments need to be saved onto the stack due to 
-mpreserve-args.  */
+static inline bool
+s390_preserve_fpr_args_p (void)
+{
+  return (s390_preserve_args_p
+ && (crtl->args.info.fprs + cfun->va_list_fpr_size));
+}
+
+static inline bool
+s390_preserve_fpr_arg_p (int regno)
+{
+  int num_arg_regs = MIN (crtl->args.info.fprs + cfun->va_list_fpr_size,
+ FP_ARG_NUM_REG);
+  return (s390_preserve_args_p
+ && regno <= FPR0_REGNUM + num_arg_regs - 1
+ && regno >= FPR0_REGNUM);
+}
+
 /* A couple of shortcuts.  */
 #define CONST_OK_FOR_J(x) \
CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
@@ -9893,61 +9940,90 @@ s390_register_info_gprtofpr ()
 }
 
 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
-   stdarg.
+   stdarg or -mpreserve-args.
This is a helper routine for s390_register_info.  */
-
 static void
-s390_register_info_stdarg_fpr ()
+s390_register_info_arg_fpr ()
 {
   int i;
-  int min_fpr;
-  int max_fpr;
+  int min_stdarg_fpr = INT_MAX, max_stdarg_fpr = -1;
+  int min_preserve_fpr = INT_MAX, max_preserve_fpr = -1;
+  int min_fpr, max_fpr;
 
   /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
  f0-f4 for 64 bit.  */
-  if (!cfun->stdarg
-  || !TARGET_HARD_FLOAT
-  || !cfun->va_list_fpr_size
-  || crtl->args.info.fprs >= FP_ARG_NUM_REG)
-return;
+  if (cfun->stdarg
+  && TARGET_HARD_FLOAT
+  && cfun->va_list_fpr_size
+  && crtl->args.info.fprs < FP_ARG_NUM_REG)
+{
+  min_stdarg_fpr = crtl->args.info.fprs;
+  max_stdarg_fpr = min_stdarg_fpr + cfun->va_list_fpr_size - 1;
+  if (max_stdarg_fpr >= FP_ARG_NUM_REG)
+   max_stdarg_fpr = FP_ARG_NUM_REG - 1;
+
+  /* FPR argument regs start at f0.  */
+  min_stdarg_fpr += FPR0_REGNUM;
+  max_stdarg_fpr += FPR0_REGNUM;
+}
+
+  if (s390_preserve_fpr_args_p ())
+{
+  

[PATCH 1/2] New reg note REG_CFA_NORESTORE

2022-11-14 Thread Andreas Krebbel via Gcc-patches
This patch introduces a new reg note which can be used to tell the CFI
verification in dwarf2cfi that a register is stored without intending
to restore from it.

This is useful when storing e.g. register contents to the stack and
generate CFI for it although the register is not really supposed to be
restored.

gcc/ChangeLog:

* dwarf2cfi.cc (dwarf2out_frame_debug_cfa_restore): Add
EMIT_CFI parameter.
(dwarf2out_frame_debug): Add case for REG_CFA_NORESTORE.
* reg-notes.def (REG_CFA_NOTE): New reg note definition.
---
 gcc/dwarf2cfi.cc  | 15 ++-
 gcc/reg-notes.def |  5 +
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/gcc/dwarf2cfi.cc b/gcc/dwarf2cfi.cc
index bef3165e691..6686498d7cc 100644
--- a/gcc/dwarf2cfi.cc
+++ b/gcc/dwarf2cfi.cc
@@ -1496,10 +1496,12 @@ dwarf2out_frame_debug_cfa_val_expression (rtx set)
   update_row_reg_save (cur_row, dwf_regno (dest), cfi);
 }
 
-/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_RESTORE note.  */
+/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_RESTORE
+   note. When called with EMIT_CFI set to false emitting a CFI
+   statement is suppressed.  */
 
 static void
-dwarf2out_frame_debug_cfa_restore (rtx reg)
+dwarf2out_frame_debug_cfa_restore (rtx reg, bool emit_cfi = true)
 {
   gcc_assert (REG_P (reg));
 
@@ -1507,7 +1509,8 @@ dwarf2out_frame_debug_cfa_restore (rtx reg)
   if (!span)
 {
   unsigned int regno = dwf_regno (reg);
-  add_cfi_restore (regno);
+  if (emit_cfi)
+   add_cfi_restore (regno);
   update_row_reg_save (cur_row, regno, NULL);
 }
   else
@@ -1522,7 +1525,8 @@ dwarf2out_frame_debug_cfa_restore (rtx reg)
  reg = XVECEXP (span, 0, par_index);
  gcc_assert (REG_P (reg));
  unsigned int regno = dwf_regno (reg);
- add_cfi_restore (regno);
+ if (emit_cfi)
+   add_cfi_restore (regno);
  update_row_reg_save (cur_row, regno, NULL);
}
 }
@@ -2309,6 +2313,7 @@ dwarf2out_frame_debug (rtx_insn *insn)
break;
 
   case REG_CFA_RESTORE:
+  case REG_CFA_NORESTORE:
n = XEXP (note, 0);
if (n == NULL)
  {
@@ -2317,7 +2322,7 @@ dwarf2out_frame_debug (rtx_insn *insn)
  n = XVECEXP (n, 0, 0);
n = XEXP (n, 0);
  }
-   dwarf2out_frame_debug_cfa_restore (n);
+   dwarf2out_frame_debug_cfa_restore (n, REG_NOTE_KIND (note) == 
REG_CFA_RESTORE);
handled_one = true;
break;
 
diff --git a/gcc/reg-notes.def b/gcc/reg-notes.def
index 704bc75b0e7..ab08e65eedc 100644
--- a/gcc/reg-notes.def
+++ b/gcc/reg-notes.def
@@ -157,6 +157,11 @@ REG_CFA_NOTE (CFA_VAL_EXPRESSION)
first pattern is the register to be restored.  */
 REG_CFA_NOTE (CFA_RESTORE)
 
+/* Like CFA_RESTORE but without actually emitting CFI.  This can be
+   used to tell the verification infrastructure that a register is
+   saved without intending to restore it.  */
+REG_CFA_NOTE (CFA_NORESTORE)
+
 /* Attached to insns that are RTX_FRAME_RELATED_P, marks insn that sets
vDRAP from DRAP.  If vDRAP is a register, vdrap_reg is initalized
to the argument, if it is a MEM, it is ignored.  */
-- 
2.38.1



[PATCH 0/2] Preserve argument registers

2022-11-14 Thread Andreas Krebbel via Gcc-patches
This adds support for preserving the content of parameter registers to
the stack and emit CFI for it. This useful for applications which want
to implement their own stack unwinding and need access to function
arguments.

A small common code patch was needed to prevent the CFI verification
in dwarf2cfi from complaining about the register saves without restores.

Andreas Krebbel (2):
  New reg note REG_CFA_NORESTORE
  IBM zSystems: Save argument registers to the stack -mpreserve-args

 gcc/config/s390/s390.cc   | 263 +-
 gcc/config/s390/s390.opt  |   4 +
 gcc/dwarf2cfi.cc  |  15 +-
 gcc/reg-notes.def |   5 +
 .../gcc.target/s390/preserve-args-1.c |  17 ++
 .../gcc.target/s390/preserve-args-2.c |  19 ++
 6 files changed, 244 insertions(+), 79 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/preserve-args-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/preserve-args-2.c

-- 
2.38.1



Re: [PATCH] IBM zSystems: Fix function_ok_for_sibcall [PR106355]

2022-10-19 Thread Andreas Krebbel via Gcc-patches
On 8/17/22 13:50, Stefan Schulze Frielinghaus wrote:
> For a parameter with BLKmode we cannot use REG_NREGS in order to
> determine the number of consecutive registers.  Streamlined this with
> the implementation of s390_function_arg.
> 
> Fix some indentation whitespace, too.
> 
> Assuming bootstrap and regtest are ok for mainline and gcc-{10,11,12},
> ok to install for all of those?
> 
> PR target/106355
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (s390_call_saved_register_used): For a
>   parameter with BLKmode fix determining number of consecutive
>   registers.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/pr106355.h: Common code for new tests.
>   * gcc.target/s390/pr106355-1.c: New test.
>   * gcc.target/s390/pr106355-2.c: New test.
>   * gcc.target/s390/pr106355-3.c: New test.

Ok for all those branches. Please check if the branches are currently open 
before committing. GCC 11
and 12 appear to be but I'm not sure if GCC 10 has been re-opened again. There 
should be a final
10.5 release some day though.

Thanks!

Andreas


Re: [PATCH] s390: Fix bootstrap error with checking and -m31

2022-10-19 Thread Andreas Krebbel via Gcc-patches
On 10/19/22 08:22, Robin Dapp wrote:
> Hi,
> 
> since r13-2746 we hit an ICE when bootstrapping with -m31 and
> --enable-checking=all.
> 
> ../../../../libgfortran/ieee/ieee_helper.c: In function
> 'ieee_class_helper_16':
> ../../../../libgfortran/ieee/ieee_helper.c:77:3: internal compiler
> error: RTL check: expected code 'reg', have 'subreg' in rhs_regno, at
> rtl.h:1932
>77 |   }
>   |   ^
> ../../../../libgfortran/ieee/ieee_helper.c:87:1: note: in expansion of
> macro 'CLASSMACRO'
>87 | CLASSMACRO(16)
>   | ^~
> 
> This patch fixes the problem by first checking for reload_completed
> and also ensuring that REGNO is only called on reg operands rather
> than subregs.
> 
> Bootstrapped and regtested --with-arch=arch14 and --enable-checking=all.
> 
> Is it OK?
Ok. Thanks!

Andreas



Re: [PATCH] s390: Recognize reverse/element swap permute patterns.

2022-08-22 Thread Andreas Krebbel via Gcc-patches
On 8/22/22 17:10, Robin Dapp wrote:
> Hi,
> 
> after discussing off-list, here is v2 of the patch.  We now recognize if
> the permutation mask only refers to the first or the second operand and
> use this later when emitting vpdi.
> 
> Regtested and bootstrapped, no regressions.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> From 1f11a6b89c9b0ad64b480229cd4db06e887a Mon Sep 17 00:00:00 2001
> From: Robin Dapp 
> Date: Fri, 24 Jun 2022 15:17:08 +0200
> Subject: [PATCH v2] s390: Recognize reverse/element swap permute patterns.
> 
> This adds functions to recognize reverse/element swap permute patterns
> for vler, vster as well as vpdi and rotate.
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (expand_perm_with_vpdi): Recognize swap pattern.
>   (is_reverse_perm_mask): New function.
>   (expand_perm_with_rot): Recognize reverse pattern.
>   (expand_perm_with_vstbrq): New function.
>   (expand_perm_with_vster): Use vler/vster for element reversal on z15.
>   (vectorize_vec_perm_const_1): Use.
>   (s390_vectorize_vec_perm_const): Add expand functions.
>   * config/s390/vx-builtins.md: Prefer vster over vler.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/vector/vperm-rev-z14.c: New test.
>   * gcc.target/s390/vector/vperm-rev-z15.c: New test.
>   * gcc.target/s390/zvector/vec-reve-store-byte.c: Adjust test
>   expectation.

Ok, thanks!

Andreas


Re: [PATCH] s390: Implement vec_set with vec_merge and, vec_duplicate.

2022-08-16 Thread Andreas Krebbel via Gcc-patches
On 8/12/22 16:48, Robin Dapp wrote:
> Hi,
> 
> similar to other backends this patch implements vec_set via
> vec_merge and vec_duplicate instead of an unspec.  This opens up
> more possibilites to combine instructions.
> 
> Bootstrapped and regtested. No regressions.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.md: Implement vec_set with vec_merge and
>   vec_duplicate.
>   * config/s390/vector.md: Likewise.
>   * config/s390/vx-builtins.md: Likewise.
>   * config/s390/s390.cc (s390_expand_vec_init): Emit new pattern.
>   (print_operand_address): New output modifier.
>   (print_operand): New output modifier.

The way you handle the element selector doesn't look right to me. It appears to 
be an index if it is
a CONST_INT and a bitmask otherwise. I don't think it is legal to change 
operand semantics like this
depending on the operand type. This would break e.g. if LRA would decide to 
load the immediate index
in a register.

Couldn't you make the shift part of the RTX instead and have the parameter 
always as an index?

Bye,

Andreas

> ---
> 
> diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
> index c86b26933d7a..ff89fb83360a 100644
> --- a/gcc/config/s390/s390.cc
> +++ b/gcc/config/s390/s390.cc
> @@ -7073,11 +7073,10 @@ s390_expand_vec_init (rtx target, rtx vals)
>if (!general_operand (elem, GET_MODE (elem)))
>   elem = force_reg (inner_mode, elem);
> 
> -  emit_insn (gen_rtx_SET (target,
> -   gen_rtx_UNSPEC (mode,
> -   gen_rtvec (3, elem,
> -  GEN_INT (i), target),
> -   UNSPEC_VEC_SET)));
> +  emit_insn
> + (gen_rtx_SET
> +  (target, gen_rtx_VEC_MERGE
> +   (mode, gen_rtx_VEC_DUPLICATE (mode, elem), target, GEN_INT (1 << 
> i;
>  }
>  }
> 
> @@ -8057,6 +8056,8 @@ print_operand_address (FILE *file, rtx addr)
>  'S': print S-type memory reference (base+displacement).
>  'Y': print address style operand without index (e.g. shift count or
> setmem
>operand).
> +'P': print address-style operand without index but with the offset as
> +  if it were specified by a 'p' format flag.
> 
>  'b': print integer X as if it's an unsigned byte.
>  'c': print integer X as if it's an signed byte.
> @@ -8068,6 +8069,7 @@ print_operand_address (FILE *file, rtx addr)
>  'k': print the first nonzero SImode part of X.
>  'm': print the first SImode part unequal to -1 of X.
>  'o': print integer X as if it's an unsigned 32bit word.
> +'p': print N such that 2^N == X (X must be a power of 2 and const int).
>  's': "start" of contiguous bitmask X in either DImode or vector
> inner mode.
>  't': CONST_INT: "start" of contiguous bitmask X in SImode.
>CONST_VECTOR: Generate a bitmask for vgbm instruction.
> @@ -8237,6 +8239,16 @@ print_operand (FILE *file, rtx x, int code)
>print_shift_count_operand (file, x);
>return;
> 
> +case 'P':
> +  if (CONST_INT_P (x))
> + {
> +   ival = exact_log2 (INTVAL (x));
> +   fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
> + }
> +  else
> + print_shift_count_operand (file, x);
> +  return;
> +
>  case 'K':
>/* Append @PLT to both local and non-local symbols in order to
> support
>Linux Kernel livepatching: patches contain individual functions and
> @@ -8321,6 +8333,9 @@ print_operand (FILE *file, rtx x, int code)
>   case 'o':
> ival &= 0x;
> break;
> + case 'p':
> +   ival = exact_log2 (INTVAL (x));
> +   break;
>   case 'e': case 'f':
>   case 's': case 't':
> {
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index f37d8fd33a15..a82db4c624fa 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -183,7 +183,6 @@ (define_c_enum "unspec" [
> UNSPEC_VEC_GFMSUM_128
> UNSPEC_VEC_GFMSUM_ACCUM
> UNSPEC_VEC_GFMSUM_ACCUM_128
> -   UNSPEC_VEC_SET
> 
> UNSPEC_VEC_VSUMG
> UNSPEC_VEC_VSUMQ
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index c50451a8326c..bde3a39db3d4 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -467,12 +467,17 @@ (define_insn "mov"
>  ; vec_set is supposed to *modify* an existing vector so operand 0 is
>  ; duplicated as input operand.
>  (define_expand "vec_set"
> -  [(set (match_operand:V0 "register_operand"  "")
> - (unspec:V [(match_operand: 1 "general_operand"   "")
> -(match_operand:SI2 "nonmemory_operand" "")
> -(match_dup 0)]
> -UNSPEC_VEC_SET))]
> -  "TARGET_VX")
> +  [(set (match_operand:V  0 "register_operand" "")
> + (vec_merge:V
> +   (vec_duplicate:V
> + (match_operand: 1 "general_operand" ""))

Re: [PATCH] s390: Implement vec_extract via vec_select.

2022-08-16 Thread Andreas Krebbel via Gcc-patches
On 8/12/22 16:19, Robin Dapp wrote:
> Hi,
> 
> vec_select can handle dynamic/runtime masks nowadays.  Therefore we can
> get rid of the UNSPEC_VEC_EXTRACT that was preventing further
> optimizations like combining instructions with vec_extract patterns.
> 
> Bootstrapped and regtested. No regressions.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.md: Remove UNSPEC_VEC_EXTRACT.
>   * config/s390/vector.md: Rewrite patterns to use vec_select.
>   * config/s390/vx-builtins.md (vec_scatter_element_SI):
>   Likewise.

Ok. Thanks!

Andreas


Re: [PATCH] s390: Use vpdi and verllg in vec_reve.

2022-08-15 Thread Andreas Krebbel via Gcc-patches
On 8/12/22 12:13, Robin Dapp wrote:
> Hi,
> 
> swapping the two elements of a V2DImode or V2DFmode vector can be done
> with vpdi instead of using the generic way of loading a permutation mask
> from the literal pool and vperm.
> 
> Analogous to the V2DI/V2DF case reversing the elements of a four-element
> vector can be done by first swapping the elements of the first
> doubleword as well the ones of the second one and subsequently rotate
> the doublewords by 32 bits.
> 
> Bootstrapped and regtested, no regressions.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> gcc/ChangeLog:
> 
>   PR target/100869
>   * config/s390/vector.md (@vpdi4_2): New pattern.
>   (rotl3_di): New pattern.
>   * config/s390/vx-builtins.md: Use vpdi and verll for reversing
>   elements.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/zvector/vec-reve-int-long.c: New test.

Ok. Thanks!

Andreas


Re: [PATCH] s390: Add z15 to s390_issue_rate.

2022-08-15 Thread Andreas Krebbel via Gcc-patches
On 8/12/22 12:02, Robin Dapp wrote:
> Hi,
> 
> this patch tries to be more explicit by mentioning z15 in s390_issue_rate.
> 
> No changes in testsuite, bootstrap or SPEC obviously.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (s390_issue_rate): Add z15.
> ---
>  gcc/config/s390/s390.cc | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
> index ef38fbe68c84..528cd8c7f0f6 100644
> --- a/gcc/config/s390/s390.cc
> +++ b/gcc/config/s390/s390.cc
> @@ -8582,6 +8582,7 @@ s390_issue_rate (void)
>  case PROCESSOR_2827_ZEC12:
>  case PROCESSOR_2964_Z13:
>  case PROCESSOR_3906_Z14:
> +case PROCESSOR_8561_Z15:
>  case PROCESSOR_3931_Z16:
>  default:
>return 1;

Ok. Thanks!

Andreas



Re: [PATCH] s390: Add -munroll-only-small-loops.

2022-08-15 Thread Andreas Krebbel via Gcc-patches
On 8/12/22 12:00, Robin Dapp wrote:
> Hi,
> 
> inspired by Power we also introduce -munroll-only-small-loops.  This
> implies activating -funroll-loops and -munroll-only-small-loops at -O2
> and above.
> 
> Bootstrapped and regtested.
> 
> This introduces one regression in gcc.dg/sms-compare-debug-1.c but
> currently dumps for sms are broken as well.  The difference is in the
> location of some INSN_DELETED notes so I would consider this a minor issue.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> gcc/ChangeLog:
> 
>   * common/config/s390/s390-common.cc: Enable -funroll-loops and
>   -munroll-only-small-loops for OPT_LEVELS_2_PLUS_SPEED_ONLY.
>   * config/s390/s390.cc (s390_loop_unroll_adjust): Do not unroll
>   loops larger than 12 instructions.
>   (s390_override_options_after_change): Set unroll options.
>   (s390_option_override_internal): Likewise.
>   * config/s390/s390.opt: Document munroll-only-small-loops.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/vector/vec-copysign.c: Do not unroll.
>   * gcc.target/s390/zvector/autovec-double-quiet-uneq.c: Dito.
>   * gcc.target/s390/zvector/autovec-double-signaling-ltgt.c: Dito.
>   * gcc.target/s390/zvector/autovec-float-quiet-uneq.c: Dito.
>   * gcc.target/s390/zvector/autovec-float-signaling-ltgt.c: Dito.

Ok. Thanks!

Andreas


Re: [PATCH] PR106342 - IBM zSystems: Provide vsel for all vector modes

2022-08-10 Thread Andreas Krebbel via Gcc-patches
On 8/10/22 13:42, Ilya Leoshkevich wrote:
> On Wed, 2022-08-03 at 12:20 +0200, Ilya Leoshkevich wrote:
>> Bootstrapped and regtested on s390x-redhat-linux.  Ok for master?
>>
>>
>>
>> dg.exp=pr104612.c fails with an ICE on s390x, because copysignv2sf3
>> produces an insn that vsel is supposed to recognize, but can't,
>> because it's not defined for V2SF.  Fix by defining it for all vector
>> modes supported by copysign3.
>>
>> gcc/ChangeLog:
>>
>> * config/s390/vector.md (V_HW_FT): New iterator.
>> * config/s390/vx-builtins.md (vsel): Use V instead of
>> V_HW.
>> ---
>>  gcc/config/s390/vector.md  |  6 ++
>>  gcc/config/s390/vx-builtins.md | 12 ++--
>>  2 files changed, 12 insertions(+), 6 deletions(-)
> 
> Jakub pointed out that this is broken in gcc-12 as well.
> The patch applies cleanly, and I started a bootstrap/regtest.
> Ok for gcc-12?

Yes. Thanks!

Andreas


Re: [PATCH] PR106342 - IBM zSystems: Provide vsel for all vector modes

2022-08-03 Thread Andreas Krebbel via Gcc-patches
On 8/3/22 12:20, Ilya Leoshkevich wrote:
> Bootstrapped and regtested on s390x-redhat-linux.  Ok for master?
> 
> 
> 
> dg.exp=pr104612.c fails with an ICE on s390x, because copysignv2sf3
> produces an insn that vsel is supposed to recognize, but can't,
> because it's not defined for V2SF.  Fix by defining it for all vector
> modes supported by copysign3.
> 
> gcc/ChangeLog:
> 
>   * config/s390/vector.md (V_HW_FT): New iterator.
>   * config/s390/vx-builtins.md (vsel): Use V instead of
>   V_HW.

Ok. There is a typo in the changelog:
"Use *V* instead ..." should probably read "Use V_HW_FT instead ..."

Thanks,

Andreas

> ---
>  gcc/config/s390/vector.md  |  6 ++
>  gcc/config/s390/vx-builtins.md | 12 ++--
>  2 files changed, 12 insertions(+), 6 deletions(-)
> 
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index a6c4b4eb974..624729814af 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -63,6 +63,12 @@
>  V1DF V2DF
>  (V1TF "TARGET_VXE") (TF "TARGET_VXE")])
>  
> +; All modes present in V_HW and VFT.
> +(define_mode_iterator V_HW_FT [V16QI V8HI V4SI V2DI (V1TI "TARGET_VXE") V1DF
> +V2DF (V1SF "TARGET_VXE") (V2SF "TARGET_VXE")
> +(V4SF "TARGET_VXE") (V1TF "TARGET_VXE")
> +(TF "TARGET_VXE")])
> +
>  ; FP vector modes directly supported by the HW.  This does not include
>  ; vector modes using only part of a vector register and should be used
>  ; for instructions which might trigger IEEE exceptions.
> diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
> index d5130799804..98ee08b2683 100644
> --- a/gcc/config/s390/vx-builtins.md
> +++ b/gcc/config/s390/vx-builtins.md
> @@ -517,12 +517,12 @@
>  ; swapped in s390-c.cc when we get here.
>  
>  (define_insn "vsel"
> -  [(set (match_operand:V_HW  0 "register_operand" "=v")
> - (ior:V_HW
> -  (and:V_HW (match_operand:V_HW   1 "register_operand"  "v")
> -(match_operand:V_HW   3 "register_operand"  "v"))
> -  (and:V_HW (not:V_HW (match_dup 3))
> -(match_operand:V_HW   2 "register_operand"  "v"]
> +  [(set (match_operand:V_HW_FT   0 "register_operand" "=v")
> + (ior:V_HW_FT
> +  (and:V_HW_FT (match_operand:V_HW_FT 1 "register_operand"  "v")
> +   (match_operand:V_HW_FT 3 "register_operand"  "v"))
> +  (and:V_HW_FT (not:V_HW_FT (match_dup 3))
> +   (match_operand:V_HW_FT 2 "register_operand"  "v"]
>"TARGET_VX"
>"vsel\t%v0,%1,%2,%3"
>[(set_attr "op_type" "VRR")])



[PATCH 1/1] PR 106101: IBM zSystems: Fix strict_low_part problem

2022-07-29 Thread Andreas Krebbel via Gcc-patches
This avoids generating illegal (strict_low_part (reg ...)) RTXs. This
required two changes:

1. Do not use gen_lowpart to generate the inner expression of a
STRICT_LOW_PART.  gen_lowpart might fold the SUBREG either because
there is already a paradoxical subreg or because it can directly be
applied to the register. A new wrapper function makes sure that we
always end up having an actual SUBREG.

2. Change the movstrict patterns to enforce a SUBREG as inner operand
of the STRICT_LOW_PARTs.  The new predicate introduced for the
destination operand requires a SUBREG expression with a
register_operand as inner operand.  However, since reload strips away
the majority of the SUBREGs we have to accept single registers as well
once we reach reload.

Bootstrapped and regression tested on IBM zSystems 64 bit.

gcc/ChangeLog:

PR target/106101
* config/s390/predicates.md (subreg_register_operand): New
predicate.
* config/s390/s390-protos.h (s390_gen_lowpart_subreg): New
function prototype.
* config/s390/s390.cc (s390_gen_lowpart_subreg): New function.
(s390_expand_insv): Use s390_gen_lowpart_subreg instead of
gen_lowpart.
* config/s390/s390.md ("*get_tp_64", "*zero_extendhisi2_31")
("*zero_extendqisi2_31", "*zero_extendqihi2_31"): Likewise.
("movstrictqi", "movstricthi", "movstrictsi"): Use the
subreg_register_operand predicate instead of register_operand.

gcc/testsuite/ChangeLog:

PR target/106101
* gcc.c-torture/compile/pr106101.c: New test.
---
 gcc/config/s390/predicates.md | 12 
 gcc/config/s390/s390-protos.h |  1 +
 gcc/config/s390/s390.cc   | 27 +++-
 gcc/config/s390/s390.md   | 36 +--
 .../gcc.c-torture/compile/pr106101.c  | 62 +++
 5 files changed, 116 insertions(+), 22 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr106101.c

diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md
index 33194d3f3d6..430cf6edfd6 100644
--- a/gcc/config/s390/predicates.md
+++ b/gcc/config/s390/predicates.md
@@ -594,3 +594,15 @@
 (define_predicate "addv_const_operand"
   (and (match_code "const_int")
(match_test "INTVAL (op) >= -32768 && INTVAL (op) <= 32767")))
+
+; Match (subreg (reg ...)) operands.
+; Used for movstrict destination operands
+; When replacing pseudos with hard regs reload strips away the
+; subregs. Accept also plain registers then to prevent the insn from
+; becoming unrecognizable.
+(define_predicate "subreg_register_operand"
+  (ior (and (match_code "subreg")
+   (match_test "register_operand (SUBREG_REG (op), GET_MODE 
(SUBREG_REG (op)))"))
+   (and (match_code "reg")
+   (match_test "reload_completed || reload_in_progress")
+   (match_test "register_operand (op, GET_MODE (op))"
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index fd4acaae44a..765d843a418 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -50,6 +50,7 @@ extern void s390_set_has_landing_pad_p (bool);
 extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int);
 extern int s390_class_max_nregs (enum reg_class, machine_mode);
 extern bool s390_return_addr_from_memory(void);
+extern rtx s390_gen_lowpart_subreg (machine_mode, rtx);
 extern bool s390_fma_allowed_p (machine_mode);
 #if S390_USE_TARGET_ATTRIBUTE
 extern tree s390_valid_target_attribute_tree (tree args,
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 5aaf76a9490..5e06bf9350c 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -458,6 +458,31 @@ s390_return_addr_from_memory ()
   return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
 }
 
+/* Generate a SUBREG for the MODE lowpart of EXPR.
+
+   In contrast to gen_lowpart it will always return a SUBREG
+   expression.  This is useful to generate STRICT_LOW_PART
+   expressions.  */
+rtx
+s390_gen_lowpart_subreg (machine_mode mode, rtx expr)
+{
+  rtx lowpart = gen_lowpart (mode, expr);
+
+  /* There might be no SUBREG in case it could be applied to the hard
+ REG rtx or it could be folded with a paradoxical subreg.  Bring
+ it back.  */
+  if (!SUBREG_P (lowpart))
+{
+  machine_mode reg_mode = TARGET_ZARCH ? DImode : SImode;
+  gcc_assert (REG_P (lowpart));
+  lowpart = gen_lowpart_SUBREG (mode,
+   gen_rtx_REG (reg_mode,
+REGNO (lowpart)));
+}
+
+  return lowpart;
+}
+
 /* Return nonzero if it's OK to use fused multiply-add for MODE.  */
 bool
 s390_fma_allowed_p (machine_mode mode)
@@ -6520,7 +6545,7 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
   /* Emit a strict_low_part pattern if possible.  */
   if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
{
- rtx 

Re: CFI for saved argument registers

2022-05-31 Thread Andreas Krebbel via Gcc
On 5/16/22 08:29, Andreas Krebbel via Gcc wrote:
> Hi,
> 
> I'm trying to provide a simple dwarf unwinder with access to the
> argument register content. The goal is to make this information
> available for optimized code without having to access debug
> information for things like call site args. The extra overhead
> of saving the values to the stack is acceptable in that case.
> 
> For that purpose I save the argument registers to the stack as we
> would do for a variable argument lists. But this time I also provide
> the CFI to allow the unwinder to locate the save slots.  Since I never
> actually intend to restore the content there is no matching
> cfi_restore for the cfi_offset and dwarf2cfi complains about the
> traces being inconsistent because of that. I couldn't find a way to
> prevent this.
> 
> The only way I see right now is adding a new reg note to invalidate
> the save information in the reg_save array in dwarf2cfi.
> 
> Would this be acceptable? Is there perhaps an easier way to achieve that?

Attached is a small patch adding a new reg note REG_CFA_NORESTORE.

I would attach that new note to the register save insn to indicate that this 
register will not be
restored. I'll have to emit the REG_CFA_OFFSET note as well then.

An insn saving r2-r6 without restoring them would look like this then:

(insn/f 31 21 32 2 (parallel [
(set/f (mem/c:DI (plus:DI (reg/f:DI 15 %r15)
(const_int 16 [0x10])) [4  S8 A8])
(reg:DI 2 %r2))
(set/f (mem/c:DI (plus:DI (reg/f:DI 15 %r15)
(const_int 24 [0x18])) [4  S8 A8])
(reg:DI 3 %r3))
(set/f (mem/c:DI (plus:DI (reg/f:DI 15 %r15)
(const_int 32 [0x20])) [4  S8 A8])
(reg:DI 4 %r4))
(set/f (mem/c:DI (plus:DI (reg/f:DI 15 %r15)
(const_int 40 [0x28])) [4  S8 A8])
(reg:DI 5 %r5))
(set/f (mem/c:DI (plus:DI (reg/f:DI 15 %r15)
(const_int 48 [0x30])) [4  S8 A8])
(reg:DI 6 %r6))
]) "/home/andreas/preserveargs/reduce2/t.c":2:51 -1
 (expr_list:REG_CFA_OFFSET (set (mem/c:DI (plus:DI (reg/f:DI 15 %r15)
(const_int 48 [0x30])) [4  S8 A8])
(reg:DI 6 %r6))
(expr_list:REG_CFA_NO_RESTORE (reg:DI 6 %r6)
(expr_list:REG_CFA_OFFSET (set (mem/c:DI (plus:DI (reg/f:DI 15 %r15)
(const_int 40 [0x28])) [4  S8 A8])
(reg:DI 5 %r5))
(expr_list:REG_CFA_NO_RESTORE (reg:DI 5 %r5)
(expr_list:REG_CFA_OFFSET (set (mem/c:DI (plus:DI (reg/f:DI 
15 %r15)
(const_int 32 [0x20])) [4  S8 A8])
(reg:DI 4 %r4))
(expr_list:REG_CFA_NO_RESTORE (reg:DI 4 %r4)
(expr_list:REG_CFA_OFFSET (set (mem/c:DI (plus:DI 
(reg/f:DI 15 %r15)
(const_int 24 [0x18])) [4  S8 A8])
(reg:DI 3 %r3))
(expr_list:REG_CFA_NO_RESTORE (reg:DI 3 %r3)
(expr_list:REG_CFA_OFFSET (set (mem/c:DI 
(plus:DI (reg/f:DI 15 %r15)
(const_int 16 [0x10])) [4  
S8 A8])
(reg:DI 2 %r2))
(expr_list:REG_CFA_NO_RESTORE (reg:DI 2 
%r2)
(nil

diff --git a/gcc/dwarf2cfi.c b/gcc/dwarf2cfi.c
index 362ff3fdac27..2cbc2465c3a7 100644
--- a/gcc/dwarf2cfi.c
+++ b/gcc/dwarf2cfi.c
@@ -1346,7 +1346,7 @@ dwarf2out_frame_debug_cfa_val_expression (rtx set)
 /* A subroutine of dwarf2out_frame_debug, process a REG_CFA_RESTORE note.  */

 static void
-dwarf2out_frame_debug_cfa_restore (rtx reg)
+dwarf2out_frame_debug_cfa_restore (rtx reg, bool emit_cfi)
 {
   gcc_assert (REG_P (reg));

@@ -1354,7 +1354,8 @@ dwarf2out_frame_debug_cfa_restore (rtx reg)
   if (!span)
 {
   unsigned int regno = dwf_regno (reg);
-  add_cfi_restore (regno);
+  if (emit_cfi)
+   add_cfi_restore (regno);
   update_row_reg_save (cur_row, regno, NULL);
 }
   else
@@ -1369,7 +1370,8 @@ dwarf2out_frame_debug_cfa_restore (rtx reg)
  reg = XVECEXP (span, 0, par_index);
  gcc_assert (REG_P (reg));
  unsigned int regno = dwf_regno (reg);
- add_cfi_restore (regno);
+ if (emit_cfi)
+   add_cfi_restore (regno);
  update_row_reg_save (cur_row, regno, NULL);
}
 }
@@ -2155,6 +2157,7 @@ dwarf2out_frame_debug (rtx_insn *insn)
break;

   case REG_CFA_RESTORE:
+  case REG_CFA_NO_RESTORE:
n = XEXP (note, 0);
if (n == NULL)
  {
@@ -2163,7 +2166,7 @@ dwarf2out_frame_debug (rtx_in

Re: CFI for saved argument registers

2022-05-17 Thread Andreas Krebbel via Gcc
On 5/16/22 16:39, Andreas Schwab wrote:
> On Mai 16 2022, Andreas Krebbel via Gcc wrote:
> 
>> The only way I see right now is adding a new reg note to invalidate
>> the save information in the reg_save array in dwarf2cfi.
>>
>> Would this be acceptable? Is there perhaps an easier way to achieve that?
> 
> Doesn't it work to use .cfi_remember_state/.cfi_restore_state?
> 
This might work but I think also for that I would have to implement means to 
trigger it explicitely
with an RTX somehow.

Also I rather would avoid emitting special CFI for that purpose. I think the 
CFI I currently have
(with more .cfi_offset's than .cfi_restore's) is valid. I only have to convince 
the validation step
in dwarf2cfi to accept this. Therefore the idea was to introduce a new regnote 
to say that at a
certain point it is intentional that there is no .cfi_restore for a register. 
The action in
dwarf2cfi should only be to remove the reg from reg_save and be done with it.

Bye,

Andreas



CFI for saved argument registers

2022-05-16 Thread Andreas Krebbel via Gcc
Hi,

I'm trying to provide a simple dwarf unwinder with access to the
argument register content. The goal is to make this information
available for optimized code without having to access debug
information for things like call site args. The extra overhead
of saving the values to the stack is acceptable in that case.

For that purpose I save the argument registers to the stack as we
would do for a variable argument lists. But this time I also provide
the CFI to allow the unwinder to locate the save slots.  Since I never
actually intend to restore the content there is no matching
cfi_restore for the cfi_offset and dwarf2cfi complains about the
traces being inconsistent because of that. I couldn't find a way to
prevent this.

The only way I see right now is adding a new reg note to invalidate
the save information in the reg_save array in dwarf2cfi.

Would this be acceptable? Is there perhaps an easier way to achieve that?

Bye,

Andreas


Re: GCC 11.2.1 Status Report (2022-04-13), branch frozen for release

2022-04-14 Thread Andreas Krebbel via Gcc
On 4/13/22 09:30, Richard Biener via Gcc wrote:
> 
> Status
> ==
> 
> The gcc-11 branch is now frozen in preparation for a GCC 11.3 release
> candidate and the GCC 11.3 release next week.  All changes now require
> release manager approval.

Hi,

I would like to push:

https://gcc.gnu.org/pipermail/gcc-patches/2022-April/593103.html

to GCC 11 branch before 11.3 release. Ok?

Bye,

Andreas


Re: GCC 11.2.1 Status Report (2022-04-13), branch frozen for release

2022-04-14 Thread Andreas Krebbel via Gcc-patches
On 4/13/22 09:30, Richard Biener via Gcc wrote:
> 
> Status
> ==
> 
> The gcc-11 branch is now frozen in preparation for a GCC 11.3 release
> candidate and the GCC 11.3 release next week.  All changes now require
> release manager approval.

Hi,

I would like to push:

https://gcc.gnu.org/pipermail/gcc-patches/2022-April/593103.html

to GCC 11 branch before 11.3 release. Ok?

Bye,

Andreas


Re: [PATCH] s390: Add scheduler description for z16

2022-04-14 Thread Andreas Krebbel via Gcc-patches
On 4/13/22 12:23, Robin Dapp wrote:
> Hi,
> 
> this patch adds the scheduler description for z16.  Bootstrapped and
> regtested with --with-arch=z16.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (s390_get_sched_attrmask): Add z16.
>   (s390_get_unit_mask): Likewise.
>   (s390_is_fpd): Likewise.
>   (s390_is_fxd): Likewise.
>   * config/s390/s390.md 
> (z900,z990,z9_109,z9_ec,z10,z196,zEC12,z13,z14,z15):
>   Add z16.
>   (z900,z990,z9_109,z9_ec,z10,z196,zEC12,z13,z14,z15,z16):
>   Likewise.
>   * config/s390/3931.md: New file.

Ok. Thanks!

Andreas




Re: [PATCH] testsuite/s390: Silence warning in pr80725.c

2022-04-14 Thread Andreas Krebbel via Gcc-patches
On 4/13/22 09:35, Robin Dapp wrote:
> Hi,
> 
> this test case checks that we do not ICE but FAILs because of
> -Wint-to-pointer-cast.  Silence this warning.
> 
> Is it OK?

Ok. Thanks!

Andreas



Re: [PATCH] testsuite: Skip pr105250.c for powerpc and s390 [PR105266]

2022-04-14 Thread Andreas Krebbel via Gcc-patches
On 4/14/22 05:10, Kewen.Lin wrote:
> Hi,
> 
> The test case pr105250.c is like its related pr105140.c, which
> suffers the error with message like "{AltiVec,vector} argument
> passed to unprototyped" on powerpc and s390.  So like commits
> r12-8025 and r12-8039, this fix is to add the dg-skip-if for
> powerpc*-*-* and s390*-*-*.
> 
> Tested on powerpc64le-linux-gnu P9 and it should work on s390
> as its similar PR105147.
> 
> Is it ok for trunk?
> 
> BR,
> Kewen
> -
> 
> gcc/testsuite/ChangeLog:
> 
>   PR testsuite/105266
>   * gcc.dg/pr105250.c: Skip for powerpc*-*-* and s390*-*-*.

Ok for s390. Thanks!

Andreas


[Committed] IBM zSystems: Add support for z16 as CPU name.

2022-04-12 Thread Andreas Krebbel via Gcc-patches
So far z16 was identified as arch14. After the machine has been
announced we can now add the real name.

gcc/ChangeLog:

* common/config/s390/s390-common.cc: Rename PF_ARCH14 to PF_Z16.
* config.gcc: Add z16 as march/mtune switch.
* config/s390/driver-native.cc (s390_host_detect_local_cpu):
Recognize z16 with -march=native.
* config/s390/s390-opts.h (enum processor_type): Rename
PROCESSOR_ARCH14 to PROCESSOR_3931_Z16.
* config/s390/s390.cc (PROCESSOR_ARCH14): Rename to ...
(PROCESSOR_3931_Z16): ... throughout the file.
(s390_processor processor_table): Add z16 as cpu string.
* config/s390/s390.h (enum processor_flags): Rename PF_ARCH14 to
PF_Z16.
(TARGET_CPU_ARCH14): Rename to ...
(TARGET_CPU_Z16): ... this.
(TARGET_CPU_ARCH14_P): Rename to ...
(TARGET_CPU_Z16_P): ... this.
(TARGET_ARCH14): Rename to ...
(TARGET_Z16): ... this.
(TARGET_ARCH14_P): Rename to ...
(TARGET_Z16_P): ... this.
* config/s390/s390.md (cpu_facility): Rename arch14 to z16 and
check TARGET_Z16 instead of TARGET_ARCH14.
* config/s390/s390.opt: Add z16 to processor_type.
* doc/invoke.texi: Document z16 and arch14.
---
 gcc/common/config/s390/s390-common.cc |  4 ++--
 gcc/config.gcc|  2 +-
 gcc/config/s390/driver-native.cc  |  6 +-
 gcc/config/s390/s390-opts.h   |  2 +-
 gcc/config/s390/s390.cc   | 14 --
 gcc/config/s390/s390.h| 16 
 gcc/config/s390/s390.md   |  6 +++---
 gcc/config/s390/s390.opt  |  5 -
 gcc/doc/invoke.texi   |  3 ++-
 9 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/gcc/common/config/s390/s390-common.cc 
b/gcc/common/config/s390/s390-common.cc
index caec2f14c6c..72a5ef47eaa 100644
--- a/gcc/common/config/s390/s390-common.cc
+++ b/gcc/common/config/s390/s390-common.cc
@@ -50,10 +50,10 @@ EXPORTED_CONST int processor_flags_table[] =
 /* z15 */PF_IEEE_FLOAT | PF_ZARCH | PF_LONG_DISPLACEMENT
 | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196 | PF_ZEC12 | PF_TX
 | PF_Z13 | PF_VX | PF_VXE | PF_Z14 | PF_VXE2 | PF_Z15,
-/* arch14 */ PF_IEEE_FLOAT | PF_ZARCH | PF_LONG_DISPLACEMENT
+/* z16 */PF_IEEE_FLOAT | PF_ZARCH | PF_LONG_DISPLACEMENT
 | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196 | PF_ZEC12 | PF_TX
 | PF_Z13 | PF_VX | PF_VXE | PF_Z14 | PF_VXE2 | PF_Z15
-| PF_NNPA | PF_ARCH14
+| PF_NNPA | PF_Z16
   };
 
 /* Change optimizations to be performed, depending on the
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 48a5bbcf787..c5064dd3766 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -5532,7 +5532,7 @@ case "${target}" in
for which in arch tune; do
eval "val=\$with_$which"
case ${val} in
-   "" | native | z900 | z990 | z9-109 | z9-ec | z10 | z196 
| zEC12 | z13 | z14 | z15 | arch5 | arch6 | arch7 | arch8 | arch9 | arch10 | 
arch11 | arch12 | arch13 | arch14 )
+   "" | native | z900 | z990 | z9-109 | z9-ec | z10 | z196 
| zEC12 | z13 | z14 | z15 | z16 | arch5 | arch6 | arch7 | arch8 | arch9 | 
arch10 | arch11 | arch12 | arch13 | arch14 )
# OK
;;
*)
diff --git a/gcc/config/s390/driver-native.cc b/gcc/config/s390/driver-native.cc
index 48524c49251..b5eb222872d 100644
--- a/gcc/config/s390/driver-native.cc
+++ b/gcc/config/s390/driver-native.cc
@@ -123,8 +123,12 @@ s390_host_detect_local_cpu (int argc, const char **argv)
case 0x8562:
  cpu = "z15";
  break;
+   case 0x3931:
+   case 0x3932:
+ cpu = "z16";
+ break;
default:
- cpu = "arch14";
+ cpu = "z16";
  break;
}
}
diff --git a/gcc/config/s390/s390-opts.h b/gcc/config/s390/s390-opts.h
index 1ec84631a5f..4ef82ac5d34 100644
--- a/gcc/config/s390/s390-opts.h
+++ b/gcc/config/s390/s390-opts.h
@@ -38,7 +38,7 @@ enum processor_type
   PROCESSOR_2964_Z13,
   PROCESSOR_3906_Z14,
   PROCESSOR_8561_Z15,
-  PROCESSOR_ARCH14,
+  PROCESSOR_3931_Z16,
   PROCESSOR_NATIVE,
   PROCESSOR_max
 };
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index d2af6d8813d..1342a2e7db0 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -337,7 +337,7 @@ const struct s390_processor processor_table[] =
   { "z13","z13",PROCESSOR_2964_Z13,_cost,  11 },
   { "z14","arch12", PROCESSOR_3906_Z14,_cost,  12 },
   { "z15","arch13", PROCESSOR_8561_Z15,_cost,  13 },
-  { "arch14", "arch14", PROCESSOR_ARCH14,  _cost,  14 },
+  { "z16","arch14", PROCESSOR_3931_Z16,_cost,  14 },
   { 

[PATCH] v2 PR102024 - IBM Z: Add psabi diagnostics

2022-04-11 Thread Andreas Krebbel via Gcc-patches
v2:

- Remove redundant num_zero_width_bf_seen and num_fields_seen
  tracking. (Thanks Stefan Schulze-Frielinghaus)

Re-tested with testsuite and ABI tests.



For IBM Z in particular there is a problem with structs like:

struct A { float a; int :0; };

Our ABI document allows passing a struct in an FPR only if it has
exactly one member. On the other hand it says that structs of 1,2,4,8
bytes are passed in a GPR. So this struct is expected to be passed in
a GPR. Since we don't return structs in registers (regardless of the
number of members) it is always returned in memory.

Situation is as follows:

All compiler versions tested return it in memory - as expected.

gcc 11, gcc 12, g++ 12, and clang 13 pass it in a GPR - as expected.

g++ 11 as well as clang++ 13 pass in an FPR

For IBM Z we stick to the current GCC 12 behavior, i.e. zero-width
bitfields are NOT ignored.  A struct as above will be passed in a
GPR. Rational behind this is that not affecting the C ABI is more
important here.

A patch for clang is in progress: https://reviews.llvm.org/D122388

In addition to the usual regression test I ran the compat and
struct-layout-1 testsuites comparing the compiler before and after the
patch.

gcc/ChangeLog:
PR target/102024
* config/s390/s390-protos.h (s390_function_arg_vector): Remove
prototype.
* config/s390/s390.cc (s390_single_field_struct_p): New function.
(s390_function_arg_vector): Invoke s390_single_field_struct_p.
(s390_function_arg_float): Likewise.

gcc/testsuite/ChangeLog:
PR target/102024
* g++.target/s390/pr102024-1.C: New test.
* g++.target/s390/pr102024-2.C: New test.
* g++.target/s390/pr102024-3.C: New test.
* g++.target/s390/pr102024-4.C: New test.
* g++.target/s390/pr102024-5.C: New test.
* g++.target/s390/pr102024-6.C: New test.
---
 gcc/config/s390/s390-protos.h  |   1 -
 gcc/config/s390/s390.cc| 208 +++--
 gcc/testsuite/g++.target/s390/pr102024-1.C |  12 ++
 gcc/testsuite/g++.target/s390/pr102024-2.C |  14 ++
 gcc/testsuite/g++.target/s390/pr102024-3.C |  15 ++
 gcc/testsuite/g++.target/s390/pr102024-4.C |  15 ++
 gcc/testsuite/g++.target/s390/pr102024-5.C |  14 ++
 gcc/testsuite/g++.target/s390/pr102024-6.C |  12 ++
 8 files changed, 187 insertions(+), 104 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-1.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-2.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-3.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-4.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-5.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-6.C

diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index e6251595870..fd4acaae44a 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -49,7 +49,6 @@ extern void s390_function_profiler (FILE *, int);
 extern void s390_set_has_landing_pad_p (bool);
 extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int);
 extern int s390_class_max_nregs (enum reg_class, machine_mode);
-extern bool s390_function_arg_vector (machine_mode, const_tree);
 extern bool s390_return_addr_from_memory(void);
 extern bool s390_fma_allowed_p (machine_mode);
 #if S390_USE_TARGET_ATTRIBUTE
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index d2af6d8813d..c091d2a692a 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -12148,29 +12148,26 @@ s390_function_arg_size (machine_mode mode, const_tree 
type)
   gcc_unreachable ();
 }
 
-/* Return true if a function argument of type TYPE and mode MODE
-   is to be passed in a vector register, if available.  */
-
-bool
-s390_function_arg_vector (machine_mode mode, const_tree type)
+/* Return true if a variable of TYPE should be passed as single value
+   with type CODE. If STRICT_SIZE_CHECK_P is true the sizes of the
+   record type and the field type must match.
+
+   The ABI says that record types with a single member are treated
+   just like that member would be.  This function is a helper to
+   detect such cases.  The function also produces the proper
+   diagnostics for cases where the outcome might be different
+   depending on the GCC version.  */
+static bool
+s390_single_field_struct_p (enum tree_code code, const_tree type,
+   bool strict_size_check_p)
 {
-  if (!TARGET_VX_ABI)
-return false;
-
-  if (s390_function_arg_size (mode, type) > 16)
-return false;
-
-  /* No type info available for some library calls ...  */
-  if (!type)
-return VECTOR_MODE_P (mode);
-
-  /* The ABI says that record types with a single member are treated
- just like that member would be.  */
   int empty_base_seen = 0;
+  bool zero_width_bf_skipped_p = false;
   const_tree orig_type = type;
+
   while (TREE_CODE (type) == RECORD_TYPE)
 {
-  tree field, 

Re: [PATCH] rs6000/testsuite: Skip pr105140.c

2022-04-06 Thread Andreas Krebbel via Gcc-patches
On 4/6/22 17:32, Segher Boessenkool wrote:
> This test fails with error "AltiVec argument passed to unprototyped
> function", but the code (in rs6000.c:invalid_arg_for_unprototyped_fn,
> from 2005) actually tests for any vector type argument.  It also does
> not fail on Darwin, not reflected here though.
> 
> Andreas, s390 has this same hook code, you may need to do the same?

Yes, thanks for the pointer. I've just committed the following:

IBM zSystems/testsuite: PR105147: Skip pr105140.c

pr105140.c fails on IBM zSystems with "vector argument passed to
unprototyped function".  s390_invalid_arg_for_unprototyped_fn in
s390.cc is triggered by that.

gcc/testsuite/ChangeLog:

PR target/105147
* gcc.dg/pr105140.c: Skip for s390*-*-*.
---
 gcc/testsuite/gcc.dg/pr105140.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/pr105140.c b/gcc/testsuite/gcc.dg/pr105140.c
index da34e7ad656..7d30985e850 100644
--- a/gcc/testsuite/gcc.dg/pr105140.c
+++ b/gcc/testsuite/gcc.dg/pr105140.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-Os -w -Wno-psabi" } */
-/* { dg-skip-if "PR105147" { powerpc*-*-* } } */
+/* { dg-skip-if "PR105147" { powerpc*-*-* s390*-*-* } } */

 typedef char __attribute__((__vector_size__ (16 * sizeof (char U;
 typedef int __attribute__((__vector_size__ (16 * sizeof (int V;


Re: [PATCH] testsuite/s390: Adapt test expections.

2022-04-04 Thread Andreas Krebbel via Gcc-patches
On 4/4/22 13:52, Robin Dapp wrote:
> Hi,
> 
> some tests expect a convert instruction but nowadays the conversion is
> already done at compile time.  This results in a literal-pool load.
> Change the tests accordingly.
> 
> OK for trunk?
> 
> Regards
>  Robin
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/zvector/vec-double-compile.c: Expect vl
> instead of vc*.
>   * gcc.target/s390/zvector/vec-float-compile.c: Dito.
>   * gcc.target/s390/zvector/vec-signed-compile.c: Dito.
>   * gcc.target/s390/zvector/vec-unsigned-compile.c: Dito.

I've seen Mike's comment but I'm not opposed to checking it in that way. These 
kind of comments have
probably saved me a few hours of bisecting already. Next time you might 
consider moving it to the
commit message instead.

Ok. Thanks!

Bye,

Andreas


Re: [PATCH] testsuite/s390: Change nle -> h in ifcvt tests.

2022-04-04 Thread Andreas Krebbel via Gcc-patches
On 4/4/22 13:51, Robin Dapp wrote:
> Hi,
> 
> we have been emitting the "higher" variantes instead of the "not less or
> equal" ones for a while.  Change the test expectations accordingly.
> 
> OK for trunk?
> 
> Regards
>  Robin
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/ifcvt-two-insns-bool.c: Change nle to h.
>   * gcc.target/s390/ifcvt-two-insns-int.c: Dito.
>   * gcc.target/s390/ifcvt-two-insns-long.c: Dito.

Ok. Thanks!

Andreas


Re: [PATCH] testsuite: Add -fno-tree-loop-distribute-patterns for s390.

2022-04-04 Thread Andreas Krebbel via Gcc-patches
On 4/4/22 13:51, Robin Dapp wrote:
> Hi,
> 
> in gcc.dg/Wuse-after-free-2.c we try to detect a use-after-free.  On
> s390 the test's while loop is converted into a rawmemchr builtin making
> it impossible to determine that the pointers *p and *q are related.
> 
> Therefore, disable the tree loop distribute patterns pass on s390 for
> this test.
> 
> OK for trunk?
> 
> Regards
>  Robin
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/Wuse-after-free-2.c:
>   Add -fno-tree-loop-distribute-patterns for s390*.

Ok. Thanks!

Andreas


[PATCH] PR102024 - IBM Z: Add psabi diagnostics

2022-03-25 Thread Andreas Krebbel via Gcc-patches
For IBM Z in particular there is a problem with structs like:

struct A { float a; int :0; };

Our ABI document allows passing a struct in an FPR only if it has
exactly one member. On the other hand it says that structs of 1,2,4,8
bytes are passed in a GPR. So this struct is expected to be passed in
a GPR. Since we don't return structs in registers (regardless of the
number of members) it is always returned in memory.

Situation is as follows:

All compiler versions tested return it in memory - as expected.

gcc 11, gcc 12, g++ 12, and clang 13 pass it in a GPR - as expected.

g++ 11 as well as clang++ 13 pass in an FPR

For IBM Z we stick to the current GCC 12 behavior, i.e. zero-width
bitfields are NOT ignored.  A struct as above will be passed in a
GPR. Rational behind this is that not affecting the C ABI is more
important here.

A patch for clang is in progress: https://reviews.llvm.org/D122388

In addition to the usual regression test I ran the compat and
struct-layout-1 testsuites comparing the compiler before and after the
patch.

gcc/ChangeLog:
PR target/102024
* config/s390/s390-protos.h (s390_function_arg_vector): Remove
prototype.
* config/s390/s390.cc (s390_single_field_struct_p): New function.
(s390_function_arg_vector): Invoke s390_single_field_struct_p.
(s390_function_arg_float): Likewise.

gcc/testsuite/ChangeLog:
PR target/102024
* g++.target/s390/pr102024-1.C: New test.
* g++.target/s390/pr102024-2.C: New test.
* g++.target/s390/pr102024-3.C: New test.
* g++.target/s390/pr102024-4.C: New test.
* g++.target/s390/pr102024-5.C: New test.
* g++.target/s390/pr102024-6.C: New test.
---
 gcc/config/s390/s390-protos.h  |   1 -
 gcc/config/s390/s390.cc| 212 +++--
 gcc/testsuite/g++.target/s390/pr102024-1.C |  12 ++
 gcc/testsuite/g++.target/s390/pr102024-2.C |  14 ++
 gcc/testsuite/g++.target/s390/pr102024-3.C |  15 ++
 gcc/testsuite/g++.target/s390/pr102024-4.C |  15 ++
 gcc/testsuite/g++.target/s390/pr102024-5.C |  14 ++
 gcc/testsuite/g++.target/s390/pr102024-6.C |  12 ++
 8 files changed, 195 insertions(+), 100 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-1.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-2.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-3.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-4.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-5.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-6.C

diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index e6251595870..fd4acaae44a 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -49,7 +49,6 @@ extern void s390_function_profiler (FILE *, int);
 extern void s390_set_has_landing_pad_p (bool);
 extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int);
 extern int s390_class_max_nregs (enum reg_class, machine_mode);
-extern bool s390_function_arg_vector (machine_mode, const_tree);
 extern bool s390_return_addr_from_memory(void);
 extern bool s390_fma_allowed_p (machine_mode);
 #if S390_USE_TARGET_ATTRIBUTE
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index d2af6d8813d..6cfa586b9cd 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -12148,29 +12148,29 @@ s390_function_arg_size (machine_mode mode, const_tree 
type)
   gcc_unreachable ();
 }
 
-/* Return true if a function argument of type TYPE and mode MODE
-   is to be passed in a vector register, if available.  */
-
-bool
-s390_function_arg_vector (machine_mode mode, const_tree type)
+/* Return true if a variable of TYPE should be passed as single value
+   with type CODE. If STRICT_SIZE_CHECK_P is true the sizes of the
+   record type and the field type must match.
+
+   The ABI says that record types with a single member are treated
+   just like that member would be.  This function is a helper to
+   detect such cases.  The function also produces the proper
+   diagnostics for cases where the outcome might be different
+   depending on the GCC version.  */
+static bool
+s390_single_field_struct_p (enum tree_code code, const_tree type,
+   bool strict_size_check_p)
 {
-  if (!TARGET_VX_ABI)
-return false;
-
-  if (s390_function_arg_size (mode, type) > 16)
-return false;
-
-  /* No type info available for some library calls ...  */
-  if (!type)
-return VECTOR_MODE_P (mode);
-
-  /* The ABI says that record types with a single member are treated
- just like that member would be.  */
   int empty_base_seen = 0;
+  bool zero_width_bf_seen_p = false;
   const_tree orig_type = type;
+  bool single_p = true;
+
   while (TREE_CODE (type) == RECORD_TYPE)
 {
-  tree field, single = NULL_TREE;
+  tree field, single_type = NULL_TREE;
+  int num_zero_width_bf_seen = 0;
+  int num_fields_seen = 0;
 

Re: Urgent GCC ABI backend maintainer ping re zero width bitfield passing (PR102024)

2022-03-21 Thread Andreas Krebbel via Gcc
On 3/21/22 17:28, Jakub Jelinek wrote:
> Hi!
> 
> I'd like to ping port maintainers about
> https://gcc.gnu.org/PR102024
> 
> As I wrote, the int : 0 bitfields are present early in the TYPE_FIELDS
> during structure layout and intentionally affect the layout.
> We had some code to remove those from TYPE_FIELDS chains in the C and C++
> FEs, but for C that removal never worked correctly (never removed any)
> and the non-working removal was eventually removed.  For C++ it
> didn't initially work either, but for GCC 4.5 that was fixed in PR42217,
> so on various backends where TYPE_FIELDS are analyzed for how to pass or
> return certain aggregates starting with GCC 4.5 the C++ and C ABI diverged.
> In August, I have removed that zero width bitfield removal from C++ FE
> as the FE needs to take those bitfields into account later on as well.
> 
> The x86_64 backend was changed in r12-6418-g3159da6c46 to match recently
> approved clarification of the x86-64 psABI and the zero width bitfields
> are now ignored for both C and C++ (so an ABI change for C from 11.x and
> earlier to 12.x and for C++ from GCC 4.4 and earlier to 4.5 and later)
> with a -Wpsabi diagnostics about it.
> 
> The rs6000 backend was changed in r12-3843-g16e3d6b8b2 to never ignore
> those bitfields (so no ABI change for C, for C++ ...-4.4 and 12+ are
> ABI incompatible with 4.5 through 11.x; note, it affects I think just
> ppc64le ABI, which didn't really exist before 4.8 I think) and diagnostics
> has been added about the ABI change.
> 
> As I wrote in the PR, I believe most of the GCC backends are unaffected,
> x86_64 and rs6000 are handled, riscv was changed already in GCC 10 to
> ignore those bitfields and emit a -Wpsabi diagnostics.
> 
> I can see code-generation differences certainly on armv7hl and aarch64.
> ia64, iq2000, mips, s390 and sparc are maybe affected, haven't checked.

On s390 we walk the field chain to figure out whether it is a struct with a 
single floating point
field. Consequently I see different code being generated by g++ for { float a; 
int :0; } which is
passed in a floating point register with g++ 11 and in memory with g++ 12.

I'm not sure what is best for our target. I'll try to come back with an answer 
soon.

Bye,

Andreas

> 
> Simple testcase could be e.g.:
> struct S { float a; int : 0; float b; };
> 
> __attribute__((noipa)) struct S
> foo (struct S x)
> {
>   return x;
> }
> 
> void
> bar (void)
> {
>   struct S s = { 0.0f, 0.0f };
>   foo (s);
> }
> where one should look at the argument and return value passing
> in GCC 11 C, GCC 11 C++, GCC trunk C, GCC trunk C++.
> 
> The FE now sets bits on the bitfields that make it possible to
> differentiate between the different cases, so each port may decide to do
> one of the 3 things:
> 1) keep ABI exactly compatible between GCC 11 and 12, which means
>C and C++ will continue to be incompatible
> 2) keep the G++ 4.5 through 11 ABI of ignoring zero width bitfields and
>change C ABI
> 3) keep the GCC < 11 C ABI of not ignoring zero width bitfields and
>change the C++ ABI (which means restoring ABI compatibility in
>this regard between G++ 4.4 and earlier with G++ 12 and later)
> Furthermore, it would be very nice to emit -Wpsabi diagnostics for the
> changed ABI unless 1) is decided.
> One should take into account psABI as well as what other compilers do.
> 
> The current state of GCC trunk is that 3) is done except that x86_64
> did 2) and riscv did 2 already for GCC 10 and all of x86_64, riscv and
> rs6000 emit -Wpsabi diagnostics (though I think rs6000 doesn't guard
> it with -Wpsabi).
> 
> I can help with the backend implementations if needed, but I can't
> decide which possibility you want to choose for each backend.
> It would be really nice to decide about this soon, because changing
> the ABI in GCC 12 only to change it again in GCC 13 doesn't look much
> desirable and even if 3) is the choice, it is really nice to have
> some diagnostics about ABI changes.
> 
> Thanks
> 
>   Jakub
> 



Re: [PATCH] s390: Fix up *cmp_and_trap_unsigned_int constraints [PR104775]

2022-03-07 Thread Andreas Krebbel via Gcc-patches
On 3/5/22 09:33, Jakub Jelinek wrote:
> Hi!
> 
> The following testcase fails to assemble due to clgte %r6,0(%r1,%r10)
> insn not being accepted by assembler.
> My rough understanding is that in the RSY-b insn format the spot
> in other formats used for index registers is used instead for M3 what
> kind of comparison it is, so this patch follows what other similar
> instructions use for constraint (i.e. one without index register).
> 
> Bootstrapped on s390x-linux, regtest there still pending, ok for
> trunk if it passes it?
> 
> 2022-03-05  Jakub Jelinek  
> 
>   PR target/104775
>   * config/s390/s390.md (*cmp_and_trap_unsigned_int): Use
>   S constraint instead of T in the last alternative.
> 
>   * gcc.target/s390/pr104775.c: New test.

Ok. Thanks for the fix!

Bye,

Andreas


Re: [PATCH] s390: Change SET rtx_cost handling.

2022-02-25 Thread Andreas Krebbel via Gcc-patches
On 2/25/22 12:38, Robin Dapp wrote:
> Hi,
> 
> the IF_THEN_ELSE detection currently prevents us from properly costing
> register-register moves which causes the lower-subreg pass to assume
> that a VR-VR move is as expensive as two GPR-GPR moves.
> 
> This patch adds handling for SETs containing REGs as well as MEMs and is
> inspired by the aarch64 implementation.
> 
> Bootstrapped and regtested on z900 up to z15. Is it OK?
> 
> Regards
>  Robin
> 
> --
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (s390_address_cost): Declare.
>   (s390_hard_regno_nregs): Declare.
>   (s390_rtx_costs): Add handling for REG and MEM in SET.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.c: New
> test.

Ok. Thanks

Andreas


Re: [PATCH] Check always_inline flag in s390_can_inline_p [PR104327]

2022-02-07 Thread Andreas Krebbel via Gcc-patches
On 2/7/22 09:11, Jakub Jelinek wrote:
...
> 1) formatting, = should be at the start of next line rather than end of the
>line
> 2) all_masks, always_inline_safe_masks and caller_required_masks aren't
>ever modified, perhaps make them const?
> 3) I wonder if there is any advantage to have all_masks with all the masks
>enumerated, compared to
>const HOST_WIDE_INT all_masks
>  = (caller_required_masks | must_match_masks | always_inline_safe_masks
>   | MASK_DEBUG_ARG | MASK_PACKED_STACK | MASK_ZVECTOR);
>i.e. when you add a new mask, instead of listing it in all_masks
>and one or more of the other vars you'd just stick it either in one
>or more of those vars or in all_masks.

I've just committed the patch with these changes. Thanks Jakub!

Andreas


diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 5c2a830f9f0..c6cfe41ad7b 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -16091,6 +16091,23 @@ s390_valid_target_attribute_p (tree fndecl,
 static bool
 s390_can_inline_p (tree caller, tree callee)
 {
+  /* Flags which if present in the callee are required in the caller as well.  
*/
+  const unsigned HOST_WIDE_INT caller_required_masks = MASK_OPT_HTM;
+
+  /* Flags which affect the ABI and in general prevent inlining.  */
+  unsigned HOST_WIDE_INT must_match_masks
+= (MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP | MASK_SOFT_FLOAT
+   | MASK_LONG_DOUBLE_128 | MASK_OPT_VX);
+
+  /* Flags which we in general want to prevent inlining but accept for
+ always_inline.  */
+  const unsigned HOST_WIDE_INT always_inline_safe_masks
+= MASK_MVCLE | MASK_BACKCHAIN | MASK_SMALL_EXEC;
+
+  const HOST_WIDE_INT all_masks
+ = (caller_required_masks | must_match_masks | always_inline_safe_masks
+   | MASK_DEBUG_ARG | MASK_PACKED_STACK | MASK_ZVECTOR);
+
   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

@@ -16103,16 +16120,18 @@ s390_can_inline_p (tree caller, tree callee)

   struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
   struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
-  bool ret = true;

-  if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
-  != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
-ret = false;
+  /* If one of these triggers make sure to add proper handling of your
+ new flag to this hook.  */
+  gcc_assert (!(caller_opts->x_target_flags & ~all_masks));
+  gcc_assert (!(callee_opts->x_target_flags & ~all_masks));

-  /* Don't inline functions to be compiled for a more recent arch into a
- function for an older arch.  */
-  else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
-ret = false;
+  bool always_inline
+= (DECL_DISREGARD_INLINE_LIMITS (callee)
+   && lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)));
+
+  if (!always_inline)
+must_match_masks |= always_inline_safe_masks;

   /* Inlining a hard float function into a soft float function is only
  allowed if the hard float function doesn't actually make use of
@@ -16120,16 +16139,27 @@ s390_can_inline_p (tree caller, tree callee)

  We are called from FEs for multi-versioning call optimization, so
  beware of ipa_fn_summaries not available.  */
-  else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
-&& !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
-   || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
-   && TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
-  && (! ipa_fn_summaries
-  || ipa_fn_summaries->get
-  (cgraph_node::get (callee))->fp_expressions))
-ret = false;
+  if (always_inline && ipa_fn_summaries
+  && !ipa_fn_summaries->get(cgraph_node::get (callee))->fp_expressions)
+must_match_masks &= ~(MASK_HARD_DFP | MASK_SOFT_FLOAT);

-  return ret;
+  if ((caller_opts->x_target_flags & must_match_masks)
+  != (callee_opts->x_target_flags & must_match_masks))
+return false;
+
+  if (~(caller_opts->x_target_flags & caller_required_masks)
+  & (callee_opts->x_target_flags & caller_required_masks))
+return false;
+
+  /* Don't inline functions to be compiled for a more recent arch into a
+ function for an older arch.  */
+  if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
+return false;
+
+  if (!always_inline && caller_opts->x_s390_tune != callee_opts->x_s390_tune)
+return false;
+
+  return true;
 }
 #endif

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr104327.c
b/gcc/testsuite/gcc.c-torture/compile/pr104327.c
new file mode 100644
index 000..d54e5d58cc4
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr104327.c
@@ -0,0 +1,15 @@
+/* PR target/104327 */
+
+void foo (int *);
+
+static inline __attribute__((always_inline)) void
+bar (int *x)
+{
+  foo (x);
+}
+
+__attribute__((cold, 

[PATCH] Check always_inline flag in s390_can_inline_p [PR104327]

2022-02-06 Thread Andreas Krebbel via Gcc-patches
MASK_MVCLE is set for -Os but not for other optimization levels. In
general it should not make much sense to inline across calls where the
flag is different but we have to allow it for always_inline.

The patch also rearranges the hook implementation a bit based on the
recommendations from Jakub und Martin in the PR.

Bootstrapped and regression tested on s390x with various arch flags.
Will commit after giving a few days for comments.

gcc/ChangeLog:

PR target/104327
* config/s390/s390.cc (s390_can_inline_p): Accept a few more flags
if always_inline is set. Don't inline when tune differs without
always_inline.

gcc/testsuite/ChangeLog:

PR target/104327
* gcc.c-torture/compile/pr104327.c: New test.
---
 gcc/config/s390/s390.cc   | 66 ++-
 .../gcc.c-torture/compile/pr104327.c  | 15 +
 2 files changed, 64 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr104327.c

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 5c2a830f9f0..bbf2dd8dfb4 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -16091,6 +16091,25 @@ s390_valid_target_attribute_p (tree fndecl,
 static bool
 s390_can_inline_p (tree caller, tree callee)
 {
+  unsigned HOST_WIDE_INT all_masks =
+(MASK_64BIT | MASK_BACKCHAIN | MASK_DEBUG_ARG | MASK_ZARCH
+ | MASK_HARD_DFP | MASK_SOFT_FLOAT
+ | MASK_OPT_HTM | MASK_LONG_DOUBLE_128 | MASK_MVCLE | MASK_PACKED_STACK
+ | MASK_SMALL_EXEC | MASK_OPT_VX | MASK_ZVECTOR);
+
+  /* Flags which if present in the callee are required in the caller as well.  
*/
+  unsigned HOST_WIDE_INT caller_required_masks = MASK_OPT_HTM;
+
+  /* Flags which affect the ABI and in general prevent inlining.  */
+  unsigned HOST_WIDE_INT must_match_masks =
+(MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP | MASK_SOFT_FLOAT
+ | MASK_LONG_DOUBLE_128 | MASK_OPT_VX);
+
+  /* Flags which we in general want to prevent inlining but accept for
+ always_inline.  */
+  unsigned HOST_WIDE_INT always_inline_safe_masks =
+MASK_MVCLE | MASK_BACKCHAIN | MASK_SMALL_EXEC;
+
   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
 
@@ -16103,16 +16122,18 @@ s390_can_inline_p (tree caller, tree callee)
 
   struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
   struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
-  bool ret = true;
 
-  if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
-  != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
-ret = false;
+  /* If one of these triggers make sure to add proper handling of your
+ new flag to this hook.  */
+  gcc_assert (!(caller_opts->x_target_flags & ~all_masks));
+  gcc_assert (!(callee_opts->x_target_flags & ~all_masks));
 
-  /* Don't inline functions to be compiled for a more recent arch into a
- function for an older arch.  */
-  else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
-ret = false;
+  bool always_inline
+= (DECL_DISREGARD_INLINE_LIMITS (callee)
+   && lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)));
+
+  if (!always_inline)
+must_match_masks |= always_inline_safe_masks;
 
   /* Inlining a hard float function into a soft float function is only
  allowed if the hard float function doesn't actually make use of
@@ -16120,16 +16141,27 @@ s390_can_inline_p (tree caller, tree callee)
 
  We are called from FEs for multi-versioning call optimization, so
  beware of ipa_fn_summaries not available.  */
-  else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
-&& !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
-   || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
-   && TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
-  && (! ipa_fn_summaries
-  || ipa_fn_summaries->get
-  (cgraph_node::get (callee))->fp_expressions))
-ret = false;
+  if (always_inline && ipa_fn_summaries
+  && !ipa_fn_summaries->get(cgraph_node::get (callee))->fp_expressions)
+must_match_masks &= ~(MASK_HARD_DFP | MASK_SOFT_FLOAT);
 
-  return ret;
+  if ((caller_opts->x_target_flags & must_match_masks)
+  != (callee_opts->x_target_flags & must_match_masks))
+return false;
+
+  if (~(caller_opts->x_target_flags & caller_required_masks)
+  & (callee_opts->x_target_flags & caller_required_masks))
+return false;
+
+  /* Don't inline functions to be compiled for a more recent arch into a
+ function for an older arch.  */
+  if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
+return false;
+
+  if (!always_inline && caller_opts->x_s390_tune != callee_opts->x_s390_tune)
+return false;
+
+  return true;
 }
 #endif
 
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr104327.c 

Re: [PATCH][GCC11] IBM Z: fix `section type conflict` with -mindirect-branch-table

2022-02-02 Thread Andreas Krebbel via Gcc-patches
On 2/2/22 12:57, Ilya Leoshkevich wrote:
> Bootstrapped and regtested on s390x-redhat-linux.  Ok for
> releases/gcc-11?
> 
> 
> 
> s390_code_end () puts indirect branch tables into separate sections and
> tries to switch back to wherever it was in the beginning by calling
> switch_to_section (current_function_section ()).
> 
> First of all, this is unnecessary - the other backends don't do it.
> 
> Furthermore, at this time there is no current function, but if the
> last processed function was cold, in_cold_section_p remains set.  This
> causes targetm.asm_out.function_section () to call
> targetm.section_type_flags (), which in absence of current function
> decl classifies the section as SECTION_WRITE.  This causes a section
> type conflict with the existing SECTION_CODE.
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.c (s390_code_end): Do not switch back to
>   code section.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/nobp-section-type-conflict.c: New test.

Ok. Thanks!

Andreas

> 
> (cherry picked from commit 8753b13a31c777cdab0265dae0b68534247908f7)
> ---
>  gcc/config/s390/s390.c|  1 -
>  .../s390/nobp-section-type-conflict.c | 22 +++
>  2 files changed, 22 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c
> 
> diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
> index 8895dd7cc76..2d2e6522eb4 100644
> --- a/gcc/config/s390/s390.c
> +++ b/gcc/config/s390/s390.c
> @@ -16700,7 +16700,6 @@ s390_code_end (void)
> assemble_name_raw (asm_out_file, label_start);
> fputs ("-.\n", asm_out_file);
>   }
> -   switch_to_section (current_function_section ());
>   }
>  }
>  }
> diff --git a/gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c 
> b/gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c
> new file mode 100644
> index 000..5d78bc99bb5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c
> @@ -0,0 +1,22 @@
> +/* Checks that we don't get error: section type conflict with ‘put_page’.  */
> +
> +/* { dg-do compile } */
> +/* { dg-options "-mindirect-branch=thunk-extern 
> -mfunction-return=thunk-extern -mindirect-branch-table -O2" } */
> +
> +int a;
> +int b (void);
> +void c (int);
> +
> +static void
> +put_page (void)
> +{
> +  if (b ())
> +c (a);
> +}
> +
> +__attribute__ ((__section__ (".init.text"), __cold__)) void
> +d (void)
> +{
> +  put_page ();
> +  put_page ();
> +}



Re: [PATCH] IBM Z: fix `section type conflict` with -mindirect-branch-table

2022-02-01 Thread Andreas Krebbel via Gcc-patches
On 2/1/22 21:49, Ilya Leoshkevich wrote:
> Bootstrapped and regtested on s390x-redhat-linux.  Ok for master?
> 
> 
> s390_code_end () puts indirect branch tables into separate sections and
> tries to switch back to wherever it was in the beginning by calling
> switch_to_section (current_function_section ()).
> 
> First of all, this is unnecessary - the other backends don't do it.
> 
> Furthermore, at this time there is no current function, but if the
> last processed function was cold, in_cold_section_p remains set.  This
> causes targetm.asm_out.function_section () to call
> targetm.section_type_flags (), which in absence of current function
> decl classifies the section as SECTION_WRITE.  This causes a section
> type conflict with the existing SECTION_CODE.
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (s390_code_end): Do not switch back to
>   code section.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/nobp-section-type-conflict.c: New test.

Ok. Thanks!

Andreas


> ---
>  gcc/config/s390/s390.cc   |  1 -
>  .../s390/nobp-section-type-conflict.c | 22 +++
>  2 files changed, 22 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c
> 
> diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
> index 43c5c72554a..2db12d4ba4b 100644
> --- a/gcc/config/s390/s390.cc
> +++ b/gcc/config/s390/s390.cc
> @@ -16809,7 +16809,6 @@ s390_code_end (void)
> assemble_name_raw (asm_out_file, label_start);
> fputs ("-.\n", asm_out_file);
>   }
> -   switch_to_section (current_function_section ());
>   }
>  }
>  }
> diff --git a/gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c 
> b/gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c
> new file mode 100644
> index 000..5d78bc99bb5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c
> @@ -0,0 +1,22 @@
> +/* Checks that we don't get error: section type conflict with ‘put_page’.  */
> +
> +/* { dg-do compile } */
> +/* { dg-options "-mindirect-branch=thunk-extern 
> -mfunction-return=thunk-extern -mindirect-branch-table -O2" } */
> +
> +int a;
> +int b (void);
> +void c (int);
> +
> +static void
> +put_page (void)
> +{
> +  if (b ())
> +c (a);
> +}
> +
> +__attribute__ ((__section__ (".init.text"), __cold__)) void
> +d (void)
> +{
> +  put_page ();
> +  put_page ();
> +}



[PATCH] PR101260 regcprop: Add mode change check for copy reg

2022-01-21 Thread Andreas Krebbel via Gcc-patches
When propagating a multi-word register into an access with a smaller
mode the can_change_mode backend hook is already consulted for the
original register.  This however is also required for the intermediate
copy in copy_regno which might use a different register class.

Bootstrapped on x86_64 and s390x. No testsuite regressions.

Ok for mainline?

gcc/ChangeLog:

PR rtl-optimization/101260
* regcprop.cc (maybe_mode_change): Invoke mode_change_ok also for
copy_regno.
---
 gcc/regcprop.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/regcprop.cc b/gcc/regcprop.cc
index 1a9bcf0a1ad..8e966f2b5ac 100644
--- a/gcc/regcprop.cc
+++ b/gcc/regcprop.cc
@@ -426,7 +426,8 @@ maybe_mode_change (machine_mode orig_mode, machine_mode 
copy_mode,
 
   if (orig_mode == new_mode)
 return gen_raw_REG (new_mode, regno);
-  else if (mode_change_ok (orig_mode, new_mode, regno))
+  else if (mode_change_ok (orig_mode, new_mode, regno)
+  && mode_change_ok (copy_mode, new_mode, copy_regno))
 {
   int copy_nregs = hard_regno_nregs (copy_regno, copy_mode);
   int use_nregs = hard_regno_nregs (copy_regno, new_mode);
-- 
2.34.1



Re: [PATCH] s390: Change costs for load on condition.

2022-01-21 Thread Andreas Krebbel via Gcc-patches
On 1/20/22 11:10, Robin Dapp wrote:
> Hi,
> 
> this patch is a follow-up patch to the recent ifcvt changes. It
> increased costs for a load on condition to 6.  This ensures that we
> if-convert sequences of three regular instructions (of cost 4) e.g. a
> compare and two SETs into two loads on condition (of cost 6).  With a
> cost of 5, four-insn sequences (three SETs) would also be if-converted.
> 
> The adjustment to the mov[qi/si]cc expander makes sure we if-convert a
> QImode/bool.  Before, combine would create a paradoxical subreg itself
> but need an additional insn.
> 
> Bootstrapped and regtested on s390x.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> --
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (s390_rtx_costs): Increase costs for load
>   on condition.
>   * config/s390/s390.md: Change mov[qi/si]cc expander.

Could you please add two tests for the sequences which are improved here. Just 
to make sure we get
aware once it breaks again.

Patch is ok. Thanks!

Andreas


Re: [PATCH] s390: Split CCSmode into CCSINT and CCSFP

2022-01-21 Thread Andreas Krebbel via Gcc-patches
On 1/20/22 17:13, Robin Dapp wrote:
> Hi,
> 
> this patch splits the CCSmode into an integer and a floating point
> variant.  This allows ifcvt to consider floating point compares which
> would be rejected before because they could not be reversed.
> 
> Bootstrapped and regtested on s390x.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> --
> 
> gcc/ChangeLog:
> 
>   * config/s390/predicates.md: Add CCSINTmode and CCSFPmode.
>   * config/s390/s390-modes.def (UNORDERED): Likewise.
>   (CC_MODE): Likewise.
>   * config/s390/s390.cc (s390_cc_modes_compatible): Likewise.
>   (s390_match_ccmode_set): Likewise.
>   (s390_select_ccmode): Likewise.
>   (s390_branch_condition_mask): Likewise.
>   (s390_reverse_condition): Likewise.
>   * config/s390/s390.h (REVERSIBLE_CC_MODE): Likewise.
>   * config/s390/s390.md: Likewise.
>   * config/s390/subst.md: Likewise.

> diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md
> index 33194d3f3d6..ec47416cc1b 100644
> --- a/gcc/config/s390/predicates.md
> +++ b/gcc/config/s390/predicates.md
> @@ -325,7 +325,8 @@
>  case E_CCURmode:
>return GET_CODE (op) == LTU;
>
> -case E_CCSmode:
> +case E_CCSINTmode:
> +case E_CCSFPmode:
>return GET_CODE (op) == UNGT;

Can we get an UNGT for CCSINTmode here? Shouldn't this be just GT?

>
>  case E_CCSRmode:
> @@ -370,7 +371,8 @@
>  case E_CCURmode:
>return GET_CODE (op) == GEU;
>
> -case E_CCSmode:
> +case E_CCSINTmode:
> +case E_CCSFPmode:
>return GET_CODE (op) == LE;
>
>  case E_CCSRmode:
> diff --git a/gcc/config/s390/s390-modes.def b/gcc/config/s390/s390-modes.def
> index b419907960e..eafe1e12938 100644
> --- a/gcc/config/s390/s390-modes.def
> +++ b/gcc/config/s390/s390-modes.def
> @@ -48,12 +48,12 @@ CCUR: EQ  GTU  LTU NE 
> (CLGF/R)
>
>  Signed compares
>
> -CCS:  EQ  LT   GT  UNORDERED  (LTGFR, LTGR, LTR, 
> ICM/Y,
> -   LTDBR, LTDR, LTEBR, 
> LTER,
> +CCSINT: EQLT   GT  UNORDERED  (LTGFR, LTGR, LTR, 
> ICM/Y,

CC3 for signed integer compares should not occur. So perhaps '-' instead of 
UNORDERED?

> CG/R, C/R/Y, CGHI, 
> CHI,
> -   CDB/R, CD/R, CEB/R, 
> CE/R,
> -   ADB/R, AEB/R, SDB/R, 
> SEB/R,
> SRAG, SRA, SRDA)
> +CCSFP:  EQLT   GT  UNORDERED  (CDB/R, CD/R, CEB/R, 
> CE/R,
> +   LTDBR, LTDR, LTEBR, 
> LTER,
> +   ADB/R, AEB/R, SDB/R, 
> SEB/R)
>  CCSR: EQ  GT   LT  UNORDERED  (CGF/R, CH/Y)
>  CCSFPS: EQLT   GT  UNORDERED  (KEB/R, KDB/R, KXBR, 
> KDTR,
>  KXTR, WFK)
...
> @@ -2139,7 +2148,8 @@ s390_branch_condition_mask (rtx code)
>   }
>break;
>
> -case E_CCSmode:
> +case E_CCSINTmode:
> +case E_CCSFPmode:
>  case E_CCSFPSmode:
>switch (GET_CODE (code))
>   {

We will need a new switch statement for CCSINT without all the FP only 
comparison operators.

Andreas


Re: [PATCH v2] Disable -fsplit-stack support on non-glibc targets

2022-01-20 Thread Andreas Krebbel via Gcc-patches
On 1/20/22 23:52, Richard Sandiford wrote:
> cc:ing the x86 and s390 maintainers
> 
> soeren--- via Gcc-patches  writes:
>> From: Sören Tempel 
>>
>> The -fsplit-stack option requires the pthread_t TCB definition in the
>> libc to provide certain struct fields at specific hardcoded offsets. As
>> far as I know, only glibc provides these fields at the required offsets.
>> Most notably, musl libc does not have these fields. However, since gcc
>> accesses the fields using a fixed offset, this does not cause a
>> compile-time error, but instead results in a silent memory corruption at
>> run-time with musl libc. For example, on s390x libgcc's
>> __stack_split_initialize CTOR will overwrite the cancel field in the
>> pthread_t TCB on musl.
>>
>> The -fsplit-stack option is used within the gcc code base itself by
>> gcc-go (if available). On musl-based systems with split-stack support
>> (i.e. s390x or x86) this causes Go programs compiled with gcc-go to
>> misbehave at run-time.
>>
>> This patch fixes gcc-go on musl by disabling -fsplit-stack in gcc itself
>> since it is not supported on non-glibc targets anyhow. This is achieved
>> by checking if gcc targets a glibc-based system. This check has been
>> added for x86 and s390x, the rs6000 config already checks for
>> TARGET_GLIBC_MAJOR. Other architectures do not have split-stack
>> support. With this patch applied, the gcc-go configure script will
>> detect that -fsplit-stack support is not available and will not use it.
>>
>> See https://www.openwall.com/lists/musl/2012/10/16/12
>>
>> This patch was written under the assumption that glibc is the only libc
>> implementation which supports the required fields at the required
>> offsets in the pthread_t TCB. The patch has been tested on Alpine Linux
>> Edge on the s390x and x86 architectures by bootstrapping Google's Go
>> implementation with gcc-go.
>>
>> Signed-off-by: Sören Tempel 
>>
>> gcc/ChangeLog:
>>
>>  * common/config/s390/s390-common.c (s390_supports_split_stack):
>>  Only support split-stack on glibc targets.
>>  * config/i386/gnu-user-common.h (STACK_CHECK_STATIC_BUILTIN): Ditto.
>>  * config/i386/gnu.h (defined): Ditto.

s390 parts are ok.

Thanks!

Andreas

>> ---
>> This version of the patch addresses feedback by Andrew Pinski and uses
>> OPTION_GLIBC as well as opts->x_linux_libc == LIBC_GLIBC to detect glibc
>> targets (instead of relying on TARGET_GLIBC_MAJOR).
>>
>>  gcc/common/config/s390/s390-common.c | 11 +--
>>  gcc/config/i386/gnu-user-common.h|  5 +++--
>>  gcc/config/i386/gnu.h|  6 +-
>>  3 files changed, 17 insertions(+), 5 deletions(-)
> 
> Sorry for the slow review.  The patch LGTM bar some minor formatting
> nits below, but target maintainers should have the final say.
> 
>> diff --git a/gcc/common/config/s390/s390-common.c 
>> b/gcc/common/config/s390/s390-common.c
>> index b6bc8501742..fc86e0bc5e7 100644
>> --- a/gcc/common/config/s390/s390-common.c
>> +++ b/gcc/common/config/s390/s390-common.c
>> @@ -116,13 +116,20 @@ s390_handle_option (struct gcc_options *opts 
>> ATTRIBUTE_UNUSED,
>>  
>>  /* -fsplit-stack uses a field in the TCB, available with glibc-2.23.
>> We don't verify it, since earlier versions just have padding at
>> -   its place, which works just as well.  */
>> +   its place, which works just as well. For other libc implementations
> 
> GCC style is to use 2 spaces after a full stop.  Same for the x86 part.
> 
>> +   we disable the feature entirely to avoid corrupting the TCB.  */
>>  
>>  static bool
>>  s390_supports_split_stack (bool report ATTRIBUTE_UNUSED,
>> struct gcc_options *opts ATTRIBUTE_UNUSED)
> 
> These parameters are no longer unused after the patch, so it'd be good
> to remove the attributes.
> 
>>  {
>> -  return true;
>> +  if (opts->x_linux_libc == LIBC_GLIBC) {
>> +return true;
>> +  } else {
>> +if (report)
>> +  error("%<-fsplit-stack%> currently only supported on GNU/Linux");
>> +return false;
>> +  }
> 
> Normal GCC formatting would be something like:
> 
>   if (opts->x_linux_libc == LIBC_GLIBC)
> return true;
> 
>   if (report)
> error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
>   return false;
> 
> Sorry for the fussy rules.
> 
> Thanks,
> Richard
> 
>>  }
>>  
>>  #undef TARGET_DEFAULT_TARGET_FLAGS
>> diff --git a/gcc/config/i386/gnu-user-common.h 
>> b/gcc/config/i386/gnu-user-common.h
>> index 00226f5a455..6e13315b5a3 100644
>> --- a/gcc/config/i386/gnu-user-common.h
>> +++ b/gcc/config/i386/gnu-user-common.h
>> @@ -66,7 +66,8 @@ along with GCC; see the file COPYING3.  If not see
>>  #define STACK_CHECK_STATIC_BUILTIN 1
>>  
>>  /* We only build the -fsplit-stack support in libgcc if the
>> -   assembler has full support for the CFI directives.  */
>> -#if HAVE_GAS_CFI_PERSONALITY_DIRECTIVE
>> +   assembler has full support for the CFI directives and
>> +   targets glibc.  */
>> +#if HAVE_GAS_CFI_PERSONALITY_DIRECTIVE 

Re: [PATCH] cprop_hardreg: Workaround for narrow mode != lowpart targets

2022-01-14 Thread Andreas Krebbel via Gcc-patches
On 1/14/22 20:41, Andreas Krebbel via Gcc-patches wrote:
> On 1/14/22 08:37, Richard Biener wrote:
> ...
>> Can the gist of this bug be put into the GCC bugzilla so the rev can
>> refer to it? 
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104034
> 
>> Can we have a testcase even?
> The testcase from Jakub is in the BZ. However, since it doesn't fail with 
> head I didn't try to
> include it in my patch.
> 
>> I'm not quite understanding the problem but is it that, say,
>>
>>  (subreg:DI (reg:V2DI ..) 0)
>>
>> isn't the same as
>>
>>  (lowpart:DI (reg:V2DI ...) 0)
> 
> (reg:DI v0) does not match the lower order bits of (reg:TI v0)
> 
>> ?  The regcprop code looks more like asking whether the larger reg
>> is a composition of multiple other hardregs and will return the specific
>> hardreg corresponding to the lowpart - so like if on s390 the vector
>> registers overlap with some other regset.  But then doing the actual
>> accesses via the other regset regs doesn't actually work?  Isn't the
>> backend then lying to us (aka the mode_change_ok returns the
>> wrong answer)?
> 
> can_change_mode_class should do the right thing. We return false in case 
> somebody wants to change TI
> to DI for a vector register. However, the hook never gets called like this 
> from regcprop. regcprop
> only asks whether it is ok to change (reg:TI r8) to (reg:DI r8) and that's 
> indeed ok.

After writing this I'm wondering whether this would be a better fix:

diff --git a/gcc/regcprop.c b/gcc/regcprop.c
index 18132425ab2..b6a3f4e3804 100644
--- a/gcc/regcprop.c
+++ b/gcc/regcprop.c
@@ -402,7 +402,8 @@ maybe_mode_change (machine_mode orig_mode, machine_mode 
copy_mode,

   if (orig_mode == new_mode)
 return gen_raw_REG (new_mode, regno);
-  else if (mode_change_ok (orig_mode, new_mode, regno))
+  else if (mode_change_ok (orig_mode, new_mode, regno)
+   && mode_change_ok (copy_mode, new_mode, copy_regno))
 {
   int copy_nregs = hard_regno_nregs (copy_regno, copy_mode);
   int use_nregs = hard_regno_nregs (copy_regno, new_mode);


Andreas


Re: [PATCH] cprop_hardreg: Workaround for narrow mode != lowpart targets

2022-01-14 Thread Andreas Krebbel via Gcc-patches
On 1/14/22 08:37, Richard Biener wrote:
...
> Can the gist of this bug be put into the GCC bugzilla so the rev can
> refer to it? 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104034

> Can we have a testcase even?
The testcase from Jakub is in the BZ. However, since it doesn't fail with head 
I didn't try to
include it in my patch.

> I'm not quite understanding the problem but is it that, say,
> 
>  (subreg:DI (reg:V2DI ..) 0)
> 
> isn't the same as
> 
>  (lowpart:DI (reg:V2DI ...) 0)

(reg:DI v0) does not match the lower order bits of (reg:TI v0)

> ?  The regcprop code looks more like asking whether the larger reg
> is a composition of multiple other hardregs and will return the specific
> hardreg corresponding to the lowpart - so like if on s390 the vector
> registers overlap with some other regset.  But then doing the actual
> accesses via the other regset regs doesn't actually work?  Isn't the
> backend then lying to us (aka the mode_change_ok returns the
> wrong answer)?

can_change_mode_class should do the right thing. We return false in case 
somebody wants to change TI
to DI for a vector register. However, the hook never gets called like this from 
regcprop. regcprop
only asks whether it is ok to change (reg:TI r8) to (reg:DI r8) and that's 
indeed ok.

Before cprop we have:

(insn 175 176 174 3 (set (reg/v:TI 16 %f0 [orig:69 __comp ] [69])
(reg:TI 8 %r8)) -1
 (nil))


(insn 155 124 156 3 (set (reg:DI 6 %r6 [ __comp ])
(reg:DI 16 %f0)) 1277 {*movdi_64}
 (nil))


(insn 156 155 128 3 (set (reg:DI 7 %r7 [orig:69 __comp+8 ] [69])
(unspec:DI [
(reg:V2DI 16 %f0)
(const_int 1 [0x1])
] UNSPEC_VEC_EXTRACT)) 409 {*vec_extractv2di}
 (expr_list:REG_DEAD (reg:V2DI 16 %f0)
(nil)))

So a copy of reg pair r8/r9 is kept in v0==f0. The problem comes from cprop 
assuming that (reg:DI
f0) refers to the low part of f0 and as a consequence replaces (reg:DI 16 %f0) 
with (reg:DI 9 %r9)
what would be the DImode lowpart of (reg:TI r8)

Insn 155 and 156 are the result of applying the following splitter:

; Split a VR -> GPR TImode move into 2 vector load GR from VR element.
; For the higher order bits we do simply a DImode move while the
; second part is done via vec extract.  Both will end up as vlgvg.
(define_split
  [(set (match_operand:TI 0 "register_operand" "")
(match_operand:TI 1 "register_operand" ""))]
  "TARGET_VX && reload_completed
   && GENERAL_REG_P (operands[0])
   && VECTOR_REG_P (operands[1])"
  [(set (match_dup 2) (match_dup 4))
   (set (match_dup 3) (unspec:DI [(match_dup 5) (const_int 1)]
 UNSPEC_VEC_EXTRACT))]
{
  operands[2] = operand_subword (operands[0], 0, 0, TImode);
  operands[3] = operand_subword (operands[0], 1, 0, TImode);
  operands[4] = gen_rtx_REG (DImode, REGNO (operands[1]));
  operands[5] = gen_rtx_REG (V2DImode, REGNO (operands[1]));
})

Introducing the (reg:DI 16 %f0) access to the TImode VR is something the middle 
end is not expected
to do - because we prevent it in can_change_mode_class. However, I don't see 
anything wrong with
doing that in the splitter. In our backend this is well-defined as being the 
first element in the
vector register - the high part of the TImode vector register value.

Unfortunately it confuses cprop :(

Andreas



> 
> How does the stage1 fix, aka "rewrite" of cprop, look like?  How can we
> be sure this hack isn't still present in 10 years from now?
> 
> Thanks,
> Richard.
> 
>> Bootstrapped and regression-tested on s390x.
>>
>> Ok?
>>
>> gcc/ChangeLog:
>>
>> * target.def (narrow_mode_refers_low_part_p): Add new target hook.
>> * config/s390/s390.c (s390_narrow_mode_refers_low_part_p):
>> Implement new target hook for IBM Z.
>> (TARGET_NARROW_MODE_REFERS_LOW_PART_P): New macro.
>> * regcprop.c (maybe_mode_change): Disable transformation depending
>> on the new target hook.
>> ---
>>  gcc/config/s390/s390.c | 14 ++
>>  gcc/regcprop.c |  3 ++-
>>  gcc/target.def | 12 +++-
>>  3 files changed, 27 insertions(+), 2 deletions(-)
>>
>> diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
>> index 056002e4a4a..aafc6d63be6 100644
>> --- a/gcc/config/s390/s390.c
>> +++ b/gcc/config/s390/s390.c
>> @@ -10488,6 +10488,18 @@ s390_hard_regno_mode_ok (unsigned int regno, 
>> machine_mode mode)
>>return false;
>>  }
>>
>> +/* Implement TARGET_NARROW_MODE_REFERS_LOW_PART_P.  */
>> +
>> +static bool
>> +s390_narrow_mode_refers_low_part_p (unsigned int regno)
>> +{
>> +  if (reg_classes_intersect_p (VEC_REGS, REGNO_REG_CLASS (regno)))
>> +return false;
>> +
>> +  return true;
>> +}
>> +
>> +
>>  /* Implement TARGET_MODES_TIEABLE_P.  */
>>
>>  static bool
>> @@ -17472,6 +17484,8 @@ s390_vectorize_vec_perm_const (machine_mode vmode, 
>> rtx target, rtx op0, rtx op1,
>>  #undef TARGET_VECTORIZE_VEC_PERM_CONST
>>  #define 

Re: [PATCH] cprop_hardreg: Workaround for narrow mode != lowpart targets

2022-01-13 Thread Andreas Krebbel via Gcc-patches
On 1/13/22 18:11, Andreas Krebbel via Gcc-patches wrote:
...
> @@ -5949,7 +5959,7 @@ register if floating point arithmetic is not being 
> done.  As long as the\n\
>  floating registers are not in class @code{GENERAL_REGS}, they will not\n\
>  be used unless some pattern's constraint asks for one.",
>   bool, (unsigned int regno, machine_mode mode),
> - hook_bool_uint_mode_true)
> + hook_bool_uint_true)
>  
>  DEFHOOK
>  (modes_tieable_p,

That hunk was a copy and paste bug and does not belong to the patch.

Andreas


[PATCH] cprop_hardreg: Workaround for narrow mode != lowpart targets

2022-01-13 Thread Andreas Krebbel via Gcc-patches
The cprop_hardreg pass is built around the assumption that accessing a
register in a narrower mode is the same as accessing the lowpart of
the register.  This unfortunately is not true for vector registers on
IBM Z. This caused a miscompile of LLVM with GCC 8.5. The problem
could not be reproduced with upstream GCC unfortunately but we have to
assume that it is latent there. The right fix would require
substantial changes to the cprop pass and is certainly something we
would want for our platform. But since this would not be acceptable
for older GCCs I'll go with what Vladimir proposed in the RedHat BZ
and introduce a hopefully temporary and undocumented target hook to
disable that specific transformation in regcprop.c.

Here the RedHat BZ for reference:
https://bugzilla.redhat.com/show_bug.cgi?id=2028609

Bootstrapped and regression-tested on s390x.

Ok?

gcc/ChangeLog:

* target.def (narrow_mode_refers_low_part_p): Add new target hook.
* config/s390/s390.c (s390_narrow_mode_refers_low_part_p):
Implement new target hook for IBM Z.
(TARGET_NARROW_MODE_REFERS_LOW_PART_P): New macro.
* regcprop.c (maybe_mode_change): Disable transformation depending
on the new target hook.
---
 gcc/config/s390/s390.c | 14 ++
 gcc/regcprop.c |  3 ++-
 gcc/target.def | 12 +++-
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 056002e4a4a..aafc6d63be6 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -10488,6 +10488,18 @@ s390_hard_regno_mode_ok (unsigned int regno, 
machine_mode mode)
   return false;
 }
 
+/* Implement TARGET_NARROW_MODE_REFERS_LOW_PART_P.  */
+
+static bool
+s390_narrow_mode_refers_low_part_p (unsigned int regno)
+{
+  if (reg_classes_intersect_p (VEC_REGS, REGNO_REG_CLASS (regno)))
+return false;
+
+  return true;
+}
+
+
 /* Implement TARGET_MODES_TIEABLE_P.  */
 
 static bool
@@ -17472,6 +17484,8 @@ s390_vectorize_vec_perm_const (machine_mode vmode, rtx 
target, rtx op0, rtx op1,
 #undef TARGET_VECTORIZE_VEC_PERM_CONST
 #define TARGET_VECTORIZE_VEC_PERM_CONST s390_vectorize_vec_perm_const
 
+#undef TARGET_NARROW_MODE_REFERS_LOW_PART_P
+#define TARGET_NARROW_MODE_REFERS_LOW_PART_P s390_narrow_mode_refers_low_part_p
 
 struct gcc_target targetm = TARGET_INITIALIZER;
 
diff --git a/gcc/regcprop.c b/gcc/regcprop.c
index 1a9bcf0a1ad..aaf94ad9b51 100644
--- a/gcc/regcprop.c
+++ b/gcc/regcprop.c
@@ -426,7 +426,8 @@ maybe_mode_change (machine_mode orig_mode, machine_mode 
copy_mode,
 
   if (orig_mode == new_mode)
 return gen_raw_REG (new_mode, regno);
-  else if (mode_change_ok (orig_mode, new_mode, regno))
+  else if (mode_change_ok (orig_mode, new_mode, regno)
+  && targetm.narrow_mode_refers_low_part_p (regno))
 {
   int copy_nregs = hard_regno_nregs (copy_regno, copy_mode);
   int use_nregs = hard_regno_nregs (copy_regno, new_mode);
diff --git a/gcc/target.def b/gcc/target.def
index 8fd2533e90a..598eea501ff 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -5446,6 +5446,16 @@ value that the middle-end intended.",
  bool, (machine_mode from, machine_mode to, reg_class_t rclass),
  hook_bool_mode_mode_reg_class_t_true)
 
+/* This hook is used to work around a problem in regcprop. Hardcoded
+assumptions currently prevent it from working correctly for targets
+where the low part of a multi-word register doesn't align to accessing
+the register with a narrower mode.  */
+DEFHOOK_UNDOC
+(narrow_mode_refers_low_part_p,
+"",
+bool, (unsigned int regno),
+hook_bool_unit_true)
+
 /* Change pseudo allocno class calculated by IRA.  */
 DEFHOOK
 (ira_change_pseudo_allocno_class,
@@ -5949,7 +5959,7 @@ register if floating point arithmetic is not being done.  
As long as the\n\
 floating registers are not in class @code{GENERAL_REGS}, they will not\n\
 be used unless some pattern's constraint asks for one.",
  bool, (unsigned int regno, machine_mode mode),
- hook_bool_uint_mode_true)
+ hook_bool_uint_true)
 
 DEFHOOK
 (modes_tieable_p,
-- 
2.33.1



Re: [PATCH] IBM Z: Fix load-and-test peephole2 condition

2021-11-19 Thread Andreas Krebbel via Gcc-patches
On 11/19/21 10:45, Stefan Schulze Frielinghaus wrote:
...
> diff --git a/gcc/testsuite/gcc.target/s390/2029.c 
> b/gcc/testsuite/gcc.target/s390/2029.c
> new file mode 100644
> index 000..1a6df4f4b89
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/2029.c
> @@ -0,0 +1,12 @@
> +/* { dg-do run } */
> +/* { dg-options "-Os -march=z10" } */

Although z10 is pretty old we will need an effective target check here. Ok with 
that change.

Thanks!

Andreas


Re: [PATCH] Fix PR103028

2021-11-05 Thread Andreas Krebbel via Gcc-patches
On 11/5/21 20:34, Jeff Law wrote:
> 
> 
> On 11/5/2021 4:19 AM, Andreas Krebbel via Gcc-patches wrote:
>> This prevents find_cond_trap from being invoked after reload.  It may
>> generate compares which would require reloading.
>>
>> Bootstrapped and regression tested on s390x.
>>
>> Ok for mainline?
>>
>> gcc/ChangeLog:
>>
>>  PR rtl-optimization/103028
>>  * ifcvt.c (find_if_header): Invoke find_cond_trap only before
>>  reload.
>>
>> gcc/testsuite/ChangeLog:
>>
>>  PR rtl-optimization/103028
>>  * gcc.dg/pr103028.c: New test.
> Shouldn't this be handled by the target by rejecting creating the trap 
> after reload has completed since the target seems to need new pseudos to 
> generate a conditional trap?  Otherwise we're penalizing targets which 
> don't need new pseudos to generate conditional traps.

In this case we do not explicitely create a new pseudo. It is rather that we 
emit a pattern which
would need to be handled be reload. I think passes which run after reload are 
not allowed to emit
patterns which would require reloading and it cannot be up to the backend to 
prevent this.

Instead of disabling this path after reload we could also try to check all the 
to be emitted insns
with constrain_operands to make sure at least one of the alternatives is an 
immediate match. This
should only reject cases which are really broken. I didn't try this because I 
haven't seen anything
like this in ifcvt.c while I have seen several places where we just bail out 
once reload_completed
is true.

Andreas


[PATCH] Fix PR103028

2021-11-05 Thread Andreas Krebbel via Gcc-patches
This prevents find_cond_trap from being invoked after reload.  It may
generate compares which would require reloading.

Bootstrapped and regression tested on s390x.

Ok for mainline?

gcc/ChangeLog:

PR rtl-optimization/103028
* ifcvt.c (find_if_header): Invoke find_cond_trap only before
reload.

gcc/testsuite/ChangeLog:

PR rtl-optimization/103028
* gcc.dg/pr103028.c: New test.
---
 gcc/ifcvt.c |  3 ++-
 gcc/testsuite/gcc.dg/pr103028.c | 16 
 2 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr103028.c

diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 017944f4f79..1f5b9476ac2 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -4341,7 +4341,8 @@ find_if_header (basic_block test_bb, int pass)
   && cond_exec_find_if_block (_info))
 goto success;
 
-  if (targetm.have_trap ()
+  if (!reload_completed
+  && targetm.have_trap ()
   && optab_handler (ctrap_optab, word_mode) != CODE_FOR_nothing
   && find_cond_trap (test_bb, then_edge, else_edge))
 goto success;
diff --git a/gcc/testsuite/gcc.dg/pr103028.c b/gcc/testsuite/gcc.dg/pr103028.c
new file mode 100644
index 000..e299ac5d5b5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr103028.c
@@ -0,0 +1,16 @@
+/* PR rtl-optimization/103028 */
+/* { dg-do compile } */
+/* { dg-options "-Og -fif-conversion2 -fharden-conditional-branches" } */
+
+/* This used to fail on s390x only with -march=z9-109 and -march=z9-ec */
+/* { dg-additional-options "-march=z9-ec" { target s390*-*-* } } */
+
+unsigned char x;
+int foo(void)
+{
+  unsigned long long i = x;
+  i = i + 0x8000;
+  if (i > 0x)
+return x;
+  return 0;
+}
-- 
2.31.1



[Committed] IBM Z: Define STACK_CHECK_MOVING_SP

2021-11-04 Thread Andreas Krebbel via Gcc-patches
With -fstack-check the stack probes emitted access memory below the
stack pointer.

Bootstrapped and regression tested on s390x.

Committed to mainline

gcc/ChangeLog:

* config/s390/s390.h (STACK_CHECK_MOVING_SP): New macro
definition.
---
 gcc/config/s390/s390.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index fb16a455a03..186c5c6200b 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -332,6 +332,11 @@ extern const char *s390_host_detect_local_cpu (int argc, 
const char **argv);
 
 #define STACK_SIZE_MODE (Pmode)
 
+/* Make the stack pointer to be moved downwards while issuing stack probes with
+   -fstack-check.  We need this to prevent memory below the stack pointer from
+   being accessed.  */
+#define STACK_CHECK_MOVING_SP 1
+
 #ifndef IN_LIBGCC2
 
 /* Width of a word, in units (bytes).  */
-- 
2.31.1



Re: [PATCH] IBM Z: Free bbs in s390_loop_unroll_adjust

2021-11-03 Thread Andreas Krebbel via Gcc-patches
On 11/2/21 18:31, Stefan Schulze Frielinghaus wrote:
> Bootstrapped and regtested on IBM Z.  Ok for mainline?
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.c (s390_loop_unroll_adjust): In case of early
>   exit free bbs.

Ok. Thanks!

Andreas



Re: [PATCH] IBM Z: ldist-{rawmemchr,strlen} tests require vector extensions

2021-11-02 Thread Andreas Krebbel via Gcc-patches
On 11/2/21 15:54, Stefan Schulze Frielinghaus wrote:
> The tests require vector extensions which are only available for z13 and
> later while using the z/Architecture.
> 
> Bootstrapped and regtested on IBM Z.  Ok for mainline?
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/tree-ssa/ldist-rawmemchr-1.c: For IBM Z set arch to z13
>   and use z/Architecture since the tests require vector extensions.
>   * gcc.dg/tree-ssa/ldist-rawmemchr-2.c: Likewise.
>   * gcc.dg/tree-ssa/ldist-strlen-1.c: Likewise.
>   * gcc.dg/tree-ssa/ldist-strlen-3.c: Likewise.

Ok. Thanks!

Andreas


Re: [PATCH] IBM Z: Fix address of operands will never be NULL warnings

2021-11-02 Thread Andreas Krebbel via Gcc-patches
On 10/30/21 12:43, Stefan Schulze Frielinghaus wrote:
> Since a recent enhancement of -Waddress a couple of warnings are emitted
> and turned into errors during bootstrap:
> 
> gcc/config/s390/s390.md:12087:25: error: the address of 'operands' will never 
> be NULL [-Werror=address]
> 12087 |   "TARGET_HTM && operands != NULL
> build/gencondmd.c:59:12: note: 'operands' declared here
>59 | extern rtx operands[];
>   |^~~~
> 
> Fixed by removing those non-null checks.
> Bootstrapped and regtested on IBM Z.  Ok for mainline?
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.md ("*cc_to_int", "tabort", "*tabort_1",
>   "*tabort_1_plus"): Remove operands non-null check.

Ok. Thanks!

Andreas


Re: [PATCH] IBM Z: Provide rawmemchr{qi,hi,si} expander

2021-10-08 Thread Andreas Krebbel via Gcc-patches
On 10/8/21 16:23, Stefan Schulze Frielinghaus wrote:
> On Thu, Oct 07, 2021 at 11:16:24AM +0200, Andreas Krebbel wrote:
>> On 9/20/21 11:24, Stefan Schulze Frielinghaus wrote:
>>> This patch implements the rawmemchr expander as introduced in
>>> https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579649.html
>>>
>>> Bootstrapped and regtested in conjunction with the patch from above on
>>> IBM Z.  Ok for mainline?
>>>
>>
>>> From 551362cda54048dc1a51588112f11c070ed52020 Mon Sep 17 00:00:00 2001
>>> From: Stefan Schulze Frielinghaus 
>>> Date: Mon, 8 Feb 2021 10:35:39 +0100
>>> Subject: [PATCH 2/2] IBM Z: Provide rawmemchr{qi,hi,si} expander
>>>
>>> gcc/ChangeLog:
>>>
>>> * config/s390/s390-protos.h (s390_rawmemchrqi): Add prototype.
>>> (s390_rawmemchrhi): Add prototype.
>>> (s390_rawmemchrsi): Add prototype.
>>> * config/s390/s390.c (s390_rawmemchr): New function.
>>> (s390_rawmemchrqi): New function.
>>> (s390_rawmemchrhi): New function.
>>> (s390_rawmemchrsi): New function.
>>> * config/s390/s390.md (rawmemchr): New expander.
>>> (rawmemchr): New expander.
>>> * config/s390/vector.md (vec_vfees): Basically a copy of
>>> the pattern vfees from vx-builtins.md.
>>> * config/s390/vx-builtins.md (*vfees): Remove.
>>
>> Thanks! Would it make sense to also extend the strlen and movstr expanders
>> we have to support the additional character modes?
> 
> For strlen-like loops over non-character arrays the current
> implementation in the loop distribution pass uses rawmemchr and
> computes pointer difference in order to compute the length.  Thus we get
> strlen for free and don't need to reimplement it.

Good to know. Thanks!

...
> Please find a new version attached.  I did another bootstrap+regtest on
> IBM Z.  Ok for mainline?
> 
> Thanks for your detailed review!

Ok for mainline. Thanks!

Andreas


Re: [PATCH] IBM Z: Provide rawmemchr{qi,hi,si} expander

2021-10-07 Thread Andreas Krebbel via Gcc-patches
On 9/20/21 11:24, Stefan Schulze Frielinghaus wrote:
> This patch implements the rawmemchr expander as introduced in
> https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579649.html
> 
> Bootstrapped and regtested in conjunction with the patch from above on
> IBM Z.  Ok for mainline?
> 

> From 551362cda54048dc1a51588112f11c070ed52020 Mon Sep 17 00:00:00 2001
> From: Stefan Schulze Frielinghaus 
> Date: Mon, 8 Feb 2021 10:35:39 +0100
> Subject: [PATCH 2/2] IBM Z: Provide rawmemchr{qi,hi,si} expander
>
> gcc/ChangeLog:
>
>   * config/s390/s390-protos.h (s390_rawmemchrqi): Add prototype.
>   (s390_rawmemchrhi): Add prototype.
>   (s390_rawmemchrsi): Add prototype.
>   * config/s390/s390.c (s390_rawmemchr): New function.
>   (s390_rawmemchrqi): New function.
>   (s390_rawmemchrhi): New function.
>   (s390_rawmemchrsi): New function.
>   * config/s390/s390.md (rawmemchr): New expander.
>   (rawmemchr): New expander.
>   * config/s390/vector.md (vec_vfees): Basically a copy of
>   the pattern vfees from vx-builtins.md.
>   * config/s390/vx-builtins.md (*vfees): Remove.

Thanks! Would it make sense to also extend the strlen and movstr expanders
we have to support the additional character modes?

A few style comments below.

>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/s390/rawmemchr-1.c: New test.
> ---
>  gcc/config/s390/s390-protos.h   |  4 +
>  gcc/config/s390/s390.c  | 89 ++
>  gcc/config/s390/s390.md | 20 +
>  gcc/config/s390/vector.md   | 26 ++
>  gcc/config/s390/vx-builtins.md  | 26 --
>  gcc/testsuite/gcc.target/s390/rawmemchr-1.c | 99 +
>  6 files changed, 238 insertions(+), 26 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/s390/rawmemchr-1.c
>
> diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
> index 4b03c6e99f5..0d9619e8254 100644
> --- a/gcc/config/s390/s390-protos.h
> +++ b/gcc/config/s390/s390-protos.h
> @@ -66,6 +66,10 @@ s390_asm_declare_function_size (FILE *asm_out_file,
>   const char *fnname ATTRIBUTE_UNUSED, tree decl);
>  #endif
>
> +extern void s390_rawmemchrqi(rtx dst, rtx src, rtx pat);
> +extern void s390_rawmemchrhi(rtx dst, rtx src, rtx pat);
> +extern void s390_rawmemchrsi(rtx dst, rtx src, rtx pat);
> +
>  #ifdef RTX_CODE
>  extern int s390_extra_constraint_str (rtx, int, const char *);
>  extern int s390_const_ok_for_constraint_p (HOST_WIDE_INT, int, const char *);
> diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
> index 54dd6332c3a..1435ce156e2 100644
> --- a/gcc/config/s390/s390.c
> +++ b/gcc/config/s390/s390.c
> @@ -16559,6 +16559,95 @@ s390_excess_precision (enum excess_precision_type 
> type)
>  }
>  #endif
>
> +template  +   machine_mode elt_mode,
> +   rtx (*gen_vec_vfees) (rtx, rtx, rtx, rtx)>
> +static void
> +s390_rawmemchr(rtx dst, rtx src, rtx pat) {

I think it would be a bit easier to turn the vec_vfees expander into a
'parameterized name' and add the mode as parameter.  I'll attach a patch
to illustrate how this might look like.

> +  rtx lens = gen_reg_rtx (V16QImode);
> +  rtx pattern = gen_reg_rtx (vec_mode);
> +  rtx loop_start = gen_label_rtx ();
> +  rtx loop_end = gen_label_rtx ();
> +  rtx addr = gen_reg_rtx (Pmode);
> +  rtx offset = gen_reg_rtx (Pmode);
> +  rtx tmp = gen_reg_rtx (Pmode);
> +  rtx loadlen = gen_reg_rtx (SImode);
> +  rtx matchlen = gen_reg_rtx (SImode);
> +  rtx mem;
> +
> +  pat = GEN_INT (trunc_int_for_mode (INTVAL (pat), elt_mode));
> +  emit_insn (gen_rtx_SET (pattern, gen_rtx_VEC_DUPLICATE (vec_mode, pat)));
> +
> +  emit_move_insn (addr, XEXP (src, 0));
> +
> +  // alignment
> +  emit_insn (gen_vlbb (lens, gen_rtx_MEM (BLKmode, addr), GEN_INT (6)));
> +  emit_insn (gen_lcbb (loadlen, addr, GEN_INT (6)));
> +  lens = convert_to_mode (vec_mode, lens, 1);
> +  emit_insn (gen_vec_vfees (lens, lens, pattern, GEN_INT (0)));
> +  lens = convert_to_mode (V4SImode, lens, 1);
> +  emit_insn (gen_vec_extractv4sisi (matchlen, lens, GEN_INT (1)));
> +  lens = convert_to_mode (vec_mode, lens, 1);

That back and forth NOP conversion stuff is ugly but I couldn't find a
more elegant way to write this without generating worse code.  Of
course we want to benefit here from the fact that the result operand
of vfees is already zero-extended.  Perhaps factor this out into a
utility function or an extra expander because we appear to need this
frequently?! Not a requirement for this patch though.

> +  emit_cmp_and_jump_insns (matchlen, loadlen, LT, NULL_RTX, SImode, 1, 
> loop_end);
> +  force_expand_binop (Pmode, and_optab, addr, GEN_INT (15), tmp, 1, 
> OPTAB_DIRECT);
> +  force_expand_binop (Pmode, sub_optab, GEN_INT (16), tmp, tmp, 1, 
> OPTAB_DIRECT);
> +  force_expand_binop (Pmode, add_optab, addr, tmp, addr, 1, OPTAB_DIRECT);

Couldn't we just do this as '(addr + 16) & ~0xf' 

Re: [PATCH gcc-11 0/2] Backport kpatch changes

2021-09-30 Thread Andreas Krebbel via Gcc-patches
On 9/30/21 10:50, Ilya Leoshkevich wrote:
> Hi,
> 
> This series contains a backport of kpatch changes needed to support
> https://github.com/dynup/kpatch/pull/1203 so that it could be used in
> RHEL 9.  The patches have been in master for 4 months now without
> issues.
> 
> Bootstrapped and regtested on s390x-redhat-linux.
> 
> Ok for gcc-11?

Ok for both. Thanks!

Andreas


[Committed] IBM Z: TPF: Add cc clobber to profiling expanders

2021-09-22 Thread Andreas Krebbel via Gcc-patches
The code sequence emitted uses CC internally.

gcc/ChangeLog:

* config/s390/tpf.md (prologue_tpf, epilogue_tpf): Add cc clobber.
---
 gcc/config/s390/tpf.md | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/config/s390/tpf.md b/gcc/config/s390/tpf.md
index 297e9d1f755..35b37190705 100644
--- a/gcc/config/s390/tpf.md
+++ b/gcc/config/s390/tpf.md
@@ -21,7 +21,8 @@ (define_insn "prologue_tpf"
   [(unspec_volatile [(match_operand 0 "const_int_operand" "J")
 (match_operand 1 "const_int_operand" "J")]
UNSPECV_TPF_PROLOGUE)
-   (clobber (reg:DI 1))]
+   (clobber (reg:DI 1))
+   (clobber (reg:CC CC_REGNUM))]
   "TARGET_TPF_PROFILING"
   "larl\t%%r1,.+14\;tm\t%0,255\;bnz\t%1"
   [(set_attr "length"   "14")])
@@ -31,7 +32,8 @@ (define_insn "epilogue_tpf"
   [(unspec_volatile [(match_operand 0 "const_int_operand" "J")
 (match_operand 1 "const_int_operand" "J")]
UNSPECV_TPF_EPILOGUE)
-   (clobber (reg:DI 1))]
+   (clobber (reg:DI 1))
+   (clobber (reg:CC CC_REGNUM))]
   "TARGET_TPF_PROFILING"
   "larl\t%%r1,.+14\;tm\t%0,255\;bnz\t%1"
   [(set_attr "length"   "14")])
-- 
2.31.1



[Committed] IBM Z: Fix PR102222

2021-09-22 Thread Andreas Krebbel via Gcc-patches
Avoid emitting a strict low part move if the insv target actually
affects the whole target reg.

Bootstrapped and regression tested on s390x.

gcc/ChangeLog:

PR target/10
* config/s390/s390.c (s390_expand_insv): Emit a normal move if it
is actually a full copy of the source operand into the target.
Don't emit a strict low part move if source and target mode match.

gcc/testsuite/ChangeLog:

* gcc.target/s390/pr10.c: New test.
---
 gcc/config/s390/s390.c   | 10 ++
 gcc/testsuite/gcc.target/s390/pr10.c | 16 
 2 files changed, 26 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/s390/pr10.c

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 54dd6332c3a..e04385451cf 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -6414,6 +6414,15 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
   if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
 return false;
 
+  /* Just a move.  */
+  if (bitpos == 0
+  && bitsize == GET_MODE_BITSIZE (GET_MODE (src))
+  && mode == GET_MODE (src))
+{
+  emit_move_insn (dest, src);
+  return true;
+}
+
   /* Generate INSERT IMMEDIATE (IILL et al).  */
   /* (set (ze (reg)) (const_int)).  */
   if (TARGET_ZARCH
@@ -6510,6 +6519,7 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
   && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
   && MEM_P (src)
   && (mode == DImode || mode == SImode)
+  && mode != smode
   && register_operand (dest, mode))
 {
   /* Emit a strict_low_part pattern if possible.  */
diff --git a/gcc/testsuite/gcc.target/s390/pr10.c 
b/gcc/testsuite/gcc.target/s390/pr10.c
new file mode 100644
index 000..47d075e47fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/pr10.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -m31 -mesa" } */
+
+struct squashfs_reg_inode_header_1 read_inode_inode;
+
+int read_inode_val;
+
+struct squashfs_reg_inode_header_1
+{
+  int file_size:32;
+} __attribute__((packed)) read_inode ();
+
+void foo (void)
+{
+  read_inode_inode.file_size = read_inode_val;
+}
-- 
2.31.1



[PATCH 1/5] IBM Z: Get rid of vec merge unspec

2021-07-29 Thread Andreas Krebbel via Gcc-patches
This patch gets rid of the unspecs we were using for the vector merge
instruction and replaces it with generic rtx.

gcc/ChangeLog:

* config/s390/s390-modes.def: Add more vector modes to support
concatenation of two vectors.
* config/s390/s390-protos.h (s390_expand_merge_perm_const): Add
prototype.
(s390_expand_merge): Likewise.
* config/s390/s390.c (s390_expand_merge_perm_const): New function.
(s390_expand_merge): New function.
* config/s390/s390.md (UNSPEC_VEC_MERGEH, UNSPEC_VEC_MERGEL):
Remove constant definitions.
* config/s390/vector.md (V_HW_2): Add mode iterators.
(VI_HW_4, V_HW_4): Rename VI_HW_4 to V_HW_4.
(vec_2x_nelts, vec_2x_wide): New mode attributes.
(*vmrhb, *vmrlb, *vmrhh, *vmrlh, *vmrhf, *vmrlf, *vmrhg, *vmrlg):
New pattern definitions.
(vec_widen_umult_lo_, vec_widen_umult_hi_)
(vec_widen_smult_lo_, vec_widen_smult_hi_)
(vec_unpacks_lo_v4sf, vec_unpacks_hi_v4sf, vec_unpacks_lo_v2df)
(vec_unpacks_hi_v2df): Adjust expanders to emit non-unspec RTX for
vec merge.
* config/s390/vx-builtins.md (V_HW_4): Remove mode iterator. Now
in vector.md.
(vec_mergeh, vec_mergel): Use s390_expand_merge to
emit vec merge pattern.

gcc/testsuite/ChangeLog:

* gcc.target/s390/vector/long-double-asm-in-out-hard-fp-reg.c:
Instead of vpdi with 0 and 5 vmrlg and vmrhg are used now.
* gcc.target/s390/vector/long-double-asm-inout-hard-fp-reg.c: Likewise.
* gcc.target/s390/zvector/vec-types.h: New test.
* gcc.target/s390/zvector/vec_merge.c: New test.
---
 gcc/config/s390/s390-modes.def|  11 +-
 gcc/config/s390/s390-protos.h |   2 +
 gcc/config/s390/s390.c|  36 
 gcc/config/s390/s390.md   |   2 -
 gcc/config/s390/vector.md | 204 +++---
 gcc/config/s390/vx-builtins.md|  35 ++-
 .../long-double-asm-in-out-hard-fp-reg.c  |   8 +-
 .../long-double-asm-inout-hard-fp-reg.c   |   6 +-
 .../gcc.target/s390/zvector/vec-types.h   |  37 
 .../gcc.target/s390/zvector/vec_merge.c   |  88 
 10 files changed, 367 insertions(+), 62 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/zvector/vec-types.h
 create mode 100644 gcc/testsuite/gcc.target/s390/zvector/vec_merge.c

diff --git a/gcc/config/s390/s390-modes.def b/gcc/config/s390/s390-modes.def
index 6d814fc490c..245c2b811d4 100644
--- a/gcc/config/s390/s390-modes.def
+++ b/gcc/config/s390/s390-modes.def
@@ -259,14 +259,17 @@ CC_MODE (CCVFANY);
 
 /* Vector modes.  */
 
-VECTOR_MODES (INT, 2);/* V2QI */
-VECTOR_MODES (INT, 4);/*V4QI V2HI */
-VECTOR_MODES (INT, 8);/*   V8QI V4HI V2SI */
-VECTOR_MODES (INT, 16);   /* V16QI V8HI V4SI V2DI */
+VECTOR_MODES (INT, 2);/*   V2QI */
+VECTOR_MODES (INT, 4);/*  V4QI V2HI */
+VECTOR_MODES (INT, 8);/* V8QI V4HI V2SI */
+VECTOR_MODES (INT, 16);   /*   V16QI V8HI V4SI V2DI */
+VECTOR_MODES (INT, 32);   /* V32QI V16HI V8SI V4DI V2TI */
 
 VECTOR_MODE (FLOAT, SF, 2);   /* V2SF */
 VECTOR_MODE (FLOAT, SF, 4);   /* V4SF */
+VECTOR_MODE (FLOAT, SF, 8);   /* V8SF */
 VECTOR_MODE (FLOAT, DF, 2);   /* V2DF */
+VECTOR_MODE (FLOAT, DF, 4);   /* V4DF */
 
 VECTOR_MODE (INT, QI, 1); /* V1QI */
 VECTOR_MODE (INT, HI, 1); /* V1HI */
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index 289e018cf0f..4b03c6e99f5 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -122,6 +122,8 @@ extern void s390_expand_vec_compare_cc (rtx, enum rtx_code, 
rtx, rtx, bool);
 extern enum rtx_code s390_reverse_condition (machine_mode, enum rtx_code);
 extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx);
 extern void s390_expand_vec_init (rtx, rtx);
+extern rtx s390_expand_merge_perm_const (machine_mode, bool);
+extern void s390_expand_merge (rtx, rtx, rtx, bool);
 extern rtx s390_build_signbit_mask (machine_mode);
 extern rtx s390_return_addr_rtx (int, rtx);
 extern rtx s390_back_chain_rtx (void);
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index b1d3b99784d..b1a9ca9d8aa 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -7014,6 +7014,42 @@ s390_expand_vec_init (rtx target, rtx vals)
 }
 }
 
+/* Return a parallel of constant integers to be used as permutation
+   vector for a vector merge operation in MODE.  If HIGH_P is true the
+   left-most elements of the source vectors are merged otherwise the
+   right-most elements.  */
+rtx
+s390_expand_merge_perm_const (machine_mode mode, bool high_p)
+{
+  int nelts = GET_MODE_NUNITS (mode);
+  rtx perm[16];
+  int addend = high_p ? 0 : nelts;
+
+  for (int i = 0; i < nelts; i++)
+

[PATCH 3/5] IBM Z: Remove redundant V_HW_64 mode iterator.

2021-07-29 Thread Andreas Krebbel via Gcc-patches
gcc/ChangeLog:

* config/s390/vector.md (V_HW_64): Remove mode iterator.
(*vec_load_pair): Use V_HW_2 instead of V_HW_64.
* config/s390/vx-builtins.md
(vec_scatter_element_SI): Use V_HW_2 instead of
V_HW_64.
---
 gcc/config/s390/vector.md  |  7 +++
 gcc/config/s390/vx-builtins.md | 14 +++---
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 6a6370b5275..b372bf171f7 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -36,7 +36,6 @@ (define_mode_iterator V_HW  [V16QI V8HI V4SI V2DI (V1TI 
"TARGET_VXE") V2DF
 (define_mode_iterator V_HW2 [V16QI V8HI V4SI V2DI V2DF (V4SF "TARGET_VXE")
 (V1TF "TARGET_VXE") (TF "TARGET_VXE")])
 
-(define_mode_iterator V_HW_64 [V2DI V2DF])
 (define_mode_iterator VT_HW_HSDT [V8HI V4SI V4SF V2DI V2DF V1TI V1TF TI TF])
 (define_mode_iterator V_HW_HSD [V8HI V4SI (V4SF "TARGET_VXE") V2DI V2DF])
 
@@ -1972,9 +1971,9 @@ (define_expand "vec_cmp"
 })
 
 (define_insn "*vec_load_pair"
-  [(set (match_operand:V_HW_64   0 "register_operand" 
"=v,v")
-   (vec_concat:V_HW_64 (match_operand: 1 "register_operand"  
"d,v")
-   (match_operand: 2 "register_operand"  
"d,v")))]
+  [(set (match_operand:V_HW_2   0 "register_operand" 
"=v,v")
+   (vec_concat:V_HW_2 (match_operand: 1 "register_operand"  "d,v")
+  (match_operand: 2 "register_operand"  
"d,v")))]
   "TARGET_VX"
   "@
vlvgp\t%v0,%1,%2
diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
index 3799e833187..3e7b8541887 100644
--- a/gcc/config/s390/vx-builtins.md
+++ b/gcc/config/s390/vx-builtins.md
@@ -452,17 +452,17 @@ (define_insn "vec_scatter_element_DI"
 
 ; A 31 bit target address is generated from 64 bit elements
 ; vsceg
-(define_insn "vec_scatter_element_SI"
+(define_insn "vec_scatter_element_SI"
   [(set (mem:
 (plus:SI (subreg:SI
-  (unspec: [(match_operand:V_HW_64 1 
"register_operand"   "v")
- (match_operand:QI  3 
"const_mask_operand" "C")]
+  (unspec: [(match_operand:V_HW_2 1 
"register_operand"   "v")
+ (match_operand:QI 3 
"const_mask_operand" "C")]
 UNSPEC_VEC_EXTRACT) 4)
- (match_operand:SI  2 
"address_operand"   "ZQ")))
-   (unspec: [(match_operand:V_HW_640 
"register_operand"   "v")
+ (match_operand:SI 2 
"address_operand"   "ZQ")))
+   (unspec: [(match_operand:V_HW_20 
"register_operand"   "v")
   (match_dup 3)] UNSPEC_VEC_EXTRACT))]
-  "TARGET_VX && !TARGET_64BIT && UINTVAL (operands[3]) < GET_MODE_NUNITS 
(mode)"
-  "vsce\t%v0,%O2(%v1,%R2),%3"
+  "TARGET_VX && !TARGET_64BIT && UINTVAL (operands[3]) < GET_MODE_NUNITS 
(mode)"
+  "vsce\t%v0,%O2(%v1,%R2),%3"
   [(set_attr "op_type" "VRV")])
 
 ; Element size and target address size is the same
-- 
2.31.1



[PATCH 2/5] IBM Z: Get rid of vpdi unspec

2021-07-29 Thread Andreas Krebbel via Gcc-patches
The patch gets rid of the unspec used for the vector permute double
immediate instruction and replaces it with generic rtx.

gcc/ChangeLog:

* config/s390/s390.md (UNSPEC_VEC_PERMI): Remove constant
definition.
* config/s390/vector.md (*vpdi1, *vpdi4): New pattern
definitions.
* config/s390/vx-builtins.md (*vec_permi): Emit generic rtx
instead of an unspec.

gcc/testsuite/ChangeLog:

* gcc.target/s390/zvector/vec-permi.c: Removed.
* gcc.target/s390/zvector/vec_permi.c: New test.
---
 gcc/config/s390/s390.md   |  1 -
 gcc/config/s390/vector.md | 26 
 gcc/config/s390/vx-builtins.md| 26 +++-
 .../gcc.target/s390/zvector/vec-permi.c   | 54 ---
 .../gcc.target/s390/zvector/vec_permi.c   | 66 +++
 5 files changed, 102 insertions(+), 71 deletions(-)
 delete mode 100644 gcc/testsuite/gcc.target/s390/zvector/vec-permi.c
 create mode 100644 gcc/testsuite/gcc.target/s390/zvector/vec_permi.c

diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index d896faee0fb..1b894a926ce 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -166,7 +166,6 @@ (define_c_enum "unspec" [
UNSPEC_VEC_PACK_UNSIGNED_SATURATE_CC
UNSPEC_VEC_PACK_UNSIGNED_SATURATE_GENCC
UNSPEC_VEC_PERM
-   UNSPEC_VEC_PERMI
UNSPEC_VEC_EXTEND
UNSPEC_VEC_STORE_LEN
UNSPEC_VEC_STORE_LEN_R
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 7507aec1c8e..6a6370b5275 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -767,6 +767,32 @@ (define_insn "*vec_perm"
   "vperm\t%v0,%v1,%v2,%v3"
   [(set_attr "op_type" "VRR")])
 
+
+; First DW of op1 and second DW of op2
+(define_insn "*vpdi1"
+  [(set (match_operand:V_HW_2   0 "register_operand" "=v")
+   (vec_select:V_HW_2
+(vec_concat:
+ (match_operand:V_HW_2 1 "register_operand"  "v")
+ (match_operand:V_HW_2 2 "register_operand"  "v"))
+(parallel [(const_int 0) (const_int 3)])))]
+  "TARGET_VX"
+  "vpdi\t%v0,%v1,%v2,1"
+  [(set_attr "op_type" "VRR")])
+
+; Second DW of op1 and first of op2
+(define_insn "*vpdi4"
+  [(set (match_operand:V_HW_2   0 "register_operand" "=v")
+   (vec_select:V_HW_2
+(vec_concat:
+ (match_operand:V_HW_2 1 "register_operand"  "v")
+ (match_operand:V_HW_2 2 "register_operand"  "v"))
+(parallel [(const_int 1) (const_int 2)])))]
+  "TARGET_VX"
+  "vpdi\t%v0,%v1,%v2,4"
+  [(set_attr "op_type" "VRR")])
+
+
 (define_insn "*vmrhb"
   [(set (match_operand:V16QI 0 "register_operand" "=v")
 (vec_select:V16QI
diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
index 5abe43b9e53..3799e833187 100644
--- a/gcc/config/s390/vx-builtins.md
+++ b/gcc/config/s390/vx-builtins.md
@@ -403,28 +403,22 @@ (define_insn "vec_zperm"
   "vperm\t%v0,%v1,%v2,%v3"
   [(set_attr "op_type" "VRR")])
 
+; Incoming op3 is in vec_permi format and will we turned into a
+; permute vector consisting of op3 and op4.
 (define_expand "vec_permi"
-  [(set (match_operand:V_HW_64  0 "register_operand"   "")
-   (unspec:V_HW_64 [(match_operand:V_HW_64 1 "register_operand"   "")
-(match_operand:V_HW_64 2 "register_operand"   "")
-(match_operand:QI  3 "const_mask_operand" "")]
-   UNSPEC_VEC_PERMI))]
+  [(set (match_operand:V_HW_2   0 "register_operand" "")
+   (vec_select:V_HW_2
+(vec_concat:
+ (match_operand:V_HW_2 1 "register_operand" "")
+ (match_operand:V_HW_2 2 "register_operand" ""))
+(parallel [(match_operand:QI 3 "const_mask_operand" "") (match_dup 
4)])))]
   "TARGET_VX"
 {
   HOST_WIDE_INT val = INTVAL (operands[3]);
-  operands[3] = GEN_INT ((val & 1) | (val & 2) << 1);
+  operands[3] = GEN_INT ((val & 2) >> 1);
+  operands[4] = GEN_INT ((val & 1) + 2);
 })
 
-(define_insn "*vec_permi"
-  [(set (match_operand:V_HW_64  0 "register_operand"  "=v")
-   (unspec:V_HW_64 [(match_operand:V_HW_64 1 "register_operand"   "v")
-(match_operand:V_HW_64 2 "register_operand"   "v")
-(match_operand:QI  3 "const_mask_operand" "C")]
-   UNSPEC_VEC_PERMI))]
-  "TARGET_VX && (UINTVAL (operands[3]) & 10) == 0"
-  "vpdi\t%v0,%v1,%v2,%b3"
-  [(set_attr "op_type" "VRR")])
-
 
 ; Vector replicate
 
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-permi.c 
b/gcc/testsuite/gcc.target/s390/zvector/vec-permi.c
deleted file mode 100644
index c0a852b9703..000
--- a/gcc/testsuite/gcc.target/s390/zvector/vec-permi.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O3 -march=z13 -mzarch --save-temps" } */
-/* { dg-do run { target { s390_z13_hw } } } */
-
-/*
- * The vector intrinsic vec_permi(a, b, c) chooses one of the two 

[PATCH 0/5] IBM Z: Implement TARGET_VECTORIZE_VEC_PERM_CONST

2021-07-29 Thread Andreas Krebbel via Gcc-patches
This patchset, after some prep work, provides an initial
implementation of the TARGET_VECTORIZE_VEC_PERM_CONST hook for IBM Z.
Only the vmrh, vmrl, and vpdi instruction are exploited so far.  More
instructions will be added with follow-on patches.

Bootstrapped and regression tested on s390x.

As expected various occurrences of the vperm instruction get replaced
with vmr* and vpdi.

I'll commit the patches after giving it a few days for comments.

Andreas Krebbel (5):
  IBM Z: Get rid of vec merge unspec
  IBM Z: Get rid of vpdi unspec
  IBM Z: Remove redundant V_HW_64 mode iterator.
  IBM Z: Implement TARGET_VECTORIZE_VEC_PERM_CONST for vector merge
  IBM Z: Implement TARGET_VECTORIZE_VEC_PERM_CONST for vpdi

 gcc/config/s390/s390-modes.def|  11 +-
 gcc/config/s390/s390-protos.h |   2 +
 gcc/config/s390/s390.c| 191 ++
 gcc/config/s390/s390.md   |   3 -
 gcc/config/s390/vector.md | 238 +++---
 gcc/config/s390/vx-builtins.md|  75 +++---
 .../long-double-asm-in-out-hard-fp-reg.c  |   8 +-
 .../long-double-asm-inout-hard-fp-reg.c   |   6 +-
 .../gcc.target/s390/vector/perm-merge.c   | 104 
 .../gcc.target/s390/vector/perm-vpdi.c|  49 
 .../gcc.target/s390/vector/vec-types.h|  35 +++
 .../gcc.target/s390/zvector/vec-permi.c   |  54 
 .../gcc.target/s390/zvector/vec-types.h   |  37 +++
 .../gcc.target/s390/zvector/vec_merge.c   |  88 +++
 .../gcc.target/s390/zvector/vec_permi.c   |  66 +
 15 files changed, 822 insertions(+), 145 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/perm-merge.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/perm-vpdi.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-types.h
 delete mode 100644 gcc/testsuite/gcc.target/s390/zvector/vec-permi.c
 create mode 100644 gcc/testsuite/gcc.target/s390/zvector/vec-types.h
 create mode 100644 gcc/testsuite/gcc.target/s390/zvector/vec_merge.c
 create mode 100644 gcc/testsuite/gcc.target/s390/zvector/vec_permi.c

-- 
2.31.1



[PATCH 5/5] IBM Z: Implement TARGET_VECTORIZE_VEC_PERM_CONST for vpdi

2021-07-29 Thread Andreas Krebbel via Gcc-patches
This patch makes use of the vector permute double immediate
instruction for constant permute vectors.

gcc/ChangeLog:

* config/s390/s390.c (expand_perm_with_vpdi): New function.
(vectorize_vec_perm_const_1): Call expand_perm_with_vpdi.
* config/s390/vector.md (*vpdi1, @vpdi1): Enable a
parameterized expander.
(*vpdi4, @vpdi4): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/s390/vector/perm-vpdi.c: New test.
---
 gcc/config/s390/s390.c| 47 ++
 gcc/config/s390/vector.md |  5 +-
 .../gcc.target/s390/vector/perm-vpdi.c| 49 +++
 3 files changed, 98 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/perm-vpdi.c

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 684241b00b8..20c52c83c72 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -16981,6 +16981,50 @@ expand_perm_with_merge (const struct expand_vec_perm_d 
)
   return merge_lo_p || merge_hi_p;
 }
 
+/* Try to expand the vector permute operation described by D using the
+   vector permute doubleword immediate instruction vpdi.  Return true
+   if vpdi could be used.
+
+   VPDI allows 4 different immediate values (0, 1, 4, 5). The 0 and 5
+   cases are covered by vmrhg and vmrlg already.  So we only care
+   about the 1, 4 cases here.
+   1 - First element of src1 and second of src2
+   4 - Second element of src1 and first of src2  */
+static bool
+expand_perm_with_vpdi (const struct expand_vec_perm_d )
+{
+  bool vpdi1_p = false;
+  bool vpdi4_p = false;
+  rtx op0_reg, op1_reg;
+
+  // Only V2DI and V2DF are supported here.
+  if (d.nelt != 2)
+return false;
+
+  if (d.perm[0] == 0 && d.perm[1] == 3)
+vpdi1_p = true;
+
+  if (d.perm[0] == 1 && d.perm[1] == 2)
+vpdi4_p = true;
+
+  if (!vpdi1_p && !vpdi4_p)
+return false;
+
+  if (d.testing_p)
+return true;
+
+  op0_reg = force_reg (GET_MODE (d.op0), d.op0);
+  op1_reg = force_reg (GET_MODE (d.op1), d.op1);
+
+  if (vpdi1_p)
+emit_insn (gen_vpdi1 (d.vmode, d.target, op0_reg, op1_reg));
+
+  if (vpdi4_p)
+emit_insn (gen_vpdi4 (d.vmode, d.target, op0_reg, op1_reg));
+
+  return true;
+}
+
 /* Try to find the best sequence for the vector permute operation
described by D.  Return true if the operation could be
expanded.  */
@@ -16990,6 +17034,9 @@ vectorize_vec_perm_const_1 (const struct 
expand_vec_perm_d )
   if (expand_perm_with_merge (d))
 return true;
 
+  if (expand_perm_with_vpdi (d))
+return true;
+
   return false;
 }
 
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index b372bf171f7..1b0ae47ab49 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -768,7 +768,7 @@ (define_insn "*vec_perm"
 
 
 ; First DW of op1 and second DW of op2
-(define_insn "*vpdi1"
+(define_insn "@vpdi1"
   [(set (match_operand:V_HW_2   0 "register_operand" "=v")
(vec_select:V_HW_2
 (vec_concat:
@@ -780,7 +780,7 @@ (define_insn "*vpdi1"
   [(set_attr "op_type" "VRR")])
 
 ; Second DW of op1 and first of op2
-(define_insn "*vpdi4"
+(define_insn "@vpdi4"
   [(set (match_operand:V_HW_2   0 "register_operand" "=v")
(vec_select:V_HW_2
 (vec_concat:
@@ -926,7 +926,6 @@ (define_insn_and_split "tf_to_fprx2"
   operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, 8);
 })
 
-; vec_perm_const for V2DI using vpdi?
 
 ;;
 ;; Vector integer arithmetic instructions
diff --git a/gcc/testsuite/gcc.target/s390/vector/perm-vpdi.c 
b/gcc/testsuite/gcc.target/s390/vector/perm-vpdi.c
new file mode 100644
index 000..cc925315b37
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/perm-vpdi.c
@@ -0,0 +1,49 @@
+/* { dg-do run { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z14 -mzvector --save-temps" } */
+
+/* { dg-final { scan-assembler-times "\tvmrhg\t" 3 } } */
+/* { dg-final { scan-assembler-times "\tvmrlg\t" 3 } } */
+/* { dg-final { scan-assembler-times "\tvpdi\t" 6 } } */
+
+#include "vec-types.h"
+#include 
+
+#define GEN_PERMI_BITS(VEC_TYPE, BITS) \
+  VEC_TYPE __attribute__((noinline))   \
+  permi_##BITS##_##VEC_TYPE(VEC_TYPE a, VEC_TYPE b) {  \
+return (VEC_TYPE){a[((BITS) & 2) >> 1], b[(BITS) & 1] }; }
+
+#define GEN_PERMI(VEC_TYPE)\
+  GEN_PERMI_BITS(VEC_TYPE, 0); \
+  GEN_PERMI_BITS(VEC_TYPE, 1); \
+  GEN_PERMI_BITS(VEC_TYPE, 2); \
+  GEN_PERMI_BITS(VEC_TYPE, 3); \
+
+GEN_PERMI(v2di)
+GEN_PERMI(uv2di)
+GEN_PERMI(v2df)
+
+
+#define CHECK_PERMI_BITS(VEC_TYPE, BITS)   \
+  VEC_TYPE r##BITS = permi_##BITS##_##VEC_TYPE (a, b); \
+  if (r##BITS[0] != ((BITS) & 2) >> 1  \
+  || r##BITS[1] != ((BITS) & 1) + 2)   \
+__builtin_abort();
+
+#define CHECK_PERMI(VEC_TYPE)  \
+  {  

[PATCH 4/5] IBM Z: Implement TARGET_VECTORIZE_VEC_PERM_CONST for vector merge

2021-07-29 Thread Andreas Krebbel via Gcc-patches
This patch implements the TARGET_VECTORIZE_VEC_PERM_CONST in the IBM Z
backend. The initial implementation only exploits the vector merge
instruction but there is more to come.

gcc/ChangeLog:

* config/s390/s390.c (MAX_VECT_LEN): Define macro.
(struct expand_vec_perm_d): Define struct.
(expand_perm_with_merge): New function.
(vectorize_vec_perm_const_1): New function.
(s390_vectorize_vec_perm_const): New function.
(TARGET_VECTORIZE_VEC_PERM_CONST): Define target macro.

gcc/testsuite/ChangeLog:

* gcc.target/s390/vector/perm-merge.c: New test.
* gcc.target/s390/vector/vec-types.h: New test.
---
 gcc/config/s390/s390.c| 108 ++
 .../gcc.target/s390/vector/perm-merge.c   | 104 +
 .../gcc.target/s390/vector/vec-types.h|  35 ++
 3 files changed, 247 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/perm-merge.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-types.h

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index b1a9ca9d8aa..684241b00b8 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -16928,6 +16928,110 @@ s390_md_asm_adjust (vec , vec 
,
   return after_md_seq;
 }
 
+#define MAX_VECT_LEN   16
+
+struct expand_vec_perm_d
+{
+  rtx target, op0, op1;
+  unsigned char perm[MAX_VECT_LEN];
+  machine_mode vmode;
+  unsigned char nelt;
+  bool testing_p;
+};
+
+/* Try to expand the vector permute operation described by D using the
+   vector merge instructions vml and vmh.  Return true if vector merge
+   could be used.  */
+static bool
+expand_perm_with_merge (const struct expand_vec_perm_d )
+{
+  bool merge_lo_p = true;
+  bool merge_hi_p = true;
+
+  if (d.nelt % 2)
+return false;
+
+  // For V4SI this checks for: { 0, 4, 1, 5 }
+  for (int telt = 0; telt < d.nelt; telt++)
+if (d.perm[telt] != telt / 2 + (telt % 2) * d.nelt)
+  {
+   merge_hi_p = false;
+   break;
+  }
+
+  if (!merge_hi_p)
+{
+  // For V4SI this checks for: { 2, 6, 3, 7 }
+  for (int telt = 0; telt < d.nelt; telt++)
+   if (d.perm[telt] != (telt + d.nelt) / 2 + (telt % 2) * d.nelt)
+ {
+   merge_lo_p = false;
+   break;
+ }
+}
+  else
+merge_lo_p = false;
+
+  if (d.testing_p)
+return merge_lo_p || merge_hi_p;
+
+  if (merge_lo_p || merge_hi_p)
+s390_expand_merge (d.target, d.op0, d.op1, merge_hi_p);
+
+  return merge_lo_p || merge_hi_p;
+}
+
+/* Try to find the best sequence for the vector permute operation
+   described by D.  Return true if the operation could be
+   expanded.  */
+static bool
+vectorize_vec_perm_const_1 (const struct expand_vec_perm_d )
+{
+  if (expand_perm_with_merge (d))
+return true;
+
+  return false;
+}
+
+/* Return true if we can emit instructions for the constant
+   permutation vector in SEL.  If OUTPUT, IN0, IN1 are non-null the
+   hook is supposed to emit the required INSNs.  */
+
+bool
+s390_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx 
op1,
+  const vec_perm_indices )
+{
+  struct expand_vec_perm_d d;
+  unsigned char perm[MAX_VECT_LEN];
+  unsigned int i, nelt;
+
+  if (!s390_vector_mode_supported_p (vmode) || GET_MODE_SIZE (vmode) != 16)
+return false;
+
+  d.target = target;
+  d.op0 = op0;
+  d.op1 = op1;
+
+  d.vmode = vmode;
+  gcc_assert (VECTOR_MODE_P (d.vmode));
+  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
+  d.testing_p = target == NULL_RTX;
+
+  gcc_assert (target == NULL_RTX || REG_P (target));
+  gcc_assert (sel.length () == nelt);
+  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
+
+  for (i = 0; i < nelt; i++)
+{
+  unsigned char e = sel[i];
+  gcc_assert (e < 2 * nelt);
+  d.perm[i] = e;
+  perm[i] = e;
+}
+
+  return vectorize_vec_perm_const_1 (d);
+}
+
 /* Initialize GCC target structure.  */
 
 #undef  TARGET_ASM_ALIGNED_HI_OP
@@ -17238,6 +17342,10 @@ s390_md_asm_adjust (vec , vec 
,
 #undef TARGET_MD_ASM_ADJUST
 #define TARGET_MD_ASM_ADJUST s390_md_asm_adjust
 
+#undef TARGET_VECTORIZE_VEC_PERM_CONST
+#define TARGET_VECTORIZE_VEC_PERM_CONST s390_vectorize_vec_perm_const
+
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-s390.h"
diff --git a/gcc/testsuite/gcc.target/s390/vector/perm-merge.c 
b/gcc/testsuite/gcc.target/s390/vector/perm-merge.c
new file mode 100644
index 000..51b23ddd886
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/perm-merge.c
@@ -0,0 +1,104 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14 -mzvector --save-temps" } */
+/* { dg-do run { target { s390_z14_hw } } } */
+
+/* { dg-final { scan-assembler-times "\tvmrhb\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tvmrlb\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tvmrhh\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tvmrlh\t" 2 } } */
+/* { dg-final { 

Re: [PATCH] IBM Z: Fix 5 tests in 31-bit mode

2021-07-28 Thread Andreas Krebbel via Gcc-patches
On 7/23/21 2:47 PM, Ilya Leoshkevich wrote:
> Bootstrapped and regtested on s390x-redhat-linux.  Ok for master?
> 
> 
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/global-array-element-pic2.c: Add -mzarch, add
>   an expectation for 31-bit mode.
>   * gcc.target/s390/load-imm64-1.c: Use unsigned long long.
>   * gcc.target/s390/load-imm64-2.c: Likewise.
>   * gcc.target/s390/vector/long-double-vx-macro-off-on.c: Use
>   -mzarch.
>   * gcc.target/s390/vector/long-double-vx-macro-on-off.c:
>   Likewise.

Ok. Thanks!

Andreas


Re: [PATCH] Adjust docu of TARGET_VECTORIZE_VEC_PERM_CONST

2021-07-28 Thread Andreas Krebbel via Gcc-patches
On 7/28/21 9:43 AM, Richard Biener wrote:
> On Wed, Jul 28, 2021 at 8:44 AM Andreas Krebbel via Gcc-patches
>  wrote:
>>
>> There are also memory operands passed for in0 and in1.
>>
>> Ok for mainline?
> 
> They can also be constant vectors, I'd just not specify the operand
> kind - usually
> expanders are not limited as to what they feed down.

Right, I'll just replace "registers" with "operands" then. Ok?

 also to emit such a permutation.  In the former case @var{in0}, @var{in1}\n\
 and @var{out} are all null.  In the latter case @var{in0} and @var{in1} are\n\
 the source vectors and @var{out} is the destination vector; all three are\n\
-registers of mode @var{mode}.  @var{in1} is the same as @var{in0} if\n\
+operands of mode @var{mode}.  @var{in1} is the same as @var{in0} if\n\
 @var{sel} describes a permutation on one vector instead of two.\n\
 \n\
 Return true if the operation is possible, emitting instructions for it\n\

Andreas


Re: [PATCH] IBM Z: Enable LSan and TSan

2021-07-28 Thread Andreas Krebbel via Gcc-patches
On 7/27/21 10:04 PM, Ilya Leoshkevich via Gcc-patches wrote:
> Bootstrapped and regtested on s390x-redhat-linux.  Ok for master?
> 
> libsanitizer/ChangeLog:
> 
>   * configure.tgt (s390*-*-linux*): Enable LSan and TSan for
>   s390x.

Ok. Thanks!

Andreas


[PATCH] Adjust docu of TARGET_VECTORIZE_VEC_PERM_CONST

2021-07-28 Thread Andreas Krebbel via Gcc-patches
There are also memory operands passed for in0 and in1.

Ok for mainline?

gcc/ChangeLog:

* target.def: Describe in0 and in1 as being either register or
memory operands.
* doc/tm.texi: Regenerate.
---
 gcc/doc/tm.texi | 7 ---
 gcc/target.def  | 7 ---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index c8f4abe3e41..31f188daf00 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6124,9 +6124,10 @@ This hook is used to test whether the target can permute 
up to two
 vectors of mode @var{mode} using the permutation vector @code{sel}, and
 also to emit such a permutation.  In the former case @var{in0}, @var{in1}
 and @var{out} are all null.  In the latter case @var{in0} and @var{in1} are
-the source vectors and @var{out} is the destination vector; all three are
-registers of mode @var{mode}.  @var{in1} is the same as @var{in0} if
-@var{sel} describes a permutation on one vector instead of two.
+the source vectors and @var{out} is the destination vector.  The destination
+vector is a register of mode @var{mode} while the source vectors can be either
+register or memory operands of mode @var{mode}.  @var{in1} is the same as
+@var{in0} if @var{sel} describes a permutation on one vector instead of two.
 
 Return true if the operation is possible, emitting instructions for it
 if rtxes are provided.
diff --git a/gcc/target.def b/gcc/target.def
index 2e40448e6c5..b368d81be63 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1860,9 +1860,10 @@ DEFHOOK
 vectors of mode @var{mode} using the permutation vector @code{sel}, and\n\
 also to emit such a permutation.  In the former case @var{in0}, @var{in1}\n\
 and @var{out} are all null.  In the latter case @var{in0} and @var{in1} are\n\
-the source vectors and @var{out} is the destination vector; all three are\n\
-registers of mode @var{mode}.  @var{in1} is the same as @var{in0} if\n\
-@var{sel} describes a permutation on one vector instead of two.\n\
+the source vectors and @var{out} is the destination vector.  The destination\n\
+vector is a register of mode @var{mode} while the source vectors can be 
either\n\
+register or memory operands of mode @var{mode}.  @var{in1} is the same as\n\
+@var{in0} if @var{sel} describes a permutation on one vector instead of two.\n\
 \n\
 Return true if the operation is possible, emitting instructions for it\n\
 if rtxes are provided.\n\
-- 
2.31.1



  1   2   3   >