[gcc r15-1368] x86: Emit cvtne2ps2bf16 for odd increasing perm in __builtin_shufflevector
https://gcc.gnu.org/g:6d0b7b69d143025f271d0041cfa29cf26e6c343b commit r15-1368-g6d0b7b69d143025f271d0041cfa29cf26e6c343b Author: Levy Hsu Date: Thu Jun 13 15:20:04 2024 +0930 x86: Emit cvtne2ps2bf16 for odd increasing perm in __builtin_shufflevector This patch updates the GCC x86 backend to efficiently handle odd, incrementally increasing permutations of BF16 vectors using the cvtne2ps2bf16 instruction. It modifies ix86_vectorize_vec_perm_const to support these operations and adds a specific predicate to ensure proper sequence handling. gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_vectorize_vec_perm_const): Convert BF to HI using subreg. * config/i386/predicates.md (vcvtne2ps2bf_parallel): New predicate matching odd increasing perm. * config/i386/sse.md (vpermt2_sepcial_bf16_shuffle_): New define_insn_and_split. gcc/testsuite/ChangeLog: * gcc.target/i386/vpermt2-special-bf16-shufflue.c: New test. Diff: --- gcc/config/i386/i386-expand.cc | 4 +-- gcc/config/i386/predicates.md | 11 +++ gcc/config/i386/sse.md | 35 ++ .../i386/vpermt2-special-bf16-shufflue.c | 27 + 4 files changed, 75 insertions(+), 2 deletions(-) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index a4379b863170..7c6a82ee6a2b 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -23657,8 +23657,8 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, if (GET_MODE_SIZE (vmode) == 64 && !TARGET_EVEX512) return false; - /* For HF mode vector, convert it to HI using subreg. */ - if (GET_MODE_INNER (vmode) == HFmode) + /* For HF and BF mode vector, convert it to HI using subreg. 
*/ + if (GET_MODE_INNER (vmode) == HFmode || GET_MODE_INNER (vmode) == BFmode) { machine_mode orig_mode = vmode; vmode = mode_for_vector (HImode, diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 7afe3100cb7e..1676c50de711 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -2322,3 +2322,14 @@ return true; }) + +;; Check that each element is odd and incrementally increasing from 1 +(define_predicate "vcvtne2ps2bf_parallel" + (and (match_code "const_vector") + (match_code "const_int" "a")) +{ + for (int i = 0; i < XVECLEN (op, 0); ++i) +if (INTVAL (XVECEXP (op, 0, i)) != (2 * i + 1)) + return false; + return true; +}) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 680a46a0b08a..5ddd1c0a778c 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -30698,3 +30698,38 @@ "TARGET_AVXVNNIINT16" "vpdp\t{%3, %2, %0|%0, %2, %3}" [(set_attr "prefix" "vex")]) + +(define_mode_attr hi_cvt_bf + [(V8HI "v8bf") (V16HI "v16bf") (V32HI "v32bf")]) + +(define_mode_attr HI_CVT_BF + [(V8HI "V8BF") (V16HI "V16BF") (V32HI "V32BF")]) + +(define_insn_and_split "vpermt2_sepcial_bf16_shuffle_" + [(set (match_operand:VI2_AVX512F 0 "register_operand") + (unspec:VI2_AVX512F + [(match_operand:VI2_AVX512F 1 "vcvtne2ps2bf_parallel") + (match_operand:VI2_AVX512F 2 "register_operand") + (match_operand:VI2_AVX512F 3 "nonimmediate_operand")] + UNSPEC_VPERMT2))] + "TARGET_AVX512VL && TARGET_AVX512BF16 && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + rtx op0 = gen_reg_rtx (mode); + operands[2] = lowpart_subreg (mode, + force_reg (mode, operands[2]), + mode); + operands[3] = lowpart_subreg (mode, + force_reg (mode, operands[3]), + mode); + + emit_insn (gen_avx512f_cvtne2ps2bf16_(op0, + operands[3], + operands[2])); + emit_move_insn (operands[0], lowpart_subreg (mode, op0, + mode)); + DONE; +} +[(set_attr "mode" "")]) diff --git a/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c 
b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c new file mode 100755 index ..5c65f2a98847 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512bf16 -mavx512vl" } */ +/* { dg-final { scan-assembler-not "vpermi2b" } } */ +/* { dg-final { scan-assembler-times "vcvtne2ps2bf16" 3 } } */ + +typedef __bf16 v8bf __attribute__((vector_size(16))); +typedef __bf16 v16bf __attribute__((vector_size(32))); +typedef __bf16 v32bf __attribute__((vector_size(64))); + +v8bf foo0(v8bf a, v8bf b) +{ + return __builtin_shufflevector(a, b, 1, 3, 5, 7, 9, 11, 13, 15); +} + +v16bf foo
[gcc r14-10317] s390: testsuite: Fix ifcvt-one-insn-bool.c
https://gcc.gnu.org/g:0ed63e3791345a9933cbbf28594ab5549d336bd4 commit r14-10317-g0ed63e3791345a9933cbbf28594ab5549d336bd4 Author: Stefan Schulze Frielinghaus Date: Mon Jun 17 08:52:28 2024 +0200 s390: testsuite: Fix ifcvt-one-insn-bool.c With the change of r15-787-g57e04879389f9c I forgot to also update this test. gcc/testsuite/ChangeLog: * gcc.target/s390/ifcvt-one-insn-bool.c: Fix loc. (cherry picked from commit ac66736bf2f8a10d2f43e83ed6377e4179027a39) Diff: --- gcc/testsuite/gcc.target/s390/ifcvt-one-insn-bool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/s390/ifcvt-one-insn-bool.c b/gcc/testsuite/gcc.target/s390/ifcvt-one-insn-bool.c index 0c8c2f879a69..4ae29dbd6b61 100644 --- a/gcc/testsuite/gcc.target/s390/ifcvt-one-insn-bool.c +++ b/gcc/testsuite/gcc.target/s390/ifcvt-one-insn-bool.c @@ -3,7 +3,7 @@ /* { dg-do compile { target { s390*-*-* } } } */ /* { dg-options "-O2 -march=z13 -mzarch" } */ -/* { dg-final { scan-assembler "lochinh\t%r.?,1" } } */ +/* { dg-final { scan-assembler "lochile\t%r.?,1" } } */ #include int foo (int *a, unsigned int n)
[gcc r14-10316] s390: Implement TARGET_NOCE_CONVERSION_PROFITABLE_P [PR109549]
https://gcc.gnu.org/g:8f124e6b79daa43618dbb1e67c09629676d07396 commit r14-10316-g8f124e6b79daa43618dbb1e67c09629676d07396 Author: Stefan Schulze Frielinghaus Date: Mon Jun 17 08:52:20 2024 +0200 s390: Implement TARGET_NOCE_CONVERSION_PROFITABLE_P [PR109549] Consider a NOCE conversion as profitable if there is at least one conditional move. gcc/ChangeLog: PR target/109549 * config/s390/s390.cc (TARGET_NOCE_CONVERSION_PROFITABLE_P): Define. (s390_noce_conversion_profitable_p): Implement. gcc/testsuite/ChangeLog: * gcc.target/s390/ccor.c: Order of loads are reversed, now, as a consequence the condition has to be reversed. (cherry picked from commit 57e04879389f9c0d5d53f316b468ce1bddbab350) Diff: --- gcc/config/s390/s390.cc | 32 gcc/testsuite/gcc.target/s390/ccor.c | 4 ++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index 5968808fcb6e..fa517bd3e77a 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -78,6 +78,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-pass.h" #include "context.h" #include "builtins.h" +#include "ifcvt.h" #include "rtl-iter.h" #include "intl.h" #include "tm-constrs.h" @@ -18037,6 +18038,34 @@ s390_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, return vectorize_vec_perm_const_1 (d); } +/* Consider a NOCE conversion as profitable if there is at least one + conditional move. 
*/ + +static bool +s390_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info) +{ + if (if_info->speed_p) +{ + for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn)) + { + rtx set = single_set (insn); + if (set == NULL) + continue; + if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE) + continue; + rtx src = SET_SRC (set); + machine_mode mode = GET_MODE (src); + if (GET_MODE_CLASS (mode) != MODE_INT + && GET_MODE_CLASS (mode) != MODE_FLOAT) + continue; + if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) + continue; + return true; + } +} + return default_noce_conversion_profitable_p (seq, if_info); +} + /* Initialize GCC target structure. */ #undef TARGET_ASM_ALIGNED_HI_OP @@ -18350,6 +18379,9 @@ s390_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, #undef TARGET_VECTORIZE_VEC_PERM_CONST #define TARGET_VECTORIZE_VEC_PERM_CONST s390_vectorize_vec_perm_const +#undef TARGET_NOCE_CONVERSION_PROFITABLE_P +#define TARGET_NOCE_CONVERSION_PROFITABLE_P s390_noce_conversion_profitable_p + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-s390.h" diff --git a/gcc/testsuite/gcc.target/s390/ccor.c b/gcc/testsuite/gcc.target/s390/ccor.c index 31f30f60314e..36a3c3a999a9 100644 --- a/gcc/testsuite/gcc.target/s390/ccor.c +++ b/gcc/testsuite/gcc.target/s390/ccor.c @@ -42,7 +42,7 @@ GENFUN1(2) GENFUN1(3) -/* { dg-final { scan-assembler {locrno} } } */ +/* { dg-final { scan-assembler {locro} } } */ GENFUN2(0,1) @@ -58,7 +58,7 @@ GENFUN2(0,3) GENFUN2(1,2) -/* { dg-final { scan-assembler {locrnlh} } } */ +/* { dg-final { scan-assembler {locrlh} } } */ GENFUN2(1,3)
[gcc r15-1367] s390: Delete mistakenly added tests
https://gcc.gnu.org/g:e86d4e4ac7d7438f2f1b2437508cfd394a0a34d9 commit r15-1367-ge86d4e4ac7d7438f2f1b2437508cfd394a0a34d9 Author: Stefan Schulze Frielinghaus Date: Mon Jun 17 08:46:38 2024 +0200 s390: Delete mistakenly added tests gcc/testsuite/ChangeLog: * gcc.target/s390/vector/vgm-df-1.c: Removed. * gcc.target/s390/vector/vgm-di-1.c: Removed. * gcc.target/s390/vector/vgm-hi-1.c: Removed. * gcc.target/s390/vector/vgm-int128-1.c: Removed. * gcc.target/s390/vector/vgm-longdouble-1.c: Removed. * gcc.target/s390/vector/vgm-qi-1.c: Removed. * gcc.target/s390/vector/vgm-sf-1.c: Removed. * gcc.target/s390/vector/vgm-si-1.c: Removed. * gcc.target/s390/vector/vgm-ti-1.c: Removed. Diff: --- gcc/testsuite/gcc.target/s390/vector/vgm-df-1.c| 30 --- gcc/testsuite/gcc.target/s390/vector/vgm-di-1.c| 102 -- gcc/testsuite/gcc.target/s390/vector/vgm-hi-1.c| 212 .../gcc.target/s390/vector/vgm-int128-1.c | 64 --- .../gcc.target/s390/vector/vgm-longdouble-1.c | 55 -- gcc/testsuite/gcc.target/s390/vector/vgm-qi-1.c| 213 - gcc/testsuite/gcc.target/s390/vector/vgm-sf-1.c| 43 - gcc/testsuite/gcc.target/s390/vector/vgm-si-1.c| 146 -- gcc/testsuite/gcc.target/s390/vector/vgm-ti-1.c| 63 -- 9 files changed, 928 deletions(-) diff --git a/gcc/testsuite/gcc.target/s390/vector/vgm-df-1.c b/gcc/testsuite/gcc.target/s390/vector/vgm-df-1.c deleted file mode 100644 index 07aa6b9deece.. 
--- a/gcc/testsuite/gcc.target/s390/vector/vgm-df-1.c +++ /dev/null @@ -1,30 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -march=z13 -mzarch" } */ -/* { dg-final { check-function-bodies "**" "" "" } } */ - -typedef double v1df __attribute__ ((vector_size (8))); -typedef double v2df __attribute__ ((vector_size (16))); - -/* -** test_v1df_via_vgmb: -** vgmb%v24,0,1 -** br %r14 -*/ - -v1df -test_v1df_via_vgmb (void) -{ - return (v1df){-8577.505882352939806878566741943359375}; -} - -/* -** test_v2df_via_vgmb: -** vgmb%v24,0,1 -** br %r14 -*/ - -v2df -test_v2df_via_vgmb (void) -{ - return (v2df){-8577.505882352939806878566741943359375, -8577.505882352939806878566741943359375}; -} diff --git a/gcc/testsuite/gcc.target/s390/vector/vgm-di-1.c b/gcc/testsuite/gcc.target/s390/vector/vgm-di-1.c deleted file mode 100644 index fa608f2b5ae8.. --- a/gcc/testsuite/gcc.target/s390/vector/vgm-di-1.c +++ /dev/null @@ -1,102 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -march=z13 -mzarch" } */ -/* { dg-final { check-function-bodies "**" "" "" } } */ - -typedef long long v1di __attribute__ ((vector_size (8))); -typedef long long v2di __attribute__ ((vector_size (16))); - -/* -** test_v1di_via_vgmb: -** vgmb%v24,0,2 -** br %r14 -*/ - -v1di -test_v1di_via_vgmb (void) -{ - return (v1di){0xe0e0e0e0e0e0e0e0}; -} - -/* -** test_v2di_via_vgmb: -** vgmb%v24,0,2 -** br %r14 -*/ - -v2di -test_v2di_via_vgmb (void) -{ - return (v2di){0xe0e0e0e0e0e0e0e0, 0xe0e0e0e0e0e0e0e0}; -} - -/* -** test_v1di_via_vgmb_wrap: -** vgmb%v24,5,2 -** br %r14 -*/ - -v1di -test_v1di_via_vgmb_wrap (void) -{ - return (v1di){0xe7e7e7e7e7e7e7e7}; -} - -/* -** test_v2di_via_vgmb_wrap: -** vgmb%v24,5,2 -** br %r14 -*/ - -v2di -test_v2di_via_vgmb_wrap (void) -{ - return (v2di){0xe7e7e7e7e7e7e7e7, 0xe7e7e7e7e7e7e7e7}; -} - -/* -** test_v1di_via_vgmh: -** vgmh%v24,5,10 -** br %r14 -*/ - -v1di -test_v1di_via_vgmh (void) -{ - return (v1di){0x7e007e007e007e0}; -} - -/* -** test_v2di_via_vgmh: -** vgmh%v24,5,10 
-** br %r14 -*/ - -v2di -test_v2di_via_vgmh (void) -{ - return (v2di){0x7e007e007e007e0, 0x7e007e007e007e0}; -} - -/* -** test_v1di_via_vgmg: -** vgmg%v24,17,46 -** br %r14 -*/ - -v1di -test_v1di_via_vgmg (void) -{ - return (v1di){0x7ffe}; -} - -/* -** test_v2di_via_vgmg: -** vgmg%v24,17,46 -** br %r14 -*/ - -v2di -test_v2di_via_vgmg (void) -{ - return (v2di){0x7ffe, 0x7ffe}; -} diff --git a/gcc/testsuite/gcc.target/s390/vector/vgm-hi-1.c b/gcc/testsuite/gcc.target/s390/vector/vgm-hi-1.c deleted file mode 100644 index da064792cfc9.. --- a/gcc/testsuite/gcc.target/s390/vector/vgm-hi-1.c +++ /dev/null @@ -1,212 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -march=z13 -mzarch" } */ -/* { dg-final { check-function-bodies "**" "" "" } } */ - -typedef short v1hi __attribute__ ((vector_size (2))); -typedef short v2hi __attribute__ ((vector_size (4))); -typedef short v4hi __attribute__ ((vector_size (8))); -typedef short v8hi __attribute__ ((vector_size (16))); - -/* -** test_v1hi_via_vgmb: -** vgmb%v24,0,2 -** br %r14 -*/ - -v1hi -test_v1hi_via_vgmb (void) -{ - return (v1hi){
[gcc r15-1366] s390: Extend two element float vector
https://gcc.gnu.org/g:9965acb77cbd686283a9d0a867c80b1e710f46b9 commit r15-1366-g9965acb77cbd686283a9d0a867c80b1e710f46b9 Author: Stefan Schulze Frielinghaus Date: Mon Jun 17 08:37:11 2024 +0200 s390: Extend two element float vector This implements a V2SF -> V2DF extend. gcc/ChangeLog: * config/s390/vector.md (*vmrhf_half): New. (extendv2sfv2df2): New. gcc/testsuite/ChangeLog: * gcc.target/s390/vector/vec-extend-3.c: New test. Diff: --- gcc/config/s390/vector.md | 28 +++ .../gcc.target/s390/vector/vec-extend-3.c | 18 ++ gcc/testsuite/gcc.target/s390/vector/vgm-df-1.c| 30 +++ gcc/testsuite/gcc.target/s390/vector/vgm-di-1.c| 102 ++ gcc/testsuite/gcc.target/s390/vector/vgm-hi-1.c| 212 .../gcc.target/s390/vector/vgm-int128-1.c | 64 +++ .../gcc.target/s390/vector/vgm-longdouble-1.c | 55 ++ gcc/testsuite/gcc.target/s390/vector/vgm-qi-1.c| 213 + gcc/testsuite/gcc.target/s390/vector/vgm-sf-1.c| 43 + gcc/testsuite/gcc.target/s390/vector/vgm-si-1.c| 146 ++ gcc/testsuite/gcc.target/s390/vector/vgm-ti-1.c| 63 ++ 11 files changed, 974 insertions(+) diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index a931a4b1b17e..40de0c75a7cf 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -895,6 +895,17 @@ "vmrhf\t%0,%1,%2"; [(set_attr "op_type" "VRR")]) +(define_insn "*vmrhf_half" + [(set (match_operand:V_HW_40 "register_operand" "=v") + (vec_select:V_HW_4 +(vec_concat:V_HW_4 (match_operand: 1 "register_operand" "v") + (match_operand: 2 "register_operand" "v")) +(parallel [(const_int 0) (const_int 2) + (const_int 1) (const_int 3)])))] + "TARGET_VX" + "vmrhf\t%0,%1,%2"; + [(set_attr "op_type" "VRR")]) + (define_insn "*vmrlf" [(set (match_operand:V_HW_4 0 "register_operand" "=v") (vec_select:V_HW_4 @@ -2394,6 +2405,23 @@ "vuph\t%0,%1" [(set_attr "op_type" "VRR")]) +(define_expand "extendv2sfv2df2" + [(set (match_dup 2) + (vec_select:V4SF +(vec_concat:V4SF (match_operand:V2SF 1 "register_operand") + (match_dup 1)) +(parallel [(const_int 0) (const_int 
2) + (const_int 1) (const_int 3)]))) + (set (match_operand:V2DF 0 "register_operand") + (float_extend:V2DF +(vec_select:V2SF + (match_dup 2) + (parallel [(const_int 0) (const_int 2)]] + "TARGET_VX" +{ + operands[2] = gen_reg_rtx (V4SFmode); +}) + ;; vector unpack v16qi ; signed diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-extend-3.c b/gcc/testsuite/gcc.target/s390/vector/vec-extend-3.c new file mode 100644 index ..2b02e7bf9f80 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/vec-extend-3.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=z13 -mzarch" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +typedef float v2sf __attribute__ ((vector_size (8))); +typedef double v2df __attribute__ ((vector_size (16))); + +/* +** extendv2sfv2df2: +** vmrhf %v24,%v24,%v24 +** vldeb %v24,%v24 +** br %r14 +*/ + +v2df extendv2sfv2df2 (v2sf x) +{ + return __builtin_convertvector (x, v2df); +} diff --git a/gcc/testsuite/gcc.target/s390/vector/vgm-df-1.c b/gcc/testsuite/gcc.target/s390/vector/vgm-df-1.c new file mode 100644 index ..07aa6b9deece --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/vgm-df-1.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=z13 -mzarch" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +typedef double v1df __attribute__ ((vector_size (8))); +typedef double v2df __attribute__ ((vector_size (16))); + +/* +** test_v1df_via_vgmb: +** vgmb%v24,0,1 +** br %r14 +*/ + +v1df +test_v1df_via_vgmb (void) +{ + return (v1df){-8577.505882352939806878566741943359375}; +} + +/* +** test_v2df_via_vgmb: +** vgmb%v24,0,1 +** br %r14 +*/ + +v2df +test_v2df_via_vgmb (void) +{ + return (v2df){-8577.505882352939806878566741943359375, -8577.505882352939806878566741943359375}; +} diff --git a/gcc/testsuite/gcc.target/s390/vector/vgm-di-1.c b/gcc/testsuite/gcc.target/s390/vector/vgm-di-1.c new file mode 100644 index ..fa608f2b5ae8 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/s390/vector/vgm-di-1.c @@ -0,0 +1,102 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=z13 -mzarch" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +typedef long long v1di __attribute__ ((vector_size (8))); +typedef long long v2di __attribute__ ((vector_size (16))); + +/* +** test_v1di_via_vgmb: +** vgmb%v24,0,2 +** br %r14 +*/ + +v1di +test
[gcc r15-1365] s390: Extend two/four element integer vectors
https://gcc.gnu.org/g:2ab143df110a40bd41b5368ef84819953bf971b1 commit r15-1365-g2ab143df110a40bd41b5368ef84819953bf971b1 Author: Stefan Schulze Frielinghaus Date: Mon Jun 17 08:36:11 2024 +0200 s390: Extend two/four element integer vectors For the moment I deliberately left out one-element QHS vectors since it is unclear whether these are pathological cases or whether they are really used. If we ever get an extend for V1DI -> V1TI we should reconsider this. As a side-effect this fixes PR115261. gcc/ChangeLog: PR target/115261 * config/s390/s390.md (any_extend,extend_insn,zero_extend): New code attributes and code iterator. * config/s390/vector.md (V_EXTEND): New mode iterator. (2): New insn. gcc/testsuite/ChangeLog: * gcc.target/s390/vector/vec-extend-1.c: New test. * gcc.target/s390/vector/vec-extend-2.c: New test. Diff: --- gcc/config/s390/s390.md| 4 ++ gcc/config/s390/vector.md | 29 ++-- .../gcc.target/s390/vector/vec-extend-1.c | 79 ++ .../gcc.target/s390/vector/vec-extend-2.c | 55 +++ 4 files changed, 162 insertions(+), 5 deletions(-) diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index c607dce3cf0f..1311a5f01cf3 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -602,6 +602,10 @@ (define_attr "relative_long" "no,yes" (const_string "no")) +(define_code_attr extend_insn [(sign_extend "extend") (zero_extend "zero_extend")]) +(define_code_attr zero_extend [(sign_extend "") (zero_extend "l")]) +(define_code_iterator any_extend [sign_extend zero_extend]) + ;; Pipeline description for z900. (include "2064.md") diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index ed4742d93c91..a931a4b1b17e 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -87,6 +87,8 @@ ; 32 bit int<->fp vector conversion instructions are available since VXE2 (z15). 
(define_mode_iterator VX_VEC_CONV_BFP [V2DF (V4SF "TARGET_VXE2")]) +(define_mode_iterator VI_EXTEND [V2QI V2HI V2SI V4QI V4HI]) + ; Empty string for all but TImode. This is used to hide the TImode ; expander name in case it is defined already. See addti3 for an ; example. @@ -195,13 +197,20 @@ (V1DF "V2DF") (V2DF "V4DF")]) ; Vector with widened element size and the same number of elements. -(define_mode_attr vec_2x_wide [(V1QI "V1HI") (V2QI "V2HI") (V4QI "V4HI") (V8QI "V8HI") (V16QI "V16HI") +(define_mode_attr VEC_2X_WIDE [(V1QI "V1HI") (V2QI "V2HI") (V4QI "V4HI") (V8QI "V8HI") (V16QI "V16HI") (V1HI "V1SI") (V2HI "V2SI") (V4HI "V4SI") (V8HI "V8SI") (V1SI "V1DI") (V2SI "V2DI") (V4SI "V4DI") (V1DI "V1TI") (V2DI "V2TI") (V1SF "V1DF") (V2SF "V2DF") (V4SF "V4DF") (V1DF "V1TF") (V2DF "V2TF")]) +(define_mode_attr vec_2x_wide [(V1QI "v1hi") (V2QI "v2hi") (V4QI "v4hi") (V8QI "v8hi") (V16QI "v16hi") + (V1HI "v1si") (V2HI "v2si") (V4HI "v4si") (V8HI "v8si") + (V1SI "v1di") (V2SI "v2di") (V4SI "v4di") + (V1DI "v1ti") (V2DI "v2ti") + (V1SF "v1df") (V2SF "v2df") (V4SF "v4df") + (V1DF "v1tf") (V2DF "v2tf")]) + ; Vector with half the element size AND half the number of elements. 
(define_mode_attr vec_halfhalf [(V2HI "V2QI") (V4HI "V4QI") (V8HI "V8QI") @@ -1604,7 +1613,7 @@ UNSPEC_VEC_UMULT_ODD)) (set (match_operand: 0 "register_operand" "") (vec_select: -(vec_concat: (match_dup 3) (match_dup 4)) +(vec_concat: (match_dup 3) (match_dup 4)) (match_dup 5)))] "TARGET_VX" { @@ -1623,7 +1632,7 @@ UNSPEC_VEC_UMULT_ODD)) (set (match_operand: 0 "register_operand" "") (vec_select: -(vec_concat: (match_dup 3) (match_dup 4)) +(vec_concat: (match_dup 3) (match_dup 4)) (match_dup 5)))] "TARGET_VX" { @@ -1642,7 +1651,7 @@ UNSPEC_VEC_SMULT_ODD)) (set (match_operand: 0 "register_operand" "") (vec_select: -(vec_concat: (match_dup 3) (match_dup 4)) +(vec_concat: (match_dup 3) (match_dup 4)) (match_dup 5)))] "TARGET_VX" { @@ -1661,7 +1670,7 @@ UNSPEC_VEC_SMULT_ODD)) (set (match_operand: 0 "register_operand" "") (vec_select: -(vec_concat: (match_dup 3) (match_dup 4)) +(vec_concat: (match_dup 3) (match_dup 4)) (match_dup 5)))] "TARGET_VX" { @@ -2375,6 +2384,16 @@ "vpkls\
[gcc r15-1364] s390: testsuite: Fix nobp-table-jump-*.c
https://gcc.gnu.org/g:0bf3f14e0d79f3258d4e5570216b5d81af6d60ef commit r15-1364-g0bf3f14e0d79f3258d4e5570216b5d81af6d60ef Author: Stefan Schulze Frielinghaus Date: Mon Jun 17 08:35:27 2024 +0200 s390: testsuite: Fix nobp-table-jump-*.c Starting with r14-5628-g53ba8d669550d3 interprocedural VRP became strong enough to render these tests useless. Fixed by disabling IPA. gcc/testsuite/ChangeLog: * gcc.target/s390/nobp-table-jump-inline-z10.c: Do not perform IPA. * gcc.target/s390/nobp-table-jump-inline-z900.c: Ditto. * gcc.target/s390/nobp-table-jump-z10.c: Ditto. * gcc.target/s390/nobp-table-jump-z900.c: Ditto. Diff: --- .../gcc.target/s390/nobp-table-jump-inline-z10.c | 42 +++--- .../gcc.target/s390/nobp-table-jump-inline-z900.c | 42 +++--- .../gcc.target/s390/nobp-table-jump-z10.c | 42 +++--- .../gcc.target/s390/nobp-table-jump-z900.c | 42 +++--- 4 files changed, 84 insertions(+), 84 deletions(-) diff --git a/gcc/testsuite/gcc.target/s390/nobp-table-jump-inline-z10.c b/gcc/testsuite/gcc.target/s390/nobp-table-jump-inline-z10.c index 8dfd7e4c7861..121751166d0a 100644 --- a/gcc/testsuite/gcc.target/s390/nobp-table-jump-inline-z10.c +++ b/gcc/testsuite/gcc.target/s390/nobp-table-jump-inline-z10.c @@ -4,29 +4,29 @@ /* case-values-threshold will be set to 20 by the back-end when jump thunk are requested. 
*/ -int __attribute__((noinline,noclone)) foo1 (void) { return 1; } -int __attribute__((noinline,noclone)) foo2 (void) { return 2; } -int __attribute__((noinline,noclone)) foo3 (void) { return 3; } -int __attribute__((noinline,noclone)) foo4 (void) { return 4; } -int __attribute__((noinline,noclone)) foo5 (void) { return 5; } -int __attribute__((noinline,noclone)) foo6 (void) { return 6; } -int __attribute__((noinline,noclone)) foo7 (void) { return 7; } -int __attribute__((noinline,noclone)) foo8 (void) { return 8; } -int __attribute__((noinline,noclone)) foo9 (void) { return 9; } -int __attribute__((noinline,noclone)) foo10 (void) { return 10; } -int __attribute__((noinline,noclone)) foo11 (void) { return 11; } -int __attribute__((noinline,noclone)) foo12 (void) { return 12; } -int __attribute__((noinline,noclone)) foo13 (void) { return 13; } -int __attribute__((noinline,noclone)) foo14 (void) { return 14; } -int __attribute__((noinline,noclone)) foo15 (void) { return 15; } -int __attribute__((noinline,noclone)) foo16 (void) { return 16; } -int __attribute__((noinline,noclone)) foo17 (void) { return 17; } -int __attribute__((noinline,noclone)) foo18 (void) { return 18; } -int __attribute__((noinline,noclone)) foo19 (void) { return 19; } -int __attribute__((noinline,noclone)) foo20 (void) { return 20; } +int __attribute__((noipa)) foo1 (void) { return 1; } +int __attribute__((noipa)) foo2 (void) { return 2; } +int __attribute__((noipa)) foo3 (void) { return 3; } +int __attribute__((noipa)) foo4 (void) { return 4; } +int __attribute__((noipa)) foo5 (void) { return 5; } +int __attribute__((noipa)) foo6 (void) { return 6; } +int __attribute__((noipa)) foo7 (void) { return 7; } +int __attribute__((noipa)) foo8 (void) { return 8; } +int __attribute__((noipa)) foo9 (void) { return 9; } +int __attribute__((noipa)) foo10 (void) { return 10; } +int __attribute__((noipa)) foo11 (void) { return 11; } +int __attribute__((noipa)) foo12 (void) { return 12; } +int 
__attribute__((noipa)) foo13 (void) { return 13; } +int __attribute__((noipa)) foo14 (void) { return 14; } +int __attribute__((noipa)) foo15 (void) { return 15; } +int __attribute__((noipa)) foo16 (void) { return 16; } +int __attribute__((noipa)) foo17 (void) { return 17; } +int __attribute__((noipa)) foo18 (void) { return 18; } +int __attribute__((noipa)) foo19 (void) { return 19; } +int __attribute__((noipa)) foo20 (void) { return 20; } -int __attribute__((noinline,noclone)) +int __attribute__((noipa)) bar (int a) { int ret = 0; diff --git a/gcc/testsuite/gcc.target/s390/nobp-table-jump-inline-z900.c b/gcc/testsuite/gcc.target/s390/nobp-table-jump-inline-z900.c index 46d2c54bcff1..5ad0c72afc36 100644 --- a/gcc/testsuite/gcc.target/s390/nobp-table-jump-inline-z900.c +++ b/gcc/testsuite/gcc.target/s390/nobp-table-jump-inline-z900.c @@ -4,29 +4,29 @@ /* case-values-threshold will be set to 20 by the back-end when jump thunk are requested. */ -int __attribute__((noinline,noclone)) foo1 (void) { return 1; } -int __attribute__((noinline,noclone)) foo2 (void) { return 2; } -int __attribute__((noinline,noclone)) foo3 (void) { return 3; } -int __attribute__((noinline,noclone)) foo4 (void) { return 4; } -int __attribute__((noinline,noclone)) foo5 (void) { return 5; } -int __attribute__((noinline,noclone)) foo6 (void) { return 6; } -int __attribute__((noinline,noclone)) foo7 (void) { return 7; } -int __attribute__((noinline,noclone)) foo8 (void) { return 8; }
[gcc r15-1363] s390: testsuite: Fix ifcvt-one-insn-bool.c
https://gcc.gnu.org/g:ac66736bf2f8a10d2f43e83ed6377e4179027a39 commit r15-1363-gac66736bf2f8a10d2f43e83ed6377e4179027a39 Author: Stefan Schulze Frielinghaus Date: Mon Jun 17 08:34:34 2024 +0200 s390: testsuite: Fix ifcvt-one-insn-bool.c With the change of r15-787-g57e04879389f9c I forgot to also update this test. gcc/testsuite/ChangeLog: * gcc.target/s390/ifcvt-one-insn-bool.c: Fix loc. Diff: --- gcc/testsuite/gcc.target/s390/ifcvt-one-insn-bool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/s390/ifcvt-one-insn-bool.c b/gcc/testsuite/gcc.target/s390/ifcvt-one-insn-bool.c index 0c8c2f879a69..4ae29dbd6b61 100644 --- a/gcc/testsuite/gcc.target/s390/ifcvt-one-insn-bool.c +++ b/gcc/testsuite/gcc.target/s390/ifcvt-one-insn-bool.c @@ -3,7 +3,7 @@ /* { dg-do compile { target { s390*-*-* } } } */ /* { dg-options "-O2 -march=z13 -mzarch" } */ -/* { dg-final { scan-assembler "lochinh\t%r.?,1" } } */ +/* { dg-final { scan-assembler "lochile\t%r.?,1" } } */ #include int foo (int *a, unsigned int n)
[gcc r15-1362] m2: Remove uses of {FLOAT,{,LONG_}DOUBLE}_TYPE_SIZE
https://gcc.gnu.org/g:96fe23eb8a9ebac6b64aeb55db88d219177a345a commit r15-1362-g96fe23eb8a9ebac6b64aeb55db88d219177a345a Author: Kewen Lin Date: Sun Jun 16 21:50:19 2024 -0500 m2: Remove uses of {FLOAT,{,LONG_}DOUBLE}_TYPE_SIZE Joseph pointed out "floating types should have their mode, not a poorly defined precision value" in the discussion[1], as he and Richi suggested, the existing macros {FLOAT,{,LONG_}DOUBLE}_TYPE_SIZE will be replaced with a hook mode_for_floating_type. To be prepared for that, this patch is to remove uses of {FLOAT,{,LONG_}DOUBLE}_TYPE_SIZE in m2. Currently they are used for assertion and can be replaced with TYPE_SIZE check on the corresponding type node, since we dropped the call to layout_type which would early return once TYPE_SIZE is set and this assertion ensures it's safe to drop that call. [1] https://gcc.gnu.org/pipermail/gcc-patches/2024-May/651209.html gcc/m2/ChangeLog: * gm2-gcc/m2type.cc (build_m2_short_real_node): Adjust assertion with TYPE_SIZE check. (build_m2_real_node): Likewise. (build_m2_long_real_node): Add assertion with TYPE_SIZE check. Diff: --- gcc/m2/gm2-gcc/m2type.cc | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gcc/m2/gm2-gcc/m2type.cc b/gcc/m2/gm2-gcc/m2type.cc index 5773a5cbd190..7ed184518cb1 100644 --- a/gcc/m2/gm2-gcc/m2type.cc +++ b/gcc/m2/gm2-gcc/m2type.cc @@ -1416,7 +1416,7 @@ static tree build_m2_short_real_node (void) { /* Define `SHORTREAL'. */ - ASSERT_CONDITION (TYPE_PRECISION (float_type_node) == FLOAT_TYPE_SIZE); + ASSERT_CONDITION (TYPE_SIZE (float_type_node)); return float_type_node; } @@ -1424,7 +1424,7 @@ static tree build_m2_real_node (void) { /* Define `REAL'. */ - ASSERT_CONDITION (TYPE_PRECISION (double_type_node) == DOUBLE_TYPE_SIZE); + ASSERT_CONDITION (TYPE_SIZE (double_type_node)); return double_type_node; } @@ -1432,12 +1432,13 @@ static tree build_m2_long_real_node (void) { tree longreal; - + /* Define `LONGREAL'. 
*/ if (M2Options_GetIEEELongDouble ()) longreal = float128_type_node; else longreal = long_double_type_node; + ASSERT_CONDITION (TYPE_SIZE (longreal)); return longreal; }
[gcc r15-1360] libbacktrace: it's OK if zstd decompressor sees no backward bits
https://gcc.gnu.org/g:8348f8c22ff1ac61df45d63739e1028f87d6ef88 commit r15-1360-g8348f8c22ff1ac61df45d63739e1028f87d6ef88 Author: Ian Lance Taylor Date: Sun Jun 16 15:39:53 2024 -0700 libbacktrace: it's OK if zstd decompressor sees no backward bits * elf.c (elf_fetch_bits_backward): Don't fail if no bits are available. Diff: --- libbacktrace/elf.c | 9 + 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/libbacktrace/elf.c b/libbacktrace/elf.c index 3cd87020b031..735f87525008 100644 --- a/libbacktrace/elf.c +++ b/libbacktrace/elf.c @@ -1182,14 +1182,7 @@ elf_fetch_bits_backward (const unsigned char **ppin, val = *pval; if (unlikely (pin <= pinend)) -{ - if (bits == 0) - { - elf_uncompress_failed (); - return 0; - } - return 1; -} +return 1; pin -= 4;
[gcc r15-1359] aarch64: Fix reg_is_wrapped_separately array size [PR100211]
https://gcc.gnu.org/g:33caee556c130b2dcf311480314e942a43d6b368 commit r15-1359-g33caee556c130b2dcf311480314e942a43d6b368 Author: Andrew Pinski Date: Sun Jun 16 10:53:15 2024 -0700 aarch64: Fix reg_is_wrapped_separately array size [PR100211] Currently the size of the array reg_is_wrapped_separately is LAST_SAVED_REGNUM. But LAST_SAVED_REGNUM could be a regno that is being saved. So the size needs to be `LAST_SAVED_REGNUM + 1` like aarch64_frame->reg_offset is. Committed as obvious after a bootstrap/test for aarch64-linux-gnu. gcc/ChangeLog: PR target/100211 * config/aarch64/aarch64.h (machine_function): Fix the size of reg_is_wrapped_separately. Signed-off-by: Andrew Pinski Diff: --- gcc/config/aarch64/aarch64.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 0997b82dbc0f..2b89f6f88ef0 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -1059,7 +1059,7 @@ typedef struct GTY (()) machine_function { struct aarch64_frame frame; /* One entry for each hard register. */ - bool reg_is_wrapped_separately[LAST_SAVED_REGNUM]; + bool reg_is_wrapped_separately[LAST_SAVED_REGNUM + 1]; /* One entry for each general purpose register. */ rtx call_via[SP_REGNUM];
[gcc r15-1358] [to-be-committed] [RISC-V] Improve (1 << N) | C for rv64
https://gcc.gnu.org/g:59dfce6d618ccf5865dec216603dbc25a4f7bf2d commit r15-1358-g59dfce6d618ccf5865dec216603dbc25a4f7bf2d Author: Jeff Law Date: Sun Jun 16 08:36:27 2024 -0600 [to-be-committed] [RISC-V] Improve (1 << N) | C for rv64 Another improvement for generating Zbs instructions. In this case we're looking at stuff like (1 << N) | C where N varies and C is a single bit constant. In this pattern the (1 << N) happens in SImode, but is zero extended out to DImode before the bit manipulation. The fact that we're modifying a DImode object in the logical op is important as it means we don't have to worry about whether or not the resulting value is sign extended from SI to DI. This has run through Ventana's CI system. I'll wait for it to roll through pre-commit CI before moving forward. gcc/ * config/riscv/bitmanip.md ((1 << N) | C): New splitter for IOR/XOR of a single bit and a DImode object. gcc/testsuite/ * gcc.target/riscv/zbs-zext.c: New test. Diff: --- gcc/config/riscv/bitmanip.md | 15 +++ gcc/testsuite/gcc.target/riscv/zbs-zext.c | 31 +++ 2 files changed, 46 insertions(+) diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 4ee413c143e3..0d35fb786e11 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -627,6 +627,21 @@ "bseti\t%0,%1,%S2" [(set_attr "type" "bitmanip")]) +;; We can easily handle zero extensions +(define_split + [(set (match_operand:DI 0 "register_operand") +(any_or:DI (zero_extend:DI +(ashift:SI (const_int 1) + (match_operand:QI 1 "register_operand"))) + (match_operand:DI 2 "single_bit_mask_operand"))) + (clobber (match_operand:DI 3 "register_operand"))] + "TARGET_64BIT && TARGET_ZBS" + [(set (match_dup 3) +(match_dup 2)) + (set (match_dup 0) + (any_or:DI (ashift:DI (const_int 1) (match_dup 1)) + (match_dup 3)))]) + (define_insn "*bclr" [(set (match_operand:X 0 "register_operand" "=r") (and:X (rotate:X (const_int -2) diff --git a/gcc/testsuite/gcc.target/riscv/zbs-zext.c 
b/gcc/testsuite/gcc.target/riscv/zbs-zext.c new file mode 100644 index ..5773b15d2987 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zbs-zext.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbs -mabi=lp64" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O1" } } */ +typedef unsigned long uint64_t; +typedef unsigned int uint32_t; + +uint64_t bset (const uint32_t i) +{ + uint64_t checks = 8; + checks |= 1U << i; + return checks; +} + +uint64_t binv (const uint32_t i) +{ + uint64_t checks = 8; + checks ^= 1U << i; + return checks; +} + +uint64_t bclr (const uint32_t i) +{ + uint64_t checks = 10; + checks &= ~(1U << i); + return checks; +} + +/* { dg-final { scan-assembler-times "bset\t" 1 } } */ +/* { dg-final { scan-assembler-times "binv\t" 1 } } */ +/* { dg-final { scan-assembler-times "bclr\t" 1 } } */ +/* { dg-final { scan-assembler-not "sllw\t"} } */