[gcc r14-10396] RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW [PR115763]
https://gcc.gnu.org/g:505382ceee0b5e72dc5defa05aec77a97658feca commit r14-10396-g505382ceee0b5e72dc5defa05aec77a97658feca Author: Pan Li Date: Wed Jul 3 22:06:48 2024 +0800 RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW [PR115763] According to the ISA, the zvfhmin sub extension should only contain convertion insn. Thus, the vfmv insn acts on FP16 should not be present when only the zvfhmin option is given. This patch would like to fix it by split the pred_broadcast define_insn into zvfhmin and zvfh part. Given below example: void test (_Float16 *dest, _Float16 bias) { dest[0] = bias; dest[1] = bias; } when compile with -march=rv64gcv_zfh_zvfhmin Before this patch: test: vsetivlizero,2,e16,mf4,ta,ma vfmv.v.fv1,fa0 // should not leverage vfmv for zvfhmin vse16.v v1,0(a0) ret After this patch: test: addi sp,sp,-16 fsh fa0,14(sp) addi a5,sp,14 vsetivli zero,2,e16,mf4,ta,ma vlse16.v v1,0(a5),zero vse16.v v1,0(a0) addi sp,sp,16 jr ra PR target/115763 gcc/ChangeLog: * config/riscv/vector.md (*pred_broadcast): Split into zvfh and zvfhmin part. (*pred_broadcast_zvfh): New define_insn for zvfh part. (*pred_broadcast_zvfhmin): Ditto but for zvfhmin. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/scalar_move-5.c: Adjust asm check. * gcc.target/riscv/rvv/base/scalar_move-6.c: Ditto. * gcc.target/riscv/rvv/base/scalar_move-7.c: Ditto. * gcc.target/riscv/rvv/base/scalar_move-8.c: Ditto. * gcc.target/riscv/rvv/base/pr115763-1.c: New test. * gcc.target/riscv/rvv/base/pr115763-2.c: New test. 
Signed-off-by: Pan Li (cherry picked from commit de9254e224eb3d89303cb9b3ba50b4c479c55f7c) Diff: --- gcc/config/riscv/vector.md | 49 +++--- .../gcc.target/riscv/rvv/base/pr115763-1.c | 9 .../gcc.target/riscv/rvv/base/pr115763-2.c | 10 + .../gcc.target/riscv/rvv/base/scalar_move-5.c | 4 +- .../gcc.target/riscv/rvv/base/scalar_move-6.c | 6 +-- .../gcc.target/riscv/rvv/base/scalar_move-7.c | 6 +-- .../gcc.target/riscv/rvv/base/scalar_move-8.c | 6 +-- 7 files changed, 64 insertions(+), 26 deletions(-) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 228d0f9a7663..03012d677d79 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -2080,31 +2080,50 @@ [(set_attr "type" "vimov,vimov,vlds,vlds,vlds,vlds,vimovxv,vimovxv") (set_attr "mode" "")]) -(define_insn "*pred_broadcast" - [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand" "=vr, vr, vr, vr, vr, vr, vr, vr") - (if_then_else:V_VLSF_ZVFHMIN +(define_insn "*pred_broadcast_zvfh" + [(set (match_operand:V_VLSF0 "register_operand" "=vr, vr, vr, vr") + (if_then_else:V_VLSF (unspec: - [(match_operand: 1 "vector_broadcast_mask_operand" "Wc1,Wc1, vm, vm,Wc1,Wc1,Wb1,Wb1") -(match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK") -(match_operand 5 "const_int_operand" " i, i, i, i, i, i, i, i") -(match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i") -(match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i") + [(match_operand: 1 "vector_broadcast_mask_operand" "Wc1, Wc1, Wb1, Wb1") +(match_operand 4 "vector_length_operand" " rK, rK, rK, rK") +(match_operand 5 "const_int_operand" " i, i, i, i") +(match_operand 6 "const_int_operand" " i, i, i, i") +(match_operand 7 "const_int_operand" " i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (vec_duplicate:V_VLSF_ZVFHMIN - (match_operand: 3 "direct_broadcast_operand" " f, f,Wdm,Wdm,Wdm,Wdm, f, f")) - (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand""vu, 0, vu, 0, vu, 0, vu, 
0")))] + (vec_duplicate:V_VLSF + (match_operand: 3 "direct_broadcast_operand" " f, f, f, f")) + (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))] "TARGET_VECTOR" "@ vfmv.v.f\t%0,%3 vfmv.v.f\t%0,%3 + vfmv.s.f\t%0,%3 + vfmv.s.f\t%0,%3" + [(set_attr "type" "vfmov,vfmov,vfmovfv,vfmovfv") + (set_attr "mode" "")]) + +(define_insn "*pred_broadcast_zvfhmin" + [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand" "=vr, vr, vr, vr") + (if_then_e
[gcc r15-1905] Rename __{float, double}_u to __x86_{float, double}_u to avoid polluting the namespace.
https://gcc.gnu.org/g:23ab7f632f4f5bae67fb53cf7b18fea7ba7242c4 commit r15-1905-g23ab7f632f4f5bae67fb53cf7b18fea7ba7242c4 Author: liuhongt Date: Mon Jul 8 10:35:35 2024 +0800 Rename __{float,double}_u to __x86_{float,double}_u to avoid pulluting the namespace. I have a build failure on NetBSD as the namespace pollution avoidance causes a direct hit with the system /usr/include/math.h === In file included from /usr/src/local/gcc/obj/gcc/include/emmintrin.h:31, from /usr/src/local/gcc/obj/x86_64-unknown-netbsd10.99/libstdc++-v3/include/ext/random:45, from /usr/src/local/gcc/libstdc++-v3/include/precompiled/extc++.h:65: /usr/src/local/gcc/obj/gcc/include/xmmintrin.h:75:15: error: conflicting declaration 'typedef float __float_u' 75 | typedef float __float_u __attribute__ ((__may_alias__, __aligned__ (1))); | ^ In file included from /usr/src/local/gcc/obj/x86_64-unknown-netbsd10.99/libstdc++-v3/include/cmath:47, from /usr/src/local/gcc/obj/x86_64-unknown-netbsd10.99/libstdc++-v3/include/x86_64-unknown-netbsd10.99/bits/stdc++.h:114, from /usr/src/local/gcc/libstdc++-v3/include/precompiled/extc++.h:32: /usr/src/local/gcc/obj/gcc/include-fixed/math.h:49:7: note: previous declaration as 'union __float_u' 49 | union __float_u { gcc/ChangeLog: PR target/115796 * config/i386/emmintrin.h (__float_u): Rename to .. (__x86_float_u): .. this. (_mm_load_sd): Ditto. (_mm_store_sd): Ditto. (_mm_loadh_pd): Ditto. (_mm_loadl_pd): Ditto. * config/i386/xmmintrin.h (__double_u): Rename to .. (__x86_double_u): .. this. (_mm_load_ss): Ditto. (_mm_store_ss): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr115796.c: New test. 
Diff: --- gcc/config/i386/emmintrin.h | 10 +- gcc/config/i386/xmmintrin.h | 6 +++--- gcc/testsuite/gcc.target/i386/pr115796.c | 24 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index d58030e5c4fe..a3fcd7a869cf 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -56,7 +56,7 @@ typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); /* Unaligned version of the same types. */ typedef long long __m128i_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1))); typedef double __m128d_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1))); -typedef double __double_u __attribute__ ((__may_alias__, __aligned__ (1))); +typedef double __x86_double_u __attribute__ ((__may_alias__, __aligned__ (1))); /* Create a selector for use with the SHUFPD instruction. */ #define _MM_SHUFFLE2(fp1,fp0) \ @@ -146,7 +146,7 @@ _mm_load1_pd (double const *__P) extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load_sd (double const *__P) { - return __extension__ (__m128d) { *(__double_u *)__P, 0.0 }; + return __extension__ (__m128d) { *(__x86_double_u *)__P, 0.0 }; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -181,7 +181,7 @@ _mm_storeu_pd (double *__P, __m128d __A) extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_sd (double *__P, __m128d __A) { - *(__double_u *)__P = ((__v2df)__A)[0] ; + *(__x86_double_u *)__P = ((__v2df)__A)[0] ; } extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -974,13 +974,13 @@ _mm_unpacklo_pd (__m128d __A, __m128d __B) extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadh_pd (__m128d __A, double const *__B) { - return __extension__ (__m128d) { ((__v2df)__A)[0], *(__double_u*)__B }; + 
return __extension__ (__m128d) { ((__v2df)__A)[0], *(__x86_double_u*)__B }; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadl_pd (__m128d __A, double const *__B) { - return __extension__ (__m128d) { *(__double_u*)__B, ((__v2df)__A)[1] }; + return __extension__ (__m128d) { *(__x86_double_u*)__B, ((__v2df)__A)[1] }; } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index 37e5a94cf101..7f10f96d72ce 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -72,7 +72,7 @@ typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); /* Unaligned version of the same type. */ typedef float __m128_u __attribute__ ((__vector_
[gcc r15-1904] RISC-V: Add testcases for unsigned vector .SAT_ADD IMM form 2
https://gcc.gnu.org/g:ecde8d50bea3573194f21277666f83463cbbe9c9 commit r15-1904-gecde8d50bea3573194f21277666f83463cbbe9c9 Author: Pan Li Date: Mon Jul 8 21:58:59 2024 +0800 RISC-V: Add testcases for unsigned vector .SAT_ADD IMM form 2 After the middle-end supported the vector mode of .SAT_ADD, add more testcases to ensure the correctness of RISC-V backend for form 2. Aka: Form 2: #define DEF_VEC_SAT_U_ADD_IMM_FMT_2(T, IMM) \ T __attribute__((noinline)) \ vec_sat_u_add_imm##IMM##_##T##_fmt_2 (T *out, T *in, unsigned limit) \ {\ unsigned i;\ for (i = 0; i < limit; i++)\ out[i] = (T)(in[i] + IMM) < in[i] ? -1 : (in[i] + IMM); \ } DEF_VEC_SAT_U_ADD_IMM_FMT_2 (uint64_t, 9) Passed the fully rv64gcv regression tests. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add help test macro. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-6.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-7.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-8.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-5.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-6.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-7.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-8.c: New test. 
Signed-off-by: Pan Li Diff: --- .../riscv/rvv/autovec/binop/vec_sat_arith.h| 17 + .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c | 14 +++ .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-6.c | 14 +++ .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-7.c | 14 +++ .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-8.c | 14 +++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-5.c| 28 ++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-6.c| 28 ++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-7.c| 28 ++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-8.c| 28 ++ 9 files changed, 185 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h index 3733c8fd2c15..10459807b2c4 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h @@ -158,12 +158,29 @@ vec_sat_u_add_imm##IMM##_##T##_fmt_1 (T *out, T *in, unsigned limit) \ #define DEF_VEC_SAT_U_ADD_IMM_FMT_1_WRAP(T, IMM) \ DEF_VEC_SAT_U_ADD_IMM_FMT_1(T, IMM) +#define DEF_VEC_SAT_U_ADD_IMM_FMT_2(T, IMM) \ +T __attribute__((noinline)) \ +vec_sat_u_add_imm##IMM##_##T##_fmt_2 (T *out, T *in, unsigned limit) \ +{\ + unsigned i;\ + for (i = 0; i < limit; i++)\ +out[i] = (T)(in[i] + IMM) < in[i] ? 
-1 : (in[i] + IMM); \ +} +#define DEF_VEC_SAT_U_ADD_IMM_FMT_2_WRAP(T, IMM) \ + DEF_VEC_SAT_U_ADD_IMM_FMT_2(T, IMM) + #define RUN_VEC_SAT_U_ADD_IMM_FMT_1(T, out, op_1, expect, IMM, N) \ vec_sat_u_add_imm##IMM##_##T##_fmt_1(out, op_1, N); \ VALIDATE_RESULT (out, expect, N) #define RUN_VEC_SAT_U_ADD_IMM_FMT_1_WRAP(T, out, op_1, expect, IMM, N) \ RUN_VEC_SAT_U_ADD_IMM_FMT_1(T, out, op_1, expect, IMM, N) +#define RUN_VEC_SAT_U_ADD_IMM_FMT_2(T, out, op_1, expect, IMM, N) \ + vec_sat_u_add_imm##IMM##_##T##_fmt_2(out, op_1, N); \ + VALIDATE_RESULT (out, expect, N) +#define RUN_VEC_SAT_U_ADD_IMM_FMT_2_WRAP(T, out, op_1, expect, IMM, N) \ + RUN_VEC_SAT_U_ADD_IMM_FMT_2(T, out, op_1, expect, IMM, N) + /**/ /* Saturation Sub (Unsigned and Signed) */ /**/ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c new file mode 100644 index ..d25fdcf78f38 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mab
[gcc r15-1903] RISC-V: Add testcases for unsigned vector .SAT_ADD IMM form 1
https://gcc.gnu.org/g:35b1096896a94a90d787f5ef402ba009dd4f0393 commit r15-1903-g35b1096896a94a90d787f5ef402ba009dd4f0393 Author: Pan Li Date: Mon Jul 8 20:31:31 2024 +0800 RISC-V: Add testcases for unsigned vector .SAT_ADD IMM form 1 After the middle-end supported the vector mode of .SAT_ADD, add more testcases to ensure the correctness of RISC-V backend for form 1. Aka: Form 1: #define DEF_VEC_SAT_U_ADD_IMM_FMT_1(T, IMM) \ T __attribute__((noinline)) \ vec_sat_u_add_imm##IMM##_##T##_fmt_1 (T *out, T *in, unsigned limit) \ {\ unsigned i;\ for (i = 0; i < limit; i++)\ out[i] = (T)(in[i] + IMM) >= in[i] ? (in[i] + IMM) : -1; \ } DEF_VEC_SAT_U_ADD_IMM_FMT_1 (uint64_t, 9) Passed the fully rv64gcv regression tests. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add help test macro. * gcc.target/riscv/rvv/autovec/binop/vec_sat_data.h: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-1.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-2.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-3.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-4.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-1.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-2.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-3.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-4.c: New test. 
Signed-off-by: Pan Li Diff: --- .../riscv/rvv/autovec/binop/vec_sat_arith.h| 25 ++ .../riscv/rvv/autovec/binop/vec_sat_data.h | 256 + .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-1.c | 14 ++ .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-2.c | 14 ++ .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-3.c | 14 ++ .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-4.c | 14 ++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-1.c| 28 +++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-2.c| 28 +++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-3.c| 28 +++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-4.c| 28 +++ 10 files changed, 449 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h index b55a589e019a..3733c8fd2c15 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h @@ -4,6 +4,14 @@ #include #include +#define VALIDATE_RESULT(out, expect, N) \ + do \ +{\ + for (unsigned i = 0; i < N; i++) \ +if (out[i] != expect[i]) __builtin_abort (); \ +}\ + while (false) + /**/ /* Saturation Add (unsigned and signed) */ /**/ @@ -139,6 +147,23 @@ vec_sat_u_add_##T##_fmt_8 (T *out, T *op_1, T *op_2, unsigned limit) \ #define RUN_VEC_SAT_U_ADD_FMT_8(T, out, op_1, op_2, N) \ vec_sat_u_add_##T##_fmt_8(out, op_1, op_2, N) +#define DEF_VEC_SAT_U_ADD_IMM_FMT_1(T, IMM) \ +T __attribute__((noinline)) \ +vec_sat_u_add_imm##IMM##_##T##_fmt_1 (T *out, T *in, unsigned limit) \ +{\ + unsigned i;\ + for (i = 0; i < limit; i++)\ +out[i] = (T)(in[i] + IMM) >= in[i] ? 
(in[i] + IMM) : -1; \ +} +#define DEF_VEC_SAT_U_ADD_IMM_FMT_1_WRAP(T, IMM) \ + DEF_VEC_SAT_U_ADD_IMM_FMT_1(T, IMM) + +#define RUN_VEC_SAT_U_ADD_IMM_FMT_1(T, out, op_1, expect, IMM, N) \ + vec_sat_u_add_imm##IMM##_##T##_fmt_1(out, op_1, N); \ + VALIDATE_RESULT (out, expect, N) +#define RUN_VEC_SAT_U_ADD_IMM_FMT_1_WRAP(T, out, op_1, expect, IMM, N) \ + RUN_VEC_SAT_U_ADD_IMM_FMT_1(T, out, op_1, expect, IMM, N) + /**/ /* Saturation Sub (Unsigned and Signed) */ /
[gcc r15-1901] [to-be-committed][RISC-V][V3] DCE analysis for extension elimination
https://gcc.gnu.org/g:98914f9eba5f19d3eb93fbce8726b5264631cba0 commit r15-1901-g98914f9eba5f19d3eb93fbce8726b5264631cba0 Author: Jeff Law Date: Mon Jul 8 17:06:55 2024 -0600 [to-be-committed][RISC-V][V3] DCE analysis for extension elimination The pre-commit testing showed that making ext-dce only active at -O2 and above would require minor edits to the tests. In some cases we had specified -O1 in the test or specified no optimization level at all. Those need to be bumped to -O2. In one test we had one set of dg-options overriding another. The other approach that could have been taken would be to drop the -On argument, add an explicit -fext-dce and add dg-skip-if options. I originally thought that was going to be way to go, but the dg-skip-if aspect was going to get ugly as things like interaction between unrolling, peeling and -ftracer would have to be accounted for and would likely need semi-regular adjustment. Changes since V2: Testsuite changes to deal with pass only being enabled at -O2 or higher. -- Changes since V1: Check flag_ext_dce before running the new pass. I'd forgotten that I had removed that part of the gate to facilitate more testing. Turn flag_ext_dce on at -O2 and above. Adjust one of the riscv tests to explicitly avoid vectors Adjust a few aarch64 tests In tbz_2.c we remove an unnecessary extension which causes us to use "x" registers instead of "w" registers. In the pred_clobber tests we also remove an extension and that ultimately causes a reg->reg copy to change locations. -- This was actually ack'd late in the gcc-14 cycle, but I chose not to integrate it given how late we were in the cycle. The basic idea here is to track liveness of subobjects within a word and if we find an extension where the bits set aren't actually used, then we convert the extension into a subreg. The subreg typically simplifies away. 
I've seen this help a few routines in coremark, fix one bug in the testsuite (pr111384) and fix a couple internally reported bugs in Ventana. The original idea and code were from Joern; Jivan and I hacked it into usable shape. I've had this in my tester for ~8 months, so it's been through more build/test cycles than I care to contemplate and nearly every architecture we support. But just in case, I'm going to wait for it to spin through the pre-commit CI tester. I'll find my old ChangeLog before committing. gcc/ * Makefile.in (OBJS): Add ext-dce.o * common.opt (ext-dce): Document new option. * df-scan.cc (df_get_ext_block_use_set): Delete prototype and make extern. * df.h (df_get_exit_block_use_set): Prototype. * ext-dce.cc: New file/pass. * opts.cc (default_options_table): Handle ext-dce at -O2 or higher. * passes.def: Add ext-dce before combine. * tree-pass.h (make_pass_ext_dce): Prototype. gcc/testsuite * gcc.target/aarch64/sve/pred_clobber_1.c: Update expected output. * gcc.target/aarch64/sve/pred_clobber_2.c: Likewise. * gcc.target/aarch64/sve/pred_clobber_3.c: Likewise. * gcc.target/aarch64/tbz_2.c: Likewise. * gcc.target/riscv/core_bench_list.c: New test. * gcc.target/riscv/core_init_matrix.c: New test. * gcc.target/riscv/core_list_init.c: New test. * gcc.target/riscv/matrix_add_const.c: New test. * gcc.target/riscv/mem-extend.c: New test. * gcc.target/riscv/pr111384.c: New test. 
Co-authored-by: Jivan Hakobyan Co-authored-by: Joern Rennecke Diff: --- gcc/Makefile.in| 1 + gcc/common.opt | 4 + gcc/df-scan.cc | 3 +- gcc/df.h | 1 + gcc/ext-dce.cc | 943 + gcc/opts.cc| 1 + gcc/passes.def | 1 + .../gcc.target/aarch64/sve/pred_clobber_1.c| 1 + .../gcc.target/aarch64/sve/pred_clobber_2.c| 1 + .../gcc.target/aarch64/sve/pred_clobber_3.c| 1 + gcc/testsuite/gcc.target/aarch64/tbz_2.c | 6 +- gcc/testsuite/gcc.target/riscv/core_bench_list.c | 15 + gcc/testsuite/gcc.target/riscv/core_init_matrix.c | 17 + gcc/testsuite/gcc.target/riscv/core_list_init.c| 18 + gcc/testsuite/gcc.target/riscv/matrix_add_const.c | 13 + gcc/testsuite/gcc.target/riscv/mem-extend.c| 14 + gcc/testsuite/gcc.target/riscv/pr111384.c | 11 + gcc/tree-pass.h| 1 + 18
[gcc r15-1900] c-format.cc: add ctors to format_check_results and format_check_context
https://gcc.gnu.org/g:113b5ce0610207717f651a3f8a3f1123d93f97af commit r15-1900-g113b5ce0610207717f651a3f8a3f1123d93f97af Author: David Malcolm Date: Mon Jul 8 18:55:28 2024 -0400 c-format.cc: add ctors to format_check_results and format_check_context This is a minor cleanup I spotted whilst working on another patch. No functional change intended. gcc/c-family/ChangeLog: * c-format.cc (format_check_results::format_check_results): New ctor. (struct format_check_context): Add ctor; add "m_" prefix to all fields. (check_format_info): Use above ctors. (check_format_arg): Update for "m_" prefix to format_check_context. Signed-off-by: David Malcolm Diff: --- gcc/c-family/c-format.cc | 63 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/gcc/c-family/c-format.cc b/gcc/c-family/c-format.cc index 7a5ffc25602c..5bfd2fc4469e 100644 --- a/gcc/c-family/c-format.cc +++ b/gcc/c-family/c-format.cc @@ -1021,6 +1021,20 @@ static int n_format_types = ARRAY_SIZE (format_types_orig); many leaves resulting from nested conditional expressions. */ struct format_check_results { + format_check_results (location_t format_string_loc_) + : number_non_literal (0), +number_extra_args (0), +extra_arg_loc (UNKNOWN_LOCATION), +number_dollar_extra_args (0), +number_wide (0), +number_non_char (0), +number_empty (0), +number_unterminated (0), +number_other (0), +format_string_loc (format_string_loc_) + { + } + /* Number of leaves of the format argument that could not be checked as they were not string literals. 
*/ int number_non_literal; @@ -1050,10 +1064,21 @@ struct format_check_results struct format_check_context { - format_check_results *res; - function_format_info *info; - tree params; - vec *arglocs; + format_check_context (format_check_results *res, + function_format_info *info, + tree params, + vec *arglocs) + : m_res (res), +m_info (info), +m_params (params), +m_arglocs (arglocs) + { + } + + format_check_results *m_res; + function_format_info *m_info; + tree m_params; + vec *m_arglocs; }; /* Return the format name (as specified in the original table) for the format @@ -1539,10 +1564,8 @@ static void check_format_info (function_format_info *info, tree params, vec *arglocs) { - format_check_context format_ctx; unsigned HOST_WIDE_INT arg_num; tree format_tree; - format_check_results res; /* Skip to format argument. If the argument isn't available, there's no work for us to do; prototype checking will catch the problem. */ for (arg_num = 1; ; ++arg_num) @@ -1558,26 +1581,14 @@ check_format_info (function_format_info *info, tree params, if (format_tree == 0) return; - res.number_non_literal = 0; - res.number_extra_args = 0; - res.extra_arg_loc = UNKNOWN_LOCATION; - res.number_dollar_extra_args = 0; - res.number_wide = 0; - res.number_non_char = 0; - res.number_empty = 0; - res.number_unterminated = 0; - res.number_other = 0; - res.format_string_loc = input_location; - - format_ctx.res = &res; - format_ctx.info = info; - format_ctx.params = params; - format_ctx.arglocs = arglocs; + format_check_results res (input_location); + + format_check_context format_ctx (&res, info, params, arglocs); check_function_arguments_recurse (check_format_arg, &format_ctx, format_tree, arg_num, OPT_Wformat_); - location_t loc = format_ctx.res->format_string_loc; + location_t loc = format_ctx.m_res->format_string_loc; if (res.number_non_literal > 0) { @@ -1659,10 +1670,10 @@ check_format_arg (void *ctx, tree format_tree, unsigned HOST_WIDE_INT arg_num) { format_check_context *format_ctx = 
(format_check_context *) ctx; - format_check_results *res = format_ctx->res; - function_format_info *info = format_ctx->info; - tree params = format_ctx->params; - vec *arglocs = format_ctx->arglocs; + format_check_results *res = format_ctx->m_res; + function_format_info *info = format_ctx->m_info; + tree params = format_ctx->m_params; + vec *arglocs = format_ctx->m_arglocs; int format_length; HOST_WIDE_INT offset;
[gcc r15-1899] i386: Promote {QI, HI}mode x86_movcc_0_m1_neg to SImode
https://gcc.gnu.org/g:2b3027bea3f218599d36379d3d593841df7a1559 commit r15-1899-g2b3027bea3f218599d36379d3d593841df7a1559 Author: Uros Bizjak Date: Mon Jul 8 20:47:52 2024 +0200 i386: Promote {QI,HI}mode x86_movcc_0_m1_neg to SImode Promote HImode x86_movcc_0_m1_neg insn to SImode to avoid redundant prefixes. Also promote QImode insn when TARGET_PROMOTE_QImode is set. This is similar to promotable_binary_operator splitter, where we promote the result to SImode. Also correct insn condition for splitters to SImode of NEG and NOT instructions. The sizes of QImode and SImode instructions are always the same, so there is no need for optimize_insn_for_size bypass. gcc/ChangeLog: * config/i386/i386.md (x86_movcc_0_m1_neg splitter to SImode): New splitter. (NEG and NOT splitter to SImode): Remove optimize_insn_for_size_p predicate from insn condition. Diff: --- gcc/config/i386/i386.md | 25 +++-- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index b24c4fe58750..214cb2e239ae 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -26576,9 +26576,7 @@ (clobber (reg:CC FLAGS_REG))] "! TARGET_PARTIAL_REG_STALL && reload_completed && (GET_MODE (operands[0]) == HImode - || (GET_MODE (operands[0]) == QImode - && (TARGET_PROMOTE_QImode - || optimize_insn_for_size_p (" + || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))" [(parallel [(set (match_dup 0) (neg:SI (match_dup 1))) (clobber (reg:CC FLAGS_REG))])] @@ -26593,15 +26591,30 @@ (not (match_operand 1 "general_reg_operand")))] "! 
TARGET_PARTIAL_REG_STALL && reload_completed && (GET_MODE (operands[0]) == HImode - || (GET_MODE (operands[0]) == QImode - && (TARGET_PROMOTE_QImode - || optimize_insn_for_size_p (" + || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))" [(set (match_dup 0) (not:SI (match_dup 1)))] { operands[0] = gen_lowpart (SImode, operands[0]); operands[1] = gen_lowpart (SImode, operands[1]); }) + +(define_split + [(set (match_operand 0 "general_reg_operand") + (neg (match_operator 1 "ix86_carry_flag_operator" + [(reg FLAGS_REG) (const_int 0)]))) + (clobber (reg:CC FLAGS_REG))] + "! TARGET_PARTIAL_REG_STALL && reload_completed + && (GET_MODE (operands[0]) == HImode + || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))" + [(parallel [(set (match_dup 0) + (neg:SI (match_dup 1))) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = shallow_copy_rtx (operands[1]); + PUT_MODE (operands[1], SImode); +}) ;; RTL Peephole optimizations, run before sched2. These primarily look to ;; transform a complex memory operation into two memory to register operations.
[gcc r15-1898] libstdc++: Fix _Atomic(T) macro in <stdatomic.h> [PR115807]
https://gcc.gnu.org/g:40d234dd6439e8c8cfbf3f375a61906aed35c80d commit r15-1898-g40d234dd6439e8c8cfbf3f375a61906aed35c80d Author: Jonathan Wakely Date: Sun Jul 7 12:22:42 2024 +0100 libstdc++: Fix _Atomic(T) macro in [PR115807] The definition of the _Atomic(T) macro needs to refer to ::std::atomic, not some other std::atomic relative to the current namespace. libstdc++-v3/ChangeLog: PR libstdc++/115807 * include/c_compatibility/stdatomic.h (_Atomic): Ensure it refers to std::atomic in the global namespace. * testsuite/29_atomics/headers/stdatomic.h/115807.cc: New test. Diff: --- libstdc++-v3/include/c_compatibility/stdatomic.h | 2 +- .../testsuite/29_atomics/headers/stdatomic.h/115807.cc | 14 ++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/libstdc++-v3/include/c_compatibility/stdatomic.h b/libstdc++-v3/include/c_compatibility/stdatomic.h index 5403b52a036d..72b9446eb170 100644 --- a/libstdc++-v3/include/c_compatibility/stdatomic.h +++ b/libstdc++-v3/include/c_compatibility/stdatomic.h @@ -35,7 +35,7 @@ #ifdef __cpp_lib_stdatomic_h // C++ >= 23 #include -#define _Atomic(_Tp) std::atomic<_Tp> +#define _Atomic(_Tp) ::std::atomic<_Tp> using std::memory_order; using std::memory_order_relaxed; diff --git a/libstdc++-v3/testsuite/29_atomics/headers/stdatomic.h/115807.cc b/libstdc++-v3/testsuite/29_atomics/headers/stdatomic.h/115807.cc new file mode 100644 index ..14f320fe8357 --- /dev/null +++ b/libstdc++-v3/testsuite/29_atomics/headers/stdatomic.h/115807.cc @@ -0,0 +1,14 @@ +// { dg-do compile { target c++23 } } +#include +namespace other { + namespace std { +int atomic = 0; + } + _Atomic(long) a{}; +} + +#include + +namespace non::std { + static_assert( ::std::is_same_v<_Atomic(int), ::std::atomic> ); +}
[gcc r15-1897] Remove trailing whitespace from invoke.texi
https://gcc.gnu.org/g:a0e64a043ec498f959a214b5b02d6c7177984a0f commit r15-1897-ga0e64a043ec498f959a214b5b02d6c7177984a0f Author: Patrick O'Neill Date: Tue Jul 2 18:28:00 2024 -0700 Remove trailing whitespace from invoke.texi gcc/ChangeLog: * doc/invoke.texi: Remove trailing whitespace. Signed-off-by: Patrick O'Neill Diff: --- gcc/doc/invoke.texi | 392 ++-- 1 file changed, 196 insertions(+), 196 deletions(-) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index b37c7af7a390..4d671c4f6d89 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -94,9 +94,9 @@ that option with all supported languages. The usual way to run GCC is to run the executable called @command{gcc}, or @command{@var{machine}-gcc} when cross-compiling, or @command{@var{machine}-gcc-@var{version}} to run a specific version of GCC. -When you compile C++ programs, you should invoke GCC as @command{g++} -instead. @xref{Invoking G++,,Compiling C++ Programs}, -for information about the differences in behavior between @command{gcc} +When you compile C++ programs, you should invoke GCC as @command{g++} +instead. @xref{Invoking G++,,Compiling C++ Programs}, +for information about the differences in behavior between @command{gcc} and @command{g++} when compiling C++ programs. @cindex grouping options @@ -3623,8 +3623,8 @@ unambiguous base classes. Mixing code compiled with @option{-frtti} with that compiled with @option{-fno-rtti} may not work. For example, programs may -fail to link if a class compiled with @option{-fno-rtti} is used as a base -for a class compiled with @option{-frtti}. +fail to link if a class compiled with @option{-fno-rtti} is used as a base +for a class compiled with @option{-frtti}. 
@opindex fsized-deallocation @item -fsized-deallocation @@ -4176,7 +4176,7 @@ As an example: @smallexample template void f(T t) @{ t(); @}; void g() noexcept; -void h() @{ f(g); @} +void h() @{ f(g); @} @end smallexample @noindent @@ -4443,10 +4443,10 @@ But this use is not portable across different compilers. @item -Wno-non-template-friend @r{(C++ and Objective-C++ only)} Disable warnings when non-template friend functions are declared within a template. In very old versions of GCC that predate implementation -of the ISO standard, declarations such as +of the ISO standard, declarations such as @samp{friend int foo(int)}, where the name of the friend is an unqualified-id, could be interpreted as a particular specialization of a template -function; the warning exists to diagnose compatibility problems, +function; the warning exists to diagnose compatibility problems, and is enabled by default. @opindex Wold-style-cast @@ -5197,7 +5197,7 @@ value, if any. Traditionally, diagnostic messages have been formatted irrespective of the output device's aspect (e.g.@: its width, @dots{}). You can use the options described below -to control the formatting algorithm for diagnostic messages, +to control the formatting algorithm for diagnostic messages, e.g.@: how many characters per line, how often source location information should be reported. Note that some language front ends may not honor these options. @@ -8039,7 +8039,7 @@ This warning is enabled by @option{-Wall} or @option{-Wextra}. @cindex unknown pragmas, warning @cindex pragmas, warning of unknown @item -Wunknown-pragmas -Warn when a @code{#pragma} directive is encountered that is not understood by +Warn when a @code{#pragma} directive is encountered that is not understood by GCC@. If this command-line option is used, warnings are even issued for unknown pragmas in system header files. This is not the case if the warnings are only enabled by the @option{-Wall} command-line option. 
@@ -8077,7 +8077,7 @@ This option is only active when @option{-fstrict-aliasing} is active. It warns about code that might break the strict aliasing rules that the compiler is using for optimization. Higher levels correspond to higher accuracy (fewer false positives). -Higher levels also correspond to more effort, similar to the way @option{-O} +Higher levels also correspond to more effort, similar to the way @option{-O} works. @option{-Wstrict-aliasing} is equivalent to @option{-Wstrict-aliasing=3}. @@ -9298,7 +9298,7 @@ enabled by @option{-Wextra}. @opindex Wno-bad-function-cast @item -Wbad-function-cast @r{(C and Objective-C only)} Warn when a function call is cast to a non-matching type. -For example, warn if a call to a function returning an integer type +For example, warn if a call to a function returning an integer type is cast to a pointer type. @opindex Wc90-c99-compat @@ -9696,13 +9696,13 @@ Do not warn about stray tokens after @code{#else} and @code{#endif}. @item -Wenum-compare Warn about a comparison between values of different enumerated types. In C++ enumerated type mismatches in conditional expressions are also -dia
[gcc r15-1896] x86: Support bitwise and/andnot/abs/neg/copysign/xorsign op for V8BF/V16BF/V32BF
https://gcc.gnu.org/g:f3f9e4ee7642e5131f2d6607f764267df7d233d4 commit r15-1896-gf3f9e4ee7642e5131f2d6607f764267df7d233d4 Author: Levy Hsu Date: Mon Jul 8 14:59:35 2024 + x86: Support bitwise and/andnot/abs/neg/copysign/xorsign op for V8BF/V16BF/V32BF This patch extends support for BF16 vector operations in GCC, including bitwise AND, ANDNOT, ABS, NEG, COPYSIGN, and XORSIGN for V8BF, V16BF, and V32BF modes. gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_expand_fp_absneg_operator): Add VBF modes. (ix86_expand_copysign): Ditto. (ix86_expand_xorsign): Ditto. * config/i386/i386.cc (ix86_build_const_vector): Ditto. (ix86_build_signbit_mask): Ditto. * config/i386/sse.md: Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/avx2-bf16-vec-absneg.c: New test. * gcc.target/i386/avx512f-bf16-vec-absneg.c: New test. Diff: --- gcc/config/i386/i386-expand.cc | 76 +-- gcc/config/i386/i386.cc| 6 ++ gcc/config/i386/sse.md | 37 +++--- .../gcc.target/i386/avx2-bf16-vec-absneg.c | 85 ++ .../gcc.target/i386/avx512f-bf16-vec-absneg.c | 66 + 5 files changed, 234 insertions(+), 36 deletions(-) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index bf79e59f811e..abc702d3ff27 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -2174,20 +2174,28 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode, machine_mode vmode = mode; rtvec par; - if (vector_mode || mode == TFmode || mode == HFmode) -{ - use_sse = true; - if (mode == HFmode) - vmode = V8HFmode; -} - else if (TARGET_SSE_MATH) -{ - use_sse = SSE_FLOAT_MODE_P (mode); - if (mode == SFmode) - vmode = V4SFmode; - else if (mode == DFmode) - vmode = V2DFmode; -} + switch (mode) + { + case HFmode: +use_sse = true; +vmode = V8HFmode; +break; + case BFmode: +use_sse = true; +vmode = V8BFmode; +break; + case SFmode: +use_sse = TARGET_SSE_MATH && TARGET_SSE; +vmode = V4SFmode; +break; + case DFmode: +use_sse = TARGET_SSE_MATH && TARGET_SSE2; +vmode = V2DFmode; +break; + 
default: +use_sse = vector_mode || mode == TFmode; +break; + } dst = operands[0]; src = operands[1]; @@ -2320,16 +2328,26 @@ ix86_expand_copysign (rtx operands[]) mode = GET_MODE (operands[0]); - if (mode == HFmode) + switch (mode) + { + case HFmode: vmode = V8HFmode; - else if (mode == SFmode) +break; + case BFmode: +vmode = V8BFmode; +break; + case SFmode: vmode = V4SFmode; - else if (mode == DFmode) +break; + case DFmode: vmode = V2DFmode; - else if (mode == TFmode) +break; + case TFmode: vmode = mode; - else -gcc_unreachable (); +break; + default: +gcc_unreachable(); + } if (rtx_equal_p (operands[1], operands[2])) { @@ -2390,14 +2408,24 @@ ix86_expand_xorsign (rtx operands[]) mode = GET_MODE (dest); - if (mode == HFmode) + switch (mode) + { + case HFmode: vmode = V8HFmode; - else if (mode == SFmode) +break; + case BFmode: +vmode = V8BFmode; +break; + case SFmode: vmode = V4SFmode; - else if (mode == DFmode) +break; + case DFmode: vmode = V2DFmode; - else +break; + default: gcc_unreachable (); +break; + } temp = gen_reg_rtx (vmode); mask = ix86_build_signbit_mask (vmode, 0, 0); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 17d23bbcbc27..9c2ebe74fc92 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -16173,6 +16173,9 @@ ix86_build_const_vector (machine_mode mode, bool vect, rtx value) case E_V8DFmode: case E_V4DFmode: case E_V2DFmode: +case E_V32BFmode: +case E_V16BFmode: +case E_V8BFmode: n_elt = GET_MODE_NUNITS (mode); v = rtvec_alloc (n_elt); scalar_mode = GET_MODE_INNER (mode); @@ -16209,6 +16212,9 @@ ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert) case E_V8HFmode: case E_V16HFmode: case E_V32HFmode: +case E_V32BFmode: +case E_V16BFmode: +case E_V8BFmode: vec_mode = mode; imode = HImode; break; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index bda66d5e1212..b3b4697924b5 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -351,7 +351,9 @@ ;; 128-, 256- and 512-bit 
float vector modes for bitwise operations (define_mode_iterator VFB - [(V32HF "TARGET_AVX512F && TARGET_EVEX512") + [(V32BF "TARGET_AVX512F && TARGET_EVEX512") + (V16BF "TARGET_AVX") (V8BF "TARGET_SSE2") + (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") (V8HF "TARGET_SSE2"
[gcc r11-11562] c++: Add testcase for this PR [PR97990]
https://gcc.gnu.org/g:c2c216d0f85f861cc10529a455edfaf645aa393f commit r11-11562-gc2c216d0f85f861cc10529a455edfaf645aa393f Author: Andrew Pinski Date: Fri Feb 16 10:55:43 2024 -0800 c++: Add testcase for this PR [PR97990] This testcase was fixed by r14-5934-gf26d68d5d128c8 but we should add one to make sure it does not regress again. Committed as obvious after a quick test on the testcase. PR c++/97990 gcc/testsuite/ChangeLog: * g++.dg/torture/vector-struct-1.C: New test. Signed-off-by: Andrew Pinski (cherry picked from commit 5f1438db419c9eb8901d1d1d7f98fb69082aec8e) Diff: --- gcc/testsuite/g++.dg/torture/vector-struct-1.C | 18 ++ 1 file changed, 18 insertions(+) diff --git a/gcc/testsuite/g++.dg/torture/vector-struct-1.C b/gcc/testsuite/g++.dg/torture/vector-struct-1.C new file mode 100644 index ..e2747417e2d5 --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/vector-struct-1.C @@ -0,0 +1,18 @@ +/* PR c++/97990 */ +/* This used to crash with lto and strict aliasing enabled as the + vector type variant still had TYPE_ALIAS_SET set on it. */ + +typedef __attribute__((__vector_size__(sizeof(short)))) short TSimd; +TSimd hh(int); +struct y6 +{ + TSimd VALUE; + ~y6(); +}; +template <class T1, class T2> +auto f2(T1 p1, T2){ + return hh(p1) <= 0; +} +void f1(){ + f2(0, y6{}); +}
[gcc r11-11561] middle-end/112732 - stray TYPE_ALIAS_SET in type variant
https://gcc.gnu.org/g:e7879391bb2b86606d0ce35ed97eccc108970e36 commit r11-11561-ge7879391bb2b86606d0ce35ed97eccc108970e36 Author: Richard Biener Date: Tue Nov 28 12:36:21 2023 +0100 middle-end/112732 - stray TYPE_ALIAS_SET in type variant The following fixes a stray TYPE_ALIAS_SET in a type variant built by build_opaque_vector_type which is diagnosed by type checking enabled with -flto. PR middle-end/112732 * tree.c (build_opaque_vector_type): Reset TYPE_ALIAS_SET of the newly built type. (cherry picked from commit f26d68d5d128c86faaceeb81b1e8f22254ad53df) Diff: --- gcc/tree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gcc/tree.c b/gcc/tree.c index 8b5b0b7508cc..2cbdc7b65ba9 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -11098,6 +11098,8 @@ build_opaque_vector_type (tree innertype, poly_int64 nunits) TYPE_NEXT_VARIANT (cand) = TYPE_NEXT_VARIANT (t); TYPE_NEXT_VARIANT (t) = cand; TYPE_MAIN_VARIANT (cand) = TYPE_MAIN_VARIANT (t); + /* Type variants have no alias set defined. */ + TYPE_ALIAS_SET (cand) = -1; return cand; }
[gcc r14-10394] tree-optimization/115723 - ICE with .COND_ADD reduction
https://gcc.gnu.org/g:64a6c0d594c05f275de91df35047cffb3ccecf2f commit r14-10394-g64a6c0d594c05f275de91df35047cffb3ccecf2f Author: Richard Biener Date: Mon Jul 1 10:06:55 2024 +0200 tree-optimization/115723 - ICE with .COND_ADD reduction The following fixes an ICE with a .COND_ADD discovered as reduction even though its else value isn't the reduction chain link but a constant. This would be wrong-code with --disable-checking I think. PR tree-optimization/115723 * tree-vect-loop.cc (check_reduction_path): For a .COND_ADD verify the else value also refers to the reduction chain op. * gcc.dg/vect/pr115723.c: New testcase. (cherry picked from commit 286cda3461d6f5ce7d911d3f26bd4975ea7ea11d) Diff: --- gcc/testsuite/gcc.dg/vect/pr115723.c | 25 + gcc/tree-vect-loop.cc| 12 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/pr115723.c b/gcc/testsuite/gcc.dg/vect/pr115723.c new file mode 100644 index ..b98b29d48702 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr115723.c @@ -0,0 +1,25 @@ +/* { dg-additional-options "-ffast-math -fno-unsafe-math-optimizations" } */ + +#include "tree-vect.h" + +double __attribute__((noipa)) +foo (double *x, double *y, int n) +{ + double res = 0.; + for (int i = 0; i < n; ++i) +if (y[i] > 0.) + res += x[i]; +else + res = 64.; + return res; +} + +double y[16] = { 1., 1., 1., 1., 0., 1., 1., 1., + 1., 1., 1., 1., 1., 1., 1., 1. }; +int main () +{ + check_vect (); + if (foo (y, y, 16) != 64. + 11.) +abort (); + return 0; +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 29c03c246d45..832399f7e9d7 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -4161,15 +4161,19 @@ pop: FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op.ops[opi]) { - /* In case of a COND_OP (mask, op1, op2, op1) reduction we might have - op1 twice (once as definition, once as else) in the same operation. - Allow this. 
*/ + /* In case of a COND_OP (mask, op1, op2, op1) reduction we should +have op1 twice (once as definition, once as else) in the same +operation. Enforce this. */ if (cond_fn_p && op_use_stmt == use_stmt) { gcall *call = as_a <gcall *> (use_stmt); unsigned else_pos = internal_fn_else_index (internal_fn (op.code)); - + if (gimple_call_arg (call, else_pos) != op.ops[opi]) + { + fail = true; + break; + } for (unsigned int j = 0; j < gimple_call_num_args (call); ++j) { if (j == else_pos)
[gcc r14-10392] tree-optimization/115669 - fix SLP reduction association
https://gcc.gnu.org/g:03844a2a15a85015506c0f187d0e9d526900cc2c commit r14-10392-g03844a2a15a85015506c0f187d0e9d526900cc2c Author: Richard Biener Date: Thu Jun 27 11:26:08 2024 +0200 tree-optimization/115669 - fix SLP reduction association The following avoids associating a reduction path as that might get STMT_VINFO_REDUC_IDX out-of-sync with the SLP operand order. This is a latent issue with SLP reductions but now easily exposed as we're doing single-lane SLP reductions. When we achieved SLP only we can move and update this meta-data. PR tree-optimization/115669 * tree-vect-slp.cc (vect_build_slp_tree_2): Do not reassociate chains that participate in a reduction. * gcc.dg/vect/pr115669.c: New testcase. (cherry picked from commit 7886830bb45c4f5dca0496d4deae9a45204d78f5) Diff: --- gcc/testsuite/gcc.dg/vect/pr115669.c | 22 ++ gcc/tree-vect-slp.cc | 3 +++ 2 files changed, 25 insertions(+) diff --git a/gcc/testsuite/gcc.dg/vect/pr115669.c b/gcc/testsuite/gcc.dg/vect/pr115669.c new file mode 100644 index ..361a17a64e68 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr115669.c @@ -0,0 +1,22 @@ +/* { dg-additional-options "-fwrapv" } */ + +#include "tree-vect.h" + +int a = 10; +unsigned b; +long long c[100]; +int foo() +{ + long long *d = c; + for (short e = 0; e < a; e++) +b += ~(d ? d[e] : 0); + return b; +} + +int main() +{ + check_vect (); + if (foo () != -10) +abort (); + return 0; +} diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 5e7e9b5bf085..0795605ec527 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -2050,6 +2050,9 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, else if (is_a (vinfo) /* ??? We don't handle !vect_internal_def defs below. */ && STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def + /* ??? Do not associate a reduction, this will wreck REDUC_IDX + mapping as long as that exists on the stmt_info level. 
*/ + && STMT_VINFO_REDUC_IDX (stmt_info) == -1 && is_gimple_assign (stmt_info->stmt) && (associative_tree_code (gimple_assign_rhs_code (stmt_info->stmt)) || gimple_assign_rhs_code (stmt_info->stmt) == MINUS_EXPR)
[gcc r14-10393] tree-optimization/115694 - ICE with complex store rewrite
https://gcc.gnu.org/g:cde411950e91e0174a0134360d2eb138ca6821c6 commit r14-10393-gcde411950e91e0174a0134360d2eb138ca6821c6 Author: Richard Biener Date: Sun Jun 30 13:07:14 2024 +0200 tree-optimization/115694 - ICE with complex store rewrite The following adds a missed check when forwprop attempts to rewrite a complex store. PR tree-optimization/115694 * tree-ssa-forwprop.cc (pass_forwprop::execute): Check the store is complex before rewriting it. * g++.dg/torture/pr115694.C: New testcase. (cherry picked from commit 543a5b9da964f821b9e723ed9c93d6cdca464d47) Diff: --- gcc/testsuite/g++.dg/torture/pr115694.C | 13 + gcc/tree-ssa-forwprop.cc| 2 ++ 2 files changed, 15 insertions(+) diff --git a/gcc/testsuite/g++.dg/torture/pr115694.C b/gcc/testsuite/g++.dg/torture/pr115694.C new file mode 100644 index ..bbce47decf83 --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr115694.C @@ -0,0 +1,13 @@ +// { dg-do compile } + +_Complex a; +typedef struct { + double a[2]; +} b; +void c(b); +void d() +{ + _Complex b1 = a; + b t = __builtin_bit_cast (b, b1); + c(t); +} diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index 05d42ccd3c61..abf71f0d3a03 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -3762,6 +3762,8 @@ pass_forwprop::execute (function *fun) && gimple_store_p (use_stmt) && !gimple_has_volatile_ops (use_stmt) && is_gimple_assign (use_stmt) + && (TREE_CODE (TREE_TYPE (gimple_assign_lhs (use_stmt))) + == COMPLEX_TYPE) && (TREE_CODE (gimple_assign_lhs (use_stmt)) != TARGET_MEM_REF)) {
[gcc r14-10391] tree-optimization/115646 - ICE with pow shrink-wrapping from bitfield
https://gcc.gnu.org/g:078cdccc849831b8f1ff74b9ad16ce3f5aa172be commit r14-10391-g078cdccc849831b8f1ff74b9ad16ce3f5aa172be Author: Richard Biener Date: Tue Jun 25 16:13:02 2024 +0200 tree-optimization/115646 - ICE with pow shrink-wrapping from bitfield The following makes analysis and transform agree on constraints. PR tree-optimization/115646 * tree-call-cdce.cc (check_pow): Check for bit_sz values as allowed by transform. * gcc.dg/pr115646.c: New testcase. (cherry picked from commit 453b1d291d1a0f89087ad91cf6b1bed1ec68eff3) Diff: --- gcc/testsuite/gcc.dg/pr115646.c | 13 + gcc/tree-call-cdce.cc | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/pr115646.c b/gcc/testsuite/gcc.dg/pr115646.c new file mode 100644 index ..24bc1e45 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr115646.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern double pow(double x, double y); + +struct S { +unsigned int a : 3, b : 8, c : 21; +}; + +void foo (struct S *p) +{ + pow (p->c, 42); +} diff --git a/gcc/tree-call-cdce.cc b/gcc/tree-call-cdce.cc index 7f67a0b2dc6f..befe6acf178a 100644 --- a/gcc/tree-call-cdce.cc +++ b/gcc/tree-call-cdce.cc @@ -260,7 +260,7 @@ check_pow (gcall *pow_call) /* If the type of the base is too wide, the resulting shrink wrapping condition will be too conservative. */ - if (bit_sz > MAX_BASE_INT_BIT_SIZE) + if (bit_sz != 8 && bit_sz != 16 && bit_sz != MAX_BASE_INT_BIT_SIZE) return false; return true;
[gcc r15-1895] rs6000: load high and low part of 128bit vector independently [PR110040]
https://gcc.gnu.org/g:5be97039aa6c27fdf5d5bd43ef393b307c5ecedd commit r15-1895-g5be97039aa6c27fdf5d5bd43ef393b307c5ecedd Author: Jeevitha Date: Mon Jul 8 06:09:49 2024 -0500 rs6000: load high and low part of 128bit vector independently [PR110040] PR110040 exposes an issue concerning moves from vector registers to GPRs. There are two moves, one for upper 64 bits and the other for the lower 64 bits. In the problematic test case, we are only interested in storing the lower 64 bits. However, the instruction for copying the upper 64 bits is still emitted and is dead code. This patch adds a splitter that splits apart the two move instructions so that DCE can remove the dead code after splitting. 2024-07-08 Jeevitha Palanisamy gcc/ PR target/110040 * config/rs6000/vsx.md (split pattern for V1TI to DI move): New define. gcc/testsuite/ PR target/110040 * gcc.target/powerpc/pr110040-1.c: New testcase. * gcc.target/powerpc/pr110040-2.c: New testcase. Diff: --- gcc/config/rs6000/vsx.md | 17 + gcc/testsuite/gcc.target/powerpc/pr110040-1.c | 15 +++ gcc/testsuite/gcc.target/powerpc/pr110040-2.c | 16 3 files changed, 48 insertions(+) diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 48ba262f7e48..23ce5c740510 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -6735,3 +6735,20 @@ "vmsumcud %0,%1,%2,%3" [(set_attr "type" "veccomplex")] ) + +(define_split + [(set (match_operand:V1TI 0 "gpc_reg_operand") + (match_operand:V1TI 1 "vsx_register_operand"))] + "reload_completed + && TARGET_DIRECT_MOVE_64BIT + && int_reg_operand (operands[0], V1TImode) + && vsx_register_operand (operands[1], V1TImode)" + [(pc)] +{ + rtx src_op = gen_rtx_REG (V2DImode, REGNO (operands[1])); + rtx dest_op0 = gen_rtx_REG (DImode, REGNO (operands[0])); + rtx dest_op1 = gen_rtx_REG (DImode, REGNO (operands[0]) + 1); + emit_insn (gen_vsx_extract_v2di (dest_op0, src_op, const0_rtx)); + emit_insn (gen_vsx_extract_v2di (dest_op1, src_op, const1_rtx)); + DONE; +}) diff --git 
a/gcc/testsuite/gcc.target/powerpc/pr110040-1.c b/gcc/testsuite/gcc.target/powerpc/pr110040-1.c new file mode 100644 index ..0a521e9e51d2 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr110040-1.c @@ -0,0 +1,15 @@ +/* PR target/110040 */ +/* { dg-do compile } */ +/* { dg-require-effective-target int128 } */ +/* { dg-require-effective-target powerpc_vsx } */ +/* { dg-options "-O2 -mdejagnu-cpu=power9" } */ +/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */ + +#include <altivec.h> + +void +foo (signed long *dst, vector signed __int128 src) +{ + *dst = (signed long) src[0]; +} + diff --git a/gcc/testsuite/gcc.target/powerpc/pr110040-2.c b/gcc/testsuite/gcc.target/powerpc/pr110040-2.c new file mode 100644 index ..8236f3cbe223 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr110040-2.c @@ -0,0 +1,16 @@ +/* PR target/110040 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ +/* { dg-require-effective-target int128 } */ +/* { dg-require-effective-target powerpc_vsx } */ +/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */ + +/* builtin vec_xst_trunc requires power10. */ + +#include <altivec.h> + +void +foo (signed int *dst, vector signed __int128 src) +{ + __builtin_vec_xst_trunc (src, 0, dst); +}
[gcc r15-1894] RISC-V: Implement .SAT_TRUNC for vector unsigned int
https://gcc.gnu.org/g:dafd63d7c5cddce1e00803606e742d75927b1a1e commit r15-1894-gdafd63d7c5cddce1e00803606e742d75927b1a1e Author: Pan Li Date: Fri Jul 5 09:02:47 2024 +0800 RISC-V: Implement .SAT_TRUNC for vector unsigned int This patch would like to implement the .SAT_TRUNC for the RISC-V backend. With the help of the RVV Vector Narrowing Fixed-Point Clip Instructions. The below SEW(S) are supported: * e64 => e32 * e64 => e16 * e64 => e8 * e32 => e16 * e32 => e8 * e16 => e8 Take below example to see the changes to asm. Form 1: #define DEF_VEC_SAT_U_TRUNC_FMT_1(NT, WT) \ void __attribute__((noinline))\ vec_sat_u_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ WT x = in[i]; \ bool overflow = x > (WT)(NT)(-1); \ out[i] = ((NT)x) | (NT)-overflow; \ } \ } DEF_VEC_SAT_U_TRUNC_FMT_1 (uint32_t, uint64_t) Before this patch: .L3: vsetvli a5,a2,e64,m1,ta,ma vle64.v v1,0(a1) vmsgtu.vvv0,v1,v2 vsetvli zero,zero,e32,mf2,ta,ma vncvt.x.x.w v1,v1 vmerge.vim v1,v1,-1,v0 vse32.v v1,0(a0) slli a4,a5,3 add a1,a1,a4 slli a4,a5,2 add a0,a0,a4 sub a2,a2,a5 bne a2,zero,.L3 After this patch: .L3: vsetvli a5,a2,e32,mf2,ta,ma vle64.v v1,0(a1) vnclipu.wi v1,v1,0 vse32.v v1,0(a0) slli a4,a5,3 add a1,a1,a4 slli a4,a5,2 add a0,a0,a4 sub a2,a2,a5 bne a2,zero,.L3 Passed the rv64gcv fully regression tests. gcc/ChangeLog: * config/riscv/autovec.md (ustrunc2): Add new pattern for double truncation. (ustrunc2): Ditto but for quad truncation. (ustrunc2): Ditto but for oct truncation. * config/riscv/riscv-protos.h (expand_vec_double_ustrunc): Add new func decl to expand double vec ustrunc. (expand_vec_quad_ustrunc): Ditto but for quad. (expand_vec_oct_ustrunc): Ditto but for oct. * config/riscv/riscv-v.cc (expand_vec_double_ustrunc): Add new func impl to expand vector double ustrunc. (expand_vec_quad_ustrunc): Ditto but for quad. (expand_vec_oct_ustrunc): Ditto but for oct. 
gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add helper test macros. * gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h: New test. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/autovec.md| 35 ++ gcc/config/riscv/riscv-protos.h| 4 + gcc/config/riscv/riscv-v.cc| 46 +++ .../riscv/rvv/autovec/binop/vec_sat_arith.h| 22 ++ .../riscv/rvv/autovec/unop/vec_sat_data.h | 394 + .../riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c | 19 + .../riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c | 21 ++ .../riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c | 23 ++ .../riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c | 19 + .../riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c
[gcc r15-1893] fortran: Move definition of variable closer to its uses
https://gcc.gnu.org/g:7183a8ca18d5889a1f66ec1edbda00200d700c6c commit r15-1893-g7183a8ca18d5889a1f66ec1edbda00200d700c6c Author: Mikael Morin Date: Mon Jul 8 09:38:42 2024 +0200 fortran: Move definition of variable closer to its uses No change of behaviour, this makes a variable easier to track. gcc/fortran/ChangeLog: * trans-array.cc (gfc_trans_preloop_setup): Use a separate variable for iteration. Use directly the value of variable I if it is known. Move the definition of the variable to the branch where the remaining uses are. Diff: --- gcc/fortran/trans-array.cc | 33 +++-- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index 510f429ef8ed..c7d244689393 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -4294,7 +4294,6 @@ gfc_trans_preloop_setup (gfc_loopinfo * loop, int dim, int flag, gfc_ss *ss, *pss; gfc_loopinfo *ploop; gfc_array_ref *ar; - int i; /* This code will be executed before entering the scalarization loop for this dimension. */ @@ -4340,19 +4339,12 @@ gfc_trans_preloop_setup (gfc_loopinfo * loop, int dim, int flag, pss = ss; } - if (dim == loop->dimen - 1) - i = 0; - else - i = dim + 1; - - /* For the time being, there is no loop reordering. */ - gcc_assert (i == ploop->order[i]); - i = ploop->order[i]; - if (dim == loop->dimen - 1 && loop->parent == NULL) { + gcc_assert (0 == ploop->order[0]); + stride = gfc_conv_array_stride (info->descriptor, - innermost_ss (ss)->dim[i]); + innermost_ss (ss)->dim[0]); /* Calculate the stride of the innermost loop. Hopefully this will allow the backend optimizers to do their stuff more effectively. @@ -4364,7 +4356,7 @@ gfc_trans_preloop_setup (gfc_loopinfo * loop, int dim, int flag, base offset of the array. 
*/ if (info->ref) { - for (i = 0; i < ar->dimen; i++) + for (int i = 0; i < ar->dimen; i++) { if (ar->dimen_type[i] != DIMEN_ELEMENT) continue; @@ -4374,8 +4366,21 @@ gfc_trans_preloop_setup (gfc_loopinfo * loop, int dim, int flag, } } else - /* Add the offset for the previous loop dimension. */ - add_array_offset (pblock, ploop, ss, ar, pss->dim[i], i); + { + int i; + + if (dim == loop->dimen - 1) + i = 0; + else + i = dim + 1; + + /* For the time being, there is no loop reordering. */ + gcc_assert (i == ploop->order[i]); + i = ploop->order[i]; + + /* Add the offset for the previous loop dimension. */ + add_array_offset (pblock, ploop, ss, ar, pss->dim[i], i); + } /* Remember this offset for the second loop. */ if (dim == loop->temp_dim - 1 && loop->parent == NULL)