[gcc r14-10396] RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW [PR115763]

2024-07-08 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:505382ceee0b5e72dc5defa05aec77a97658feca

commit r14-10396-g505382ceee0b5e72dc5defa05aec77a97658feca
Author: Pan Li 
Date:   Wed Jul 3 22:06:48 2024 +0800

RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW [PR115763]

According to the ISA, the zvfhmin sub-extension should only contain
conversion insns.  Thus, the vfmv insn acting on FP16 should not be
present when only the zvfhmin option is given.

This patch fixes it by splitting the pred_broadcast define_insn
into zvfhmin and zvfh parts.  Given the example below:

void test (_Float16 *dest, _Float16 bias) {
  dest[0] = bias;
  dest[1] = bias;
}

when compiled with -march=rv64gcv_zfh_zvfhmin

Before this patch:
test:
  vsetivli zero,2,e16,mf4,ta,ma
  vfmv.v.f v1,fa0 // should not leverage vfmv for zvfhmin
  vse16.v v1,0(a0)
  ret

After this patch:
test:
  addi sp,sp,-16
  fsh  fa0,14(sp)
  addi a5,sp,14
  vsetivli zero,2,e16,mf4,ta,ma
  vlse16.v v1,0(a5),zero
  vse16.v  v1,0(a0)
  addi sp,sp,16
  jr   ra

PR target/115763

gcc/ChangeLog:

* config/riscv/vector.md (*pred_broadcast): Split into
zvfh and zvfhmin part.
(*pred_broadcast_zvfh): New define_insn for zvfh part.
(*pred_broadcast_zvfhmin): Ditto but for zvfhmin.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/scalar_move-5.c: Adjust asm check.
* gcc.target/riscv/rvv/base/scalar_move-6.c: Ditto.
* gcc.target/riscv/rvv/base/scalar_move-7.c: Ditto.
* gcc.target/riscv/rvv/base/scalar_move-8.c: Ditto.
* gcc.target/riscv/rvv/base/pr115763-1.c: New test.
* gcc.target/riscv/rvv/base/pr115763-2.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit de9254e224eb3d89303cb9b3ba50b4c479c55f7c)

Diff:
---
 gcc/config/riscv/vector.md | 49 +++---
 .../gcc.target/riscv/rvv/base/pr115763-1.c |  9 
 .../gcc.target/riscv/rvv/base/pr115763-2.c | 10 +
 .../gcc.target/riscv/rvv/base/scalar_move-5.c  |  4 +-
 .../gcc.target/riscv/rvv/base/scalar_move-6.c  |  6 +--
 .../gcc.target/riscv/rvv/base/scalar_move-7.c  |  6 +--
 .../gcc.target/riscv/rvv/base/scalar_move-8.c  |  6 +--
 7 files changed, 64 insertions(+), 26 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 228d0f9a7663..03012d677d79 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -2080,31 +2080,50 @@
   [(set_attr "type" "vimov,vimov,vlds,vlds,vlds,vlds,vimovxv,vimovxv")
(set_attr "mode" "")])
 
-(define_insn "*pred_broadcast"
-  [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand" "=vr, vr, 
vr, vr, vr, vr, vr, vr")
-   (if_then_else:V_VLSF_ZVFHMIN
+(define_insn "*pred_broadcast_zvfh"
+  [(set (match_operand:V_VLSF0 "register_operand"  "=vr,  vr,  
vr,  vr")
+   (if_then_else:V_VLSF
  (unspec:
-   [(match_operand: 1 "vector_broadcast_mask_operand" "Wc1,Wc1, 
vm, vm,Wc1,Wc1,Wb1,Wb1")
-(match_operand 4 "vector_length_operand"  " rK, rK, 
rK, rK, rK, rK, rK, rK")
-(match_operand 5 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i")
-(match_operand 6 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i")
-(match_operand 7 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i")
+   [(match_operand: 1 "vector_broadcast_mask_operand" "Wc1, Wc1, 
Wb1, Wb1")
+(match_operand  4 "vector_length_operand" " rK,  rK,  
rK,  rK")
+(match_operand  5 "const_int_operand" "  i,   i,   
i,   i")
+(match_operand  6 "const_int_operand" "  i,   i,   
i,   i")
+(match_operand  7 "const_int_operand" "  i,   i,   
i,   i")
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (vec_duplicate:V_VLSF_ZVFHMIN
-   (match_operand: 3 "direct_broadcast_operand"   " f,  
f,Wdm,Wdm,Wdm,Wdm,  f,  f"))
- (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand""vu,  0, 
vu,  0, vu,  0, vu,  0")))]
+ (vec_duplicate:V_VLSF
+   (match_operand: 3 "direct_broadcast_operand"  "  f,   f,   
f,   f"))
+ (match_operand:V_VLSF  2 "vector_merge_operand"  " vu,   0,  
vu,   0")))]
   "TARGET_VECTOR"
   "@
vfmv.v.f\t%0,%3
vfmv.v.f\t%0,%3
+   vfmv.s.f\t%0,%3
+   vfmv.s.f\t%0,%3"
+  [(set_attr "type" "vfmov,vfmov,vfmovfv,vfmovfv")
+   (set_attr "mode" "")])
+
+(define_insn "*pred_broadcast_zvfhmin"
+  [(set (match_operand:V_VLSF_ZVFHMIN   0 "register_operand"  
"=vr,  vr,  vr,  vr")
+   (if_then_e

[gcc r15-1905] Rename __{float, double}_u to __x86_{float, double}_u to avoid polluting the namespace.

2024-07-08 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:23ab7f632f4f5bae67fb53cf7b18fea7ba7242c4

commit r15-1905-g23ab7f632f4f5bae67fb53cf7b18fea7ba7242c4
Author: liuhongt 
Date:   Mon Jul 8 10:35:35 2024 +0800

Rename __{float,double}_u to __x86_{float,double}_u to avoid polluting the
namespace.

I have a build failure on NetBSD as the namespace pollution avoidance causes
a direct hit with the system /usr/include/math.h
===

In file included from /usr/src/local/gcc/obj/gcc/include/emmintrin.h:31,
 from 
/usr/src/local/gcc/obj/x86_64-unknown-netbsd10.99/libstdc++-v3/include/ext/random:45,
 from 
/usr/src/local/gcc/libstdc++-v3/include/precompiled/extc++.h:65:
/usr/src/local/gcc/obj/gcc/include/xmmintrin.h:75:15: error: conflicting 
declaration 'typedef float __float_u'
   75 | typedef float __float_u __attribute__ ((__may_alias__, __aligned__ 
(1)));
  |   ^
In file included from 
/usr/src/local/gcc/obj/x86_64-unknown-netbsd10.99/libstdc++-v3/include/cmath:47,
 from 
/usr/src/local/gcc/obj/x86_64-unknown-netbsd10.99/libstdc++-v3/include/x86_64-unknown-netbsd10.99/bits/stdc++.h:114,
 from 
/usr/src/local/gcc/libstdc++-v3/include/precompiled/extc++.h:32:
/usr/src/local/gcc/obj/gcc/include-fixed/math.h:49:7: note: previous 
declaration as 'union __float_u'
   49 | union __float_u {

gcc/ChangeLog:

PR target/115796
* config/i386/emmintrin.h (__double_u): Rename to ..
(__x86_double_u): .. this.
(_mm_load_sd): Ditto.
(_mm_store_sd): Ditto.
(_mm_loadh_pd): Ditto.
(_mm_loadl_pd): Ditto.
* config/i386/xmmintrin.h (__float_u): Rename to ..
(__x86_float_u): .. this.
(_mm_load_ss): Ditto.
(_mm_store_ss): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr115796.c: New test.

Diff:
---
 gcc/config/i386/emmintrin.h  | 10 +-
 gcc/config/i386/xmmintrin.h  |  6 +++---
 gcc/testsuite/gcc.target/i386/pr115796.c | 24 
 3 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h
index d58030e5c4fe..a3fcd7a869cf 100644
--- a/gcc/config/i386/emmintrin.h
+++ b/gcc/config/i386/emmintrin.h
@@ -56,7 +56,7 @@ typedef double __m128d __attribute__ ((__vector_size__ (16), 
__may_alias__));
 /* Unaligned version of the same types.  */
 typedef long long __m128i_u __attribute__ ((__vector_size__ (16), 
__may_alias__, __aligned__ (1)));
 typedef double __m128d_u __attribute__ ((__vector_size__ (16), __may_alias__, 
__aligned__ (1)));
-typedef double __double_u __attribute__ ((__may_alias__, __aligned__ (1)));
+typedef double __x86_double_u __attribute__ ((__may_alias__, __aligned__ (1)));
 
 /* Create a selector for use with the SHUFPD instruction.  */
 #define _MM_SHUFFLE2(fp1,fp0) \
@@ -146,7 +146,7 @@ _mm_load1_pd (double const *__P)
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_load_sd (double const *__P)
 {
-  return __extension__ (__m128d) { *(__double_u *)__P, 0.0 };
+  return __extension__ (__m128d) { *(__x86_double_u *)__P, 0.0 };
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -181,7 +181,7 @@ _mm_storeu_pd (double *__P, __m128d __A)
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_store_sd (double *__P, __m128d __A)
 {
-  *(__double_u *)__P = ((__v2df)__A)[0] ;
+  *(__x86_double_u *)__P = ((__v2df)__A)[0] ;
 }
 
 extern __inline double __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -974,13 +974,13 @@ _mm_unpacklo_pd (__m128d __A, __m128d __B)
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_loadh_pd (__m128d __A, double const *__B)
 {
-  return __extension__ (__m128d) { ((__v2df)__A)[0], *(__double_u*)__B };
+  return __extension__ (__m128d) { ((__v2df)__A)[0], *(__x86_double_u*)__B };
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_loadl_pd (__m128d __A, double const *__B)
 {
-  return __extension__ (__m128d) { *(__double_u*)__B, ((__v2df)__A)[1] };
+  return __extension__ (__m128d) { *(__x86_double_u*)__B, ((__v2df)__A)[1] };
 }
 
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 37e5a94cf101..7f10f96d72ce 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -72,7 +72,7 @@ typedef float __m128 __attribute__ ((__vector_size__ (16), 
__may_alias__));
 
 /* Unaligned version of the same type.  */
 typedef float __m128_u __attribute__ ((__vector_

[gcc r15-1904] RISC-V: Add testcases for unsigned vector .SAT_ADD IMM form 2

2024-07-08 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:ecde8d50bea3573194f21277666f83463cbbe9c9

commit r15-1904-gecde8d50bea3573194f21277666f83463cbbe9c9
Author: Pan Li 
Date:   Mon Jul 8 21:58:59 2024 +0800

RISC-V: Add testcases for unsigned vector .SAT_ADD IMM form 2

After the middle-end supported the vector mode of .SAT_ADD,  add more
testcases to ensure the correctness of RISC-V backend for form 2.  Aka:

Form 2:
  #define DEF_VEC_SAT_U_ADD_IMM_FMT_2(T, IMM)  \
  T __attribute__((noinline))  \
  vec_sat_u_add_imm##IMM##_##T##_fmt_2 (T *out, T *in, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  out[i] = (T)(in[i] + IMM) < in[i] ? -1 : (in[i] + IMM);  \
  }

DEF_VEC_SAT_U_ADD_IMM_FMT_2 (uint64_t, 9)

Passed the full rv64gcv regression tests.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add help
test macro.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-6.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-7.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-8.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-5.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-6.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-7.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-8.c: New 
test.

Signed-off-by: Pan Li 

Diff:
---
 .../riscv/rvv/autovec/binop/vec_sat_arith.h| 17 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c  | 14 +++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-6.c  | 14 +++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-7.c  | 14 +++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-8.c  | 14 +++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-5.c| 28 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-6.c| 28 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-7.c| 28 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-8.c| 28 ++
 9 files changed, 185 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
index 3733c8fd2c15..10459807b2c4 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
@@ -158,12 +158,29 @@ vec_sat_u_add_imm##IMM##_##T##_fmt_1 (T *out, T *in, 
unsigned limit) \
 #define DEF_VEC_SAT_U_ADD_IMM_FMT_1_WRAP(T, IMM) \
   DEF_VEC_SAT_U_ADD_IMM_FMT_1(T, IMM)
 
+#define DEF_VEC_SAT_U_ADD_IMM_FMT_2(T, IMM)  \
+T __attribute__((noinline))  \
+vec_sat_u_add_imm##IMM##_##T##_fmt_2 (T *out, T *in, unsigned limit) \
+{\
+  unsigned i;\
+  for (i = 0; i < limit; i++)\
+out[i] = (T)(in[i] + IMM) < in[i] ? -1 : (in[i] + IMM);  \
+}
+#define DEF_VEC_SAT_U_ADD_IMM_FMT_2_WRAP(T, IMM) \
+  DEF_VEC_SAT_U_ADD_IMM_FMT_2(T, IMM)
+
 #define RUN_VEC_SAT_U_ADD_IMM_FMT_1(T, out, op_1, expect, IMM, N) \
   vec_sat_u_add_imm##IMM##_##T##_fmt_1(out, op_1, N); \
   VALIDATE_RESULT (out, expect, N)
 #define RUN_VEC_SAT_U_ADD_IMM_FMT_1_WRAP(T, out, op_1, expect, IMM, N) \
   RUN_VEC_SAT_U_ADD_IMM_FMT_1(T, out, op_1, expect, IMM, N)
 
+#define RUN_VEC_SAT_U_ADD_IMM_FMT_2(T, out, op_1, expect, IMM, N) \
+  vec_sat_u_add_imm##IMM##_##T##_fmt_2(out, op_1, N); \
+  VALIDATE_RESULT (out, expect, N)
+#define RUN_VEC_SAT_U_ADD_IMM_FMT_2_WRAP(T, out, op_1, expect, IMM, N) \
+  RUN_VEC_SAT_U_ADD_IMM_FMT_2(T, out, op_1, expect, IMM, N)
+
 
/**/
 /* Saturation Sub (Unsigned and Signed)   
*/
 
/**/
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c
new file mode 100644
index ..d25fdcf78f38
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mab

[gcc r15-1903] RISC-V: Add testcases for unsigned vector .SAT_ADD IMM form 1

2024-07-08 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:35b1096896a94a90d787f5ef402ba009dd4f0393

commit r15-1903-g35b1096896a94a90d787f5ef402ba009dd4f0393
Author: Pan Li 
Date:   Mon Jul 8 20:31:31 2024 +0800

RISC-V: Add testcases for unsigned vector .SAT_ADD IMM form 1

After the middle-end supported the vector mode of .SAT_ADD,  add more
testcases to ensure the correctness of RISC-V backend for form 1.  Aka:

Form 1:
  #define DEF_VEC_SAT_U_ADD_IMM_FMT_1(T, IMM)  \
  T __attribute__((noinline))  \
  vec_sat_u_add_imm##IMM##_##T##_fmt_1 (T *out, T *in, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  out[i] = (T)(in[i] + IMM) >= in[i] ? (in[i] + IMM) : -1; \
  }

DEF_VEC_SAT_U_ADD_IMM_FMT_1 (uint64_t, 9)

Passed the full rv64gcv regression tests.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add help
test macro.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_data.h: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-1.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-2.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-3.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-4.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-1.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-2.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-3.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-4.c: New 
test.

Signed-off-by: Pan Li 

Diff:
---
 .../riscv/rvv/autovec/binop/vec_sat_arith.h|  25 ++
 .../riscv/rvv/autovec/binop/vec_sat_data.h | 256 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-1.c  |  14 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-2.c  |  14 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-3.c  |  14 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-4.c  |  14 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-1.c|  28 +++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-2.c|  28 +++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-3.c|  28 +++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-4.c|  28 +++
 10 files changed, 449 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
index b55a589e019a..3733c8fd2c15 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
@@ -4,6 +4,14 @@
 #include 
 #include 
 
+#define VALIDATE_RESULT(out, expect, N)  \
+  do \
+{\
+  for (unsigned i = 0; i < N; i++)   \
+if (out[i] != expect[i]) __builtin_abort (); \
+}\
+  while (false)
+
 
/**/
 /* Saturation Add (unsigned and signed)   
*/
 
/**/
@@ -139,6 +147,23 @@ vec_sat_u_add_##T##_fmt_8 (T *out, T *op_1, T *op_2, 
unsigned limit) \
 #define RUN_VEC_SAT_U_ADD_FMT_8(T, out, op_1, op_2, N) \
   vec_sat_u_add_##T##_fmt_8(out, op_1, op_2, N)
 
+#define DEF_VEC_SAT_U_ADD_IMM_FMT_1(T, IMM)  \
+T __attribute__((noinline))  \
+vec_sat_u_add_imm##IMM##_##T##_fmt_1 (T *out, T *in, unsigned limit) \
+{\
+  unsigned i;\
+  for (i = 0; i < limit; i++)\
+out[i] = (T)(in[i] + IMM) >= in[i] ? (in[i] + IMM) : -1; \
+}
+#define DEF_VEC_SAT_U_ADD_IMM_FMT_1_WRAP(T, IMM) \
+  DEF_VEC_SAT_U_ADD_IMM_FMT_1(T, IMM)
+
+#define RUN_VEC_SAT_U_ADD_IMM_FMT_1(T, out, op_1, expect, IMM, N) \
+  vec_sat_u_add_imm##IMM##_##T##_fmt_1(out, op_1, N); \
+  VALIDATE_RESULT (out, expect, N)
+#define RUN_VEC_SAT_U_ADD_IMM_FMT_1_WRAP(T, out, op_1, expect, IMM, N) \
+  RUN_VEC_SAT_U_ADD_IMM_FMT_1(T, out, op_1, expect, IMM, N)
+
 
/**/
 /* Saturation Sub (Unsigned and Signed)   
*/
 
/

[gcc r15-1901] [to-be-committed][RISC-V][V3] DCE analysis for extension elimination

2024-07-08 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:98914f9eba5f19d3eb93fbce8726b5264631cba0

commit r15-1901-g98914f9eba5f19d3eb93fbce8726b5264631cba0
Author: Jeff Law 
Date:   Mon Jul 8 17:06:55 2024 -0600

[to-be-committed][RISC-V][V3] DCE analysis for extension elimination

The pre-commit testing showed that making ext-dce only active at -O2 and 
above
would require minor edits to the tests.  In some cases we had specified -O1 
in
the test or specified no optimization level at all. Those need to be bumped 
to
-O2.   In one test we had one set of dg-options overriding another.

The other approach that could have been taken would be to drop the -On
argument, add an explicit -fext-dce and add dg-skip-if options.  I originally
thought that was going to be the way to go, but the dg-skip-if aspect was
going to get ugly as things like interaction between unrolling, peeling and
-ftracer would have to be accounted for and would likely need semi-regular
adjustment.

Changes since V2:
  Testsuite changes to deal with pass only being enabled at -O2 or
  higher.

--

Changes since V1:

  Check flag_ext_dce before running the new pass.  I'd forgotten that
  I had removed that part of the gate to facilitate more testing.
  Turn flag_ext_dce on at -O2 and above.
  Adjust one of the riscv tests to explicitly avoid vectors
  Adjust a few aarch64 tests
In tbz_2.c we remove an unnecessary extension which causes us to use
"x" registers instead of "w" registers.

In the pred_clobber tests we also remove an extension and that
ultimately causes a reg->reg copy to change locations.

--

This was actually ack'd late in the gcc-14 cycle, but I chose not to 
integrate
it given how late we were in the cycle.

The basic idea here is to track liveness of subobjects within a word and if 
we
find an extension where the bits set aren't actually used, then we convert 
the
extension into a subreg.  The subreg typically simplifies away.

I've seen this help a few routines in coremark, fix one bug in the testsuite
(pr111384) and fix a couple internally reported bugs in Ventana.

The original idea and code were from Joern; Jivan and I hacked it into 
usable
shape.  I've had this in my tester for ~8 months, so it's been through more
build/test cycles than I care to contemplate and nearly every architecture 
we
support.

But just in case, I'm going to wait for it to spin through the pre-commit CI
tester.  I'll find my old ChangeLog before committing.

gcc/
* Makefile.in (OBJS): Add ext-dce.o
* common.opt (ext-dce): Document new option.
* df-scan.cc (df_get_exit_block_use_set): Delete prototype and
make extern.
* df.h (df_get_exit_block_use_set): Prototype.
* ext-dce.cc: New file/pass.
* opts.cc (default_options_table): Handle ext-dce at -O2 or higher.
* passes.def: Add ext-dce before combine.
* tree-pass.h (make_pass_ext_dce): Prototype.

gcc/testsuite
* gcc.target/aarch64/sve/pred_clobber_1.c: Update expected output.
* gcc.target/aarch64/sve/pred_clobber_2.c: Likewise.
* gcc.target/aarch64/sve/pred_clobber_3.c: Likewise.
* gcc.target/aarch64/tbz_2.c: Likewise.
* gcc.target/riscv/core_bench_list.c: New test.
* gcc.target/riscv/core_init_matrix.c: New test.
* gcc.target/riscv/core_list_init.c: New test.
* gcc.target/riscv/matrix_add_const.c: New test.
* gcc.target/riscv/mem-extend.c: New test.
* gcc.target/riscv/pr111384.c: New test.

Co-authored-by: Jivan Hakobyan 
Co-authored-by: Joern Rennecke 

Diff:
---
 gcc/Makefile.in|   1 +
 gcc/common.opt |   4 +
 gcc/df-scan.cc |   3 +-
 gcc/df.h   |   1 +
 gcc/ext-dce.cc | 943 +
 gcc/opts.cc|   1 +
 gcc/passes.def |   1 +
 .../gcc.target/aarch64/sve/pred_clobber_1.c|   1 +
 .../gcc.target/aarch64/sve/pred_clobber_2.c|   1 +
 .../gcc.target/aarch64/sve/pred_clobber_3.c|   1 +
 gcc/testsuite/gcc.target/aarch64/tbz_2.c   |   6 +-
 gcc/testsuite/gcc.target/riscv/core_bench_list.c   |  15 +
 gcc/testsuite/gcc.target/riscv/core_init_matrix.c  |  17 +
 gcc/testsuite/gcc.target/riscv/core_list_init.c|  18 +
 gcc/testsuite/gcc.target/riscv/matrix_add_const.c  |  13 +
 gcc/testsuite/gcc.target/riscv/mem-extend.c|  14 +
 gcc/testsuite/gcc.target/riscv/pr111384.c  |  11 +
 gcc/tree-pass.h|   1 +
 18

[gcc r15-1900] c-format.cc: add ctors to format_check_results and format_check_context

2024-07-08 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:113b5ce0610207717f651a3f8a3f1123d93f97af

commit r15-1900-g113b5ce0610207717f651a3f8a3f1123d93f97af
Author: David Malcolm 
Date:   Mon Jul 8 18:55:28 2024 -0400

c-format.cc: add ctors to format_check_results and format_check_context

This is a minor cleanup I spotted whilst working on another patch.
No functional change intended.

gcc/c-family/ChangeLog:
* c-format.cc (format_check_results::format_check_results): New
ctor.
(struct format_check_context): Add ctor; add "m_" prefix to all
fields.
(check_format_info): Use above ctors.
(check_format_arg): Update for "m_" prefix to
format_check_context.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/c-family/c-format.cc | 63 
 1 file changed, 37 insertions(+), 26 deletions(-)

diff --git a/gcc/c-family/c-format.cc b/gcc/c-family/c-format.cc
index 7a5ffc25602c..5bfd2fc4469e 100644
--- a/gcc/c-family/c-format.cc
+++ b/gcc/c-family/c-format.cc
@@ -1021,6 +1021,20 @@ static int n_format_types = ARRAY_SIZE 
(format_types_orig);
many leaves resulting from nested conditional expressions.  */
 struct format_check_results
 {
+  format_check_results (location_t format_string_loc_)
+  : number_non_literal (0),
+number_extra_args (0),
+extra_arg_loc (UNKNOWN_LOCATION),
+number_dollar_extra_args (0),
+number_wide (0),
+number_non_char (0),
+number_empty (0),
+number_unterminated (0),
+number_other (0),
+format_string_loc (format_string_loc_)
+  {
+  }
+
   /* Number of leaves of the format argument that could not be checked
  as they were not string literals.  */
   int number_non_literal;
@@ -1050,10 +1064,21 @@ struct format_check_results
 
 struct format_check_context
 {
-  format_check_results *res;
-  function_format_info *info;
-  tree params;
-  vec *arglocs;
+  format_check_context (format_check_results *res,
+   function_format_info *info,
+   tree params,
+   vec *arglocs)
+  : m_res (res),
+m_info (info),
+m_params (params),
+m_arglocs (arglocs)
+  {
+  }
+
+  format_check_results *m_res;
+  function_format_info *m_info;
+  tree m_params;
+  vec *m_arglocs;
 };
 
 /* Return the format name (as specified in the original table) for the format
@@ -1539,10 +1564,8 @@ static void
 check_format_info (function_format_info *info, tree params,
   vec *arglocs)
 {
-  format_check_context format_ctx;
   unsigned HOST_WIDE_INT arg_num;
   tree format_tree;
-  format_check_results res;
   /* Skip to format argument.  If the argument isn't available, there's
  no work for us to do; prototype checking will catch the problem.  */
   for (arg_num = 1; ; ++arg_num)
@@ -1558,26 +1581,14 @@ check_format_info (function_format_info *info, tree 
params,
   if (format_tree == 0)
 return;
 
-  res.number_non_literal = 0;
-  res.number_extra_args = 0;
-  res.extra_arg_loc = UNKNOWN_LOCATION;
-  res.number_dollar_extra_args = 0;
-  res.number_wide = 0;
-  res.number_non_char = 0;
-  res.number_empty = 0;
-  res.number_unterminated = 0;
-  res.number_other = 0;
-  res.format_string_loc = input_location;
-
-  format_ctx.res = &res;
-  format_ctx.info = info;
-  format_ctx.params = params;
-  format_ctx.arglocs = arglocs;
+  format_check_results res (input_location);
+
+  format_check_context format_ctx (&res, info, params, arglocs);
 
   check_function_arguments_recurse (check_format_arg, &format_ctx,
format_tree, arg_num, OPT_Wformat_);
 
-  location_t loc = format_ctx.res->format_string_loc;
+  location_t loc = format_ctx.m_res->format_string_loc;
 
   if (res.number_non_literal > 0)
 {
@@ -1659,10 +1670,10 @@ check_format_arg (void *ctx, tree format_tree,
  unsigned HOST_WIDE_INT arg_num)
 {
   format_check_context *format_ctx = (format_check_context *) ctx;
-  format_check_results *res = format_ctx->res;
-  function_format_info *info = format_ctx->info;
-  tree params = format_ctx->params;
-  vec *arglocs = format_ctx->arglocs;
+  format_check_results *res = format_ctx->m_res;
+  function_format_info *info = format_ctx->m_info;
+  tree params = format_ctx->m_params;
+  vec *arglocs = format_ctx->m_arglocs;
 
   int format_length;
   HOST_WIDE_INT offset;


[gcc r15-1899] i386: Promote {QI, HI}mode x86_movcc_0_m1_neg to SImode

2024-07-08 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:2b3027bea3f218599d36379d3d593841df7a1559

commit r15-1899-g2b3027bea3f218599d36379d3d593841df7a1559
Author: Uros Bizjak 
Date:   Mon Jul 8 20:47:52 2024 +0200

i386: Promote {QI,HI}mode x86_movcc_0_m1_neg to SImode

Promote HImode x86_movcc_0_m1_neg insn to SImode to avoid
redundant prefixes. Also promote QImode insn when TARGET_PROMOTE_QImode
is set. This is similar to promotable_binary_operator splitter, where we
promote the result to SImode.

Also correct insn condition for splitters to SImode of NEG and NOT
instructions. The sizes of QImode and SImode instructions are always
the same, so there is no need for optimize_insn_for_size bypass.

gcc/ChangeLog:

* config/i386/i386.md (x86_movcc_0_m1_neg splitter to SImode):
New splitter.
(NEG and NOT splitter to SImode): Remove optimize_insn_for_size_p
predicate from insn condition.

Diff:
---
 gcc/config/i386/i386.md | 25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index b24c4fe58750..214cb2e239ae 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -26576,9 +26576,7 @@
(clobber (reg:CC FLAGS_REG))]
   "! TARGET_PARTIAL_REG_STALL && reload_completed
&& (GET_MODE (operands[0]) == HImode
-   || (GET_MODE (operands[0]) == QImode
-  && (TARGET_PROMOTE_QImode
-  || optimize_insn_for_size_p ("
+   || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))"
   [(parallel [(set (match_dup 0)
   (neg:SI (match_dup 1)))
  (clobber (reg:CC FLAGS_REG))])]
@@ -26593,15 +26591,30 @@
(not (match_operand 1 "general_reg_operand")))]
   "! TARGET_PARTIAL_REG_STALL && reload_completed
&& (GET_MODE (operands[0]) == HImode
-   || (GET_MODE (operands[0]) == QImode
-  && (TARGET_PROMOTE_QImode
-  || optimize_insn_for_size_p ("
+   || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))"
   [(set (match_dup 0)
(not:SI (match_dup 1)))]
 {
   operands[0] = gen_lowpart (SImode, operands[0]);
   operands[1] = gen_lowpart (SImode, operands[1]);
 })
+
+(define_split
+  [(set (match_operand 0 "general_reg_operand")
+   (neg (match_operator 1 "ix86_carry_flag_operator"
+ [(reg FLAGS_REG) (const_int 0)])))
+   (clobber (reg:CC FLAGS_REG))]
+  "! TARGET_PARTIAL_REG_STALL && reload_completed
+   && (GET_MODE (operands[0]) == HImode
+   || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))"
+  [(parallel [(set (match_dup 0)
+  (neg:SI (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[0] = gen_lowpart (SImode, operands[0]);
+  operands[1] = shallow_copy_rtx (operands[1]);
+  PUT_MODE (operands[1], SImode);
+})
 
 ;; RTL Peephole optimizations, run before sched2.  These primarily look to
 ;; transform a complex memory operation into two memory to register operations.


[gcc r15-1898] libstdc++: Fix _Atomic(T) macro in <stdatomic.h> [PR115807]

2024-07-08 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:40d234dd6439e8c8cfbf3f375a61906aed35c80d

commit r15-1898-g40d234dd6439e8c8cfbf3f375a61906aed35c80d
Author: Jonathan Wakely 
Date:   Sun Jul 7 12:22:42 2024 +0100

libstdc++: Fix _Atomic(T) macro in <stdatomic.h> [PR115807]

The definition of the _Atomic(T) macro needs to refer to ::std::atomic,
not some other std::atomic relative to the current namespace.

libstdc++-v3/ChangeLog:

PR libstdc++/115807
* include/c_compatibility/stdatomic.h (_Atomic): Ensure it
refers to std::atomic in the global namespace.
* testsuite/29_atomics/headers/stdatomic.h/115807.cc: New test.

Diff:
---
 libstdc++-v3/include/c_compatibility/stdatomic.h   |  2 +-
 .../testsuite/29_atomics/headers/stdatomic.h/115807.cc | 14 ++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/c_compatibility/stdatomic.h 
b/libstdc++-v3/include/c_compatibility/stdatomic.h
index 5403b52a036d..72b9446eb170 100644
--- a/libstdc++-v3/include/c_compatibility/stdatomic.h
+++ b/libstdc++-v3/include/c_compatibility/stdatomic.h
@@ -35,7 +35,7 @@
 #ifdef __cpp_lib_stdatomic_h // C++ >= 23
 #include 
 
-#define _Atomic(_Tp) std::atomic<_Tp>
+#define _Atomic(_Tp) ::std::atomic<_Tp>
 
 using std::memory_order;
 using std::memory_order_relaxed;
diff --git a/libstdc++-v3/testsuite/29_atomics/headers/stdatomic.h/115807.cc 
b/libstdc++-v3/testsuite/29_atomics/headers/stdatomic.h/115807.cc
new file mode 100644
index ..14f320fe8357
--- /dev/null
+++ b/libstdc++-v3/testsuite/29_atomics/headers/stdatomic.h/115807.cc
@@ -0,0 +1,14 @@
+// { dg-do compile { target c++23 } }
+#include 
+namespace other {
+  namespace std {
+int atomic = 0;
+  }
+  _Atomic(long) a{};
+}
+
+#include 
+
+namespace non::std {
+  static_assert( ::std::is_same_v<_Atomic(int), ::std::atomic> );
+}


[gcc r15-1897] Remove trailing whitespace from invoke.texi

2024-07-08 Thread Patrick O'Neill via Gcc-cvs
https://gcc.gnu.org/g:a0e64a043ec498f959a214b5b02d6c7177984a0f

commit r15-1897-ga0e64a043ec498f959a214b5b02d6c7177984a0f
Author: Patrick O'Neill 
Date:   Tue Jul 2 18:28:00 2024 -0700

Remove trailing whitespace from invoke.texi

gcc/ChangeLog:

* doc/invoke.texi: Remove trailing whitespace.

Signed-off-by: Patrick O'Neill 

Diff:
---
 gcc/doc/invoke.texi | 392 ++--
 1 file changed, 196 insertions(+), 196 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index b37c7af7a390..4d671c4f6d89 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -94,9 +94,9 @@ that option with all supported languages.
 The usual way to run GCC is to run the executable called @command{gcc}, or
 @command{@var{machine}-gcc} when cross-compiling, or
 @command{@var{machine}-gcc-@var{version}} to run a specific version of GCC.
-When you compile C++ programs, you should invoke GCC as @command{g++} 
-instead.  @xref{Invoking G++,,Compiling C++ Programs}, 
-for information about the differences in behavior between @command{gcc} 
+When you compile C++ programs, you should invoke GCC as @command{g++}
+instead.  @xref{Invoking G++,,Compiling C++ Programs},
+for information about the differences in behavior between @command{gcc}
 and @command{g++} when compiling C++ programs.
 
 @cindex grouping options
@@ -3623,8 +3623,8 @@ unambiguous base classes.
 
 Mixing code compiled with @option{-frtti} with that compiled with
 @option{-fno-rtti} may not work.  For example, programs may
-fail to link if a class compiled with @option{-fno-rtti} is used as a base 
-for a class compiled with @option{-frtti}.  
+fail to link if a class compiled with @option{-fno-rtti} is used as a base
+for a class compiled with @option{-frtti}.
 
 @opindex fsized-deallocation
 @item -fsized-deallocation
@@ -4176,7 +4176,7 @@ As an example:
 @smallexample
 template <class T> void f(T t) @{ t(); @};
 void g() noexcept;
-void h() @{ f(g); @} 
+void h() @{ f(g); @}
 @end smallexample
 
 @noindent
@@ -4443,10 +4443,10 @@ But this use is not portable across different compilers.
 @item -Wno-non-template-friend @r{(C++ and Objective-C++ only)}
 Disable warnings when non-template friend functions are declared
 within a template.  In very old versions of GCC that predate implementation
-of the ISO standard, declarations such as 
+of the ISO standard, declarations such as
 @samp{friend int foo(int)}, where the name of the friend is an unqualified-id,
 could be interpreted as a particular specialization of a template
-function; the warning exists to diagnose compatibility problems, 
+function; the warning exists to diagnose compatibility problems,
 and is enabled by default.
 
 @opindex Wold-style-cast
@@ -5197,7 +5197,7 @@ value, if any.
 Traditionally, diagnostic messages have been formatted irrespective of
 the output device's aspect (e.g.@: its width, @dots{}).  You can use the
 options described below
-to control the formatting algorithm for diagnostic messages, 
+to control the formatting algorithm for diagnostic messages,
 e.g.@: how many characters per line, how often source location
 information should be reported.  Note that some language front ends may not
 honor these options.
@@ -8039,7 +8039,7 @@ This warning is enabled by @option{-Wall} or 
@option{-Wextra}.
 @cindex unknown pragmas, warning
 @cindex pragmas, warning of unknown
 @item -Wunknown-pragmas
-Warn when a @code{#pragma} directive is encountered that is not understood by 
+Warn when a @code{#pragma} directive is encountered that is not understood by
 GCC@.  If this command-line option is used, warnings are even issued
 for unknown pragmas in system header files.  This is not the case if
 the warnings are only enabled by the @option{-Wall} command-line option.
@@ -8077,7 +8077,7 @@ This option is only active when 
@option{-fstrict-aliasing} is active.
 It warns about code that might break the strict aliasing rules that the
 compiler is using for optimization.
 Higher levels correspond to higher accuracy (fewer false positives).
-Higher levels also correspond to more effort, similar to the way @option{-O} 
+Higher levels also correspond to more effort, similar to the way @option{-O}
 works.
 @option{-Wstrict-aliasing} is equivalent to @option{-Wstrict-aliasing=3}.
 
@@ -9298,7 +9298,7 @@ enabled by @option{-Wextra}.
 @opindex Wno-bad-function-cast
 @item -Wbad-function-cast @r{(C and Objective-C only)}
 Warn when a function call is cast to a non-matching type.
-For example, warn if a call to a function returning an integer type 
+For example, warn if a call to a function returning an integer type
 is cast to a pointer type.
 
 @opindex Wc90-c99-compat
@@ -9696,13 +9696,13 @@ Do not warn about stray tokens after @code{#else} and 
@code{#endif}.
 @item -Wenum-compare
 Warn about a comparison between values of different enumerated types.
 In C++ enumerated type mismatches in conditional expressions are also
-dia

[gcc r15-1896] x86: Support bitwise and/andnot/abs/neg/copysign/xorsign op for V8BF/V16BF/V32BF

2024-07-08 Thread Levy Hsu via Gcc-cvs
https://gcc.gnu.org/g:f3f9e4ee7642e5131f2d6607f764267df7d233d4

commit r15-1896-gf3f9e4ee7642e5131f2d6607f764267df7d233d4
Author: Levy Hsu 
Date:   Mon Jul 8 14:59:35 2024 +0000

x86: Support bitwise and/andnot/abs/neg/copysign/xorsign op for 
V8BF/V16BF/V32BF

This patch extends support for BF16 vector operations in GCC, including 
bitwise AND, ANDNOT, ABS, NEG, COPYSIGN, and XORSIGN for V8BF, V16BF, and V32BF 
modes.

gcc/ChangeLog:

* config/i386/i386-expand.cc (ix86_expand_fp_absneg_operator): Add 
VBF modes.
(ix86_expand_copysign): Ditto.
(ix86_expand_xorsign): Ditto.
* config/i386/i386.cc (ix86_build_const_vector): Ditto.
(ix86_build_signbit_mask): Ditto.
* config/i386/sse.md: Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx2-bf16-vec-absneg.c: New test.
* gcc.target/i386/avx512f-bf16-vec-absneg.c: New test.

Diff:
---
 gcc/config/i386/i386-expand.cc | 76 +--
 gcc/config/i386/i386.cc|  6 ++
 gcc/config/i386/sse.md | 37 +++---
 .../gcc.target/i386/avx2-bf16-vec-absneg.c | 85 ++
 .../gcc.target/i386/avx512f-bf16-vec-absneg.c  | 66 +
 5 files changed, 234 insertions(+), 36 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index bf79e59f811e..abc702d3ff27 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -2174,20 +2174,28 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, 
machine_mode mode,
   machine_mode vmode = mode;
   rtvec par;
 
-  if (vector_mode || mode == TFmode || mode == HFmode)
-{
-  use_sse = true;
-  if (mode == HFmode)
-   vmode = V8HFmode;
-}
-  else if (TARGET_SSE_MATH)
-{
-  use_sse = SSE_FLOAT_MODE_P (mode);
-  if (mode == SFmode)
-   vmode = V4SFmode;
-  else if (mode == DFmode)
-   vmode = V2DFmode;
-}
+  switch (mode)
+  {
+  case HFmode:
+use_sse = true;
+vmode = V8HFmode;
+break;
+  case BFmode:
+use_sse = true;
+vmode = V8BFmode;
+break;
+  case SFmode:
+use_sse = TARGET_SSE_MATH && TARGET_SSE;
+vmode = V4SFmode;
+break;
+  case DFmode:
+use_sse = TARGET_SSE_MATH && TARGET_SSE2;
+vmode = V2DFmode;
+break;
+  default:
+use_sse = vector_mode || mode == TFmode;
+break;
+  }
 
   dst = operands[0];
   src = operands[1];
@@ -2320,16 +2328,26 @@ ix86_expand_copysign (rtx operands[])
 
   mode = GET_MODE (operands[0]);
 
-  if (mode == HFmode)
+  switch (mode)
+  {
+  case HFmode:
 vmode = V8HFmode;
-  else if (mode == SFmode)
+break;
+  case BFmode:
+vmode = V8BFmode;
+break;
+  case SFmode:
 vmode = V4SFmode;
-  else if (mode == DFmode)
+break;
+  case DFmode:
 vmode = V2DFmode;
-  else if (mode == TFmode)
+break;
+  case TFmode:
 vmode = mode;
-  else
-gcc_unreachable ();
+break;
+  default:
+gcc_unreachable();
+  }
 
   if (rtx_equal_p (operands[1], operands[2]))
 {
@@ -2390,14 +2408,24 @@ ix86_expand_xorsign (rtx operands[])
 
   mode = GET_MODE (dest);
 
-  if (mode == HFmode)
+  switch (mode)
+  {
+  case HFmode:
 vmode = V8HFmode;
-  else if (mode == SFmode)
+break;
+  case BFmode:
+vmode = V8BFmode;
+break;
+  case SFmode:
 vmode = V4SFmode;
-  else if (mode == DFmode)
+break;
+  case DFmode:
 vmode = V2DFmode;
-  else
+break;
+  default:
 gcc_unreachable ();
+break;
+  }
 
   temp = gen_reg_rtx (vmode);
   mask = ix86_build_signbit_mask (vmode, 0, 0);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 17d23bbcbc27..9c2ebe74fc92 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -16173,6 +16173,9 @@ ix86_build_const_vector (machine_mode mode, bool vect, 
rtx value)
 case E_V8DFmode:
 case E_V4DFmode:
 case E_V2DFmode:
+case E_V32BFmode:
+case E_V16BFmode:
+case E_V8BFmode:
   n_elt = GET_MODE_NUNITS (mode);
   v = rtvec_alloc (n_elt);
   scalar_mode = GET_MODE_INNER (mode);
@@ -16209,6 +16212,9 @@ ix86_build_signbit_mask (machine_mode mode, bool vect, 
bool invert)
 case E_V8HFmode:
 case E_V16HFmode:
 case E_V32HFmode:
+case E_V32BFmode:
+case E_V16BFmode:
+case E_V8BFmode:
   vec_mode = mode;
   imode = HImode;
   break;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index bda66d5e1212..b3b4697924b5 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -351,7 +351,9 @@
 
 ;; 128-, 256- and 512-bit float vector modes for bitwise operations
 (define_mode_iterator VFB
-  [(V32HF "TARGET_AVX512F && TARGET_EVEX512")
+  [(V32BF "TARGET_AVX512F && TARGET_EVEX512")
+   (V16BF "TARGET_AVX") (V8BF "TARGET_SSE2")
+   (V32HF "TARGET_AVX512F && TARGET_EVEX512")
(V16HF "TARGET_AVX") (V8HF "TARGET_SSE2"

[gcc r11-11562] c++: Add testcase for this PR [PR97990]

2024-07-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:c2c216d0f85f861cc10529a455edfaf645aa393f

commit r11-11562-gc2c216d0f85f861cc10529a455edfaf645aa393f
Author: Andrew Pinski 
Date:   Fri Feb 16 10:55:43 2024 -0800

c++: Add testcase for this PR [PR97990]

This testcase was fixed by r14-5934-gf26d68d5d128c8 but we should add
one to make sure it does not regress again.

Committed as obvious after a quick test on the testcase.

PR c++/97990

gcc/testsuite/ChangeLog:

* g++.dg/torture/vector-struct-1.C: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 5f1438db419c9eb8901d1d1d7f98fb69082aec8e)

Diff:
---
 gcc/testsuite/g++.dg/torture/vector-struct-1.C | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/gcc/testsuite/g++.dg/torture/vector-struct-1.C 
b/gcc/testsuite/g++.dg/torture/vector-struct-1.C
new file mode 100644
index ..e2747417e2d5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/vector-struct-1.C
@@ -0,0 +1,18 @@
+/* PR c++/97990 */
+/* This used to crash with lto and strict aliasing enabled as the
+   vector type variant still had TYPE_ALIAS_SET set on it. */
+
+typedef __attribute__((__vector_size__(sizeof(short)))) short TSimd;
+TSimd hh(int);
+struct y6
+{
+  TSimd VALUE;
+  ~y6();
+};
+template <class T1, class T2>
+auto f2(T1 p1, T2){
+  return hh(p1) <= 0;
+}
+void f1(){
+  f2(0, y6{});
+}


[gcc r11-11561] middle-end/112732 - stray TYPE_ALIAS_SET in type variant

2024-07-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:e7879391bb2b86606d0ce35ed97eccc108970e36

commit r11-11561-ge7879391bb2b86606d0ce35ed97eccc108970e36
Author: Richard Biener 
Date:   Tue Nov 28 12:36:21 2023 +0100

middle-end/112732 - stray TYPE_ALIAS_SET in type variant

The following fixes a stray TYPE_ALIAS_SET in a type variant built
by build_opaque_vector_type which is diagnosed by type checking
enabled with -flto.

PR middle-end/112732
* tree.c (build_opaque_vector_type): Reset TYPE_ALIAS_SET
of the newly built type.

(cherry picked from commit f26d68d5d128c86faaceeb81b1e8f22254ad53df)

Diff:
---
 gcc/tree.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/tree.c b/gcc/tree.c
index 8b5b0b7508cc..2cbdc7b65ba9 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -11098,6 +11098,8 @@ build_opaque_vector_type (tree innertype, poly_int64 
nunits)
   TYPE_NEXT_VARIANT (cand) = TYPE_NEXT_VARIANT (t);
   TYPE_NEXT_VARIANT (t) = cand;
   TYPE_MAIN_VARIANT (cand) = TYPE_MAIN_VARIANT (t);
+  /* Type variants have no alias set defined.  */
+  TYPE_ALIAS_SET (cand) = -1;
   return cand;
 }


[gcc r14-10394] tree-optimization/115723 - ICE with .COND_ADD reduction

2024-07-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:64a6c0d594c05f275de91df35047cffb3ccecf2f

commit r14-10394-g64a6c0d594c05f275de91df35047cffb3ccecf2f
Author: Richard Biener 
Date:   Mon Jul 1 10:06:55 2024 +0200

tree-optimization/115723 - ICE with .COND_ADD reduction

The following fixes an ICE with a .COND_ADD discovered as reduction
even though its else value isn't the reduction chain link but a
constant.  This would be wrong-code with --disable-checking I think.

PR tree-optimization/115723
* tree-vect-loop.cc (check_reduction_path): For a .COND_ADD
verify the else value also refers to the reduction chain op.

* gcc.dg/vect/pr115723.c: New testcase.

(cherry picked from commit 286cda3461d6f5ce7d911d3f26bd4975ea7ea11d)

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr115723.c | 25 +
 gcc/tree-vect-loop.cc| 12 
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr115723.c 
b/gcc/testsuite/gcc.dg/vect/pr115723.c
new file mode 100644
index ..b98b29d48702
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr115723.c
@@ -0,0 +1,25 @@
+/* { dg-additional-options "-ffast-math -fno-unsafe-math-optimizations" } */
+
+#include "tree-vect.h"
+
+double __attribute__((noipa))
+foo (double *x, double *y, int n)
+{
+  double res = 0.;
+  for (int i = 0; i < n; ++i)
+if (y[i] > 0.)
+  res += x[i];
+else
+  res = 64.;
+  return res;
+}
+
+double y[16] = { 1., 1., 1., 1., 0., 1., 1., 1.,
+ 1., 1., 1., 1., 1., 1., 1., 1. };
+int main ()
+{
+  check_vect ();
+  if (foo (y, y, 16) != 64. + 11.)
+abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 29c03c246d45..832399f7e9d7 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -4161,15 +4161,19 @@ pop:
 
   FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op.ops[opi])
{
-   /* In case of a COND_OP (mask, op1, op2, op1) reduction we might have
-  op1 twice (once as definition, once as else) in the same operation.
-  Allow this.  */
+ /* In case of a COND_OP (mask, op1, op2, op1) reduction we should
+have op1 twice (once as definition, once as else) in the same
+operation.  Enforce this.  */
  if (cond_fn_p && op_use_stmt == use_stmt)
{
  gcall *call = as_a <gcall *> (use_stmt);
  unsigned else_pos
= internal_fn_else_index (internal_fn (op.code));
-
+ if (gimple_call_arg (call, else_pos) != op.ops[opi])
+   {
+ fail = true;
+ break;
+   }
  for (unsigned int j = 0; j < gimple_call_num_args (call); ++j)
{
  if (j == else_pos)


[gcc r14-10392] tree-optimization/115669 - fix SLP reduction association

2024-07-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:03844a2a15a85015506c0f187d0e9d526900cc2c

commit r14-10392-g03844a2a15a85015506c0f187d0e9d526900cc2c
Author: Richard Biener 
Date:   Thu Jun 27 11:26:08 2024 +0200

tree-optimization/115669 - fix SLP reduction association

The following avoids associating a reduction path as that might
get STMT_VINFO_REDUC_IDX out-of-sync with the SLP operand order.
This is a latent issue with SLP reductions but now easily exposed
as we're doing single-lane SLP reductions.

Once we have achieved SLP-only vectorization we can move and update this meta-data.

PR tree-optimization/115669
* tree-vect-slp.cc (vect_build_slp_tree_2): Do not reassociate
chains that participate in a reduction.

* gcc.dg/vect/pr115669.c: New testcase.

(cherry picked from commit 7886830bb45c4f5dca0496d4deae9a45204d78f5)

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr115669.c | 22 ++
 gcc/tree-vect-slp.cc |  3 +++
 2 files changed, 25 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/vect/pr115669.c 
b/gcc/testsuite/gcc.dg/vect/pr115669.c
new file mode 100644
index ..361a17a64e68
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr115669.c
@@ -0,0 +1,22 @@
+/* { dg-additional-options "-fwrapv" } */
+
+#include "tree-vect.h"
+
+int a = 10;
+unsigned b;
+long long c[100];
+int foo()
+{
+  long long *d = c;
+  for (short e = 0; e < a; e++)
+b += ~(d ? d[e] : 0);
+  return b;
+}
+
+int main()
+{
+  check_vect ();
+  if (foo () != -10)
+abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 5e7e9b5bf085..0795605ec527 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -2050,6 +2050,9 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
   else if (is_a <bb_vec_info> (vinfo)
   /* ???  We don't handle !vect_internal_def defs below.  */
   && STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
+  /* ???  Do not associate a reduction, this will wreck REDUC_IDX
+ mapping as long as that exists on the stmt_info level.  */
+  && STMT_VINFO_REDUC_IDX (stmt_info) == -1
   && is_gimple_assign (stmt_info->stmt)
   && (associative_tree_code (gimple_assign_rhs_code (stmt_info->stmt))
   || gimple_assign_rhs_code (stmt_info->stmt) == MINUS_EXPR)


[gcc r14-10393] tree-optimization/115694 - ICE with complex store rewrite

2024-07-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:cde411950e91e0174a0134360d2eb138ca6821c6

commit r14-10393-gcde411950e91e0174a0134360d2eb138ca6821c6
Author: Richard Biener 
Date:   Sun Jun 30 13:07:14 2024 +0200

tree-optimization/115694 - ICE with complex store rewrite

The following adds a missed check when forwprop attempts to rewrite
a complex store.

PR tree-optimization/115694
* tree-ssa-forwprop.cc (pass_forwprop::execute): Check the
store is complex before rewriting it.

* g++.dg/torture/pr115694.C: New testcase.

(cherry picked from commit 543a5b9da964f821b9e723ed9c93d6cdca464d47)

Diff:
---
 gcc/testsuite/g++.dg/torture/pr115694.C | 13 +
 gcc/tree-ssa-forwprop.cc|  2 ++
 2 files changed, 15 insertions(+)

diff --git a/gcc/testsuite/g++.dg/torture/pr115694.C 
b/gcc/testsuite/g++.dg/torture/pr115694.C
new file mode 100644
index ..bbce47decf83
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr115694.C
@@ -0,0 +1,13 @@
+// { dg-do compile }
+
+_Complex a;
+typedef struct {
+  double a[2];
+} b;
+void c(b);
+void d()
+{
+  _Complex b1 = a;
+  b t = __builtin_bit_cast (b, b1);
+  c(t);
+}
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index 05d42ccd3c61..abf71f0d3a03 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -3762,6 +3762,8 @@ pass_forwprop::execute (function *fun)
  && gimple_store_p (use_stmt)
  && !gimple_has_volatile_ops (use_stmt)
  && is_gimple_assign (use_stmt)
+ && (TREE_CODE (TREE_TYPE (gimple_assign_lhs (use_stmt)))
+ == COMPLEX_TYPE)
  && (TREE_CODE (gimple_assign_lhs (use_stmt))
  != TARGET_MEM_REF))
{


[gcc r14-10391] tree-optimization/115646 - ICE with pow shrink-wrapping from bitfield

2024-07-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:078cdccc849831b8f1ff74b9ad16ce3f5aa172be

commit r14-10391-g078cdccc849831b8f1ff74b9ad16ce3f5aa172be
Author: Richard Biener 
Date:   Tue Jun 25 16:13:02 2024 +0200

tree-optimization/115646 - ICE with pow shrink-wrapping from bitfield

The following makes analysis and transform agree on constraints.

PR tree-optimization/115646
* tree-call-cdce.cc (check_pow): Check for bit_sz values
as allowed by transform.

* gcc.dg/pr115646.c: New testcase.

(cherry picked from commit 453b1d291d1a0f89087ad91cf6b1bed1ec68eff3)

Diff:
---
 gcc/testsuite/gcc.dg/pr115646.c | 13 +
 gcc/tree-call-cdce.cc   |  2 +-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/pr115646.c b/gcc/testsuite/gcc.dg/pr115646.c
new file mode 100644
index ..24bc1e45
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr115646.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+extern double pow(double x, double y);
+
+struct S {
+unsigned int a : 3, b : 8, c : 21;
+};
+
+void foo (struct S *p)
+{
+  pow (p->c, 42);
+}
diff --git a/gcc/tree-call-cdce.cc b/gcc/tree-call-cdce.cc
index 7f67a0b2dc6f..befe6acf178a 100644
--- a/gcc/tree-call-cdce.cc
+++ b/gcc/tree-call-cdce.cc
@@ -260,7 +260,7 @@ check_pow (gcall *pow_call)
   /* If the type of the base is too wide,
  the resulting shrink wrapping condition
 will be too conservative.  */
-  if (bit_sz > MAX_BASE_INT_BIT_SIZE)
+  if (bit_sz != 8 && bit_sz != 16 && bit_sz != MAX_BASE_INT_BIT_SIZE)
 return false;
 
   return true;


[gcc r15-1895] rs6000: load high and low part of 128bit vector independently [PR110040]

2024-07-08 Thread jeevitha via Gcc-cvs
https://gcc.gnu.org/g:5be97039aa6c27fdf5d5bd43ef393b307c5ecedd

commit r15-1895-g5be97039aa6c27fdf5d5bd43ef393b307c5ecedd
Author: Jeevitha 
Date:   Mon Jul 8 06:09:49 2024 -0500

rs6000: load high and low part of 128bit vector independently [PR110040]

PR110040 exposes an issue concerning moves from vector registers to GPRs.
There are two moves, one for upper 64 bits and the other for the lower
64 bits.  In the problematic test case, we are only interested in storing
the lower 64 bits.  However, the instruction for copying the upper 64 bits
is still emitted and is dead code.  This patch adds a splitter that splits
apart the two move instructions so that DCE can remove the dead code after
splitting.

2024-07-08  Jeevitha Palanisamy  

gcc/
PR target/110040
* config/rs6000/vsx.md (split pattern for V1TI to DI move): New 
define.

gcc/testsuite/
PR target/110040
* gcc.target/powerpc/pr110040-1.c: New testcase.
* gcc.target/powerpc/pr110040-2.c: New testcase.

Diff:
---
 gcc/config/rs6000/vsx.md  | 17 +
 gcc/testsuite/gcc.target/powerpc/pr110040-1.c | 15 +++
 gcc/testsuite/gcc.target/powerpc/pr110040-2.c | 16 
 3 files changed, 48 insertions(+)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 48ba262f7e48..23ce5c740510 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6735,3 +6735,20 @@
   "vmsumcud %0,%1,%2,%3"
   [(set_attr "type" "veccomplex")]
 )
+
+(define_split
+  [(set (match_operand:V1TI 0 "gpc_reg_operand")
+   (match_operand:V1TI 1 "vsx_register_operand"))]
+  "reload_completed
+   && TARGET_DIRECT_MOVE_64BIT
+   && int_reg_operand (operands[0], V1TImode)
+   && vsx_register_operand (operands[1], V1TImode)"
+   [(pc)]
+{
+  rtx src_op = gen_rtx_REG (V2DImode, REGNO (operands[1]));
+  rtx dest_op0 = gen_rtx_REG (DImode, REGNO (operands[0]));
+  rtx dest_op1 = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
+  emit_insn (gen_vsx_extract_v2di (dest_op0, src_op, const0_rtx));
+  emit_insn (gen_vsx_extract_v2di (dest_op1, src_op, const1_rtx));
+  DONE;
+})
diff --git a/gcc/testsuite/gcc.target/powerpc/pr110040-1.c 
b/gcc/testsuite/gcc.target/powerpc/pr110040-1.c
new file mode 100644
index ..0a521e9e51d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr110040-1.c
@@ -0,0 +1,15 @@
+/* PR target/110040 */
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target powerpc_vsx } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
+/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
+
+#include <altivec.h>
+
+void
+foo (signed long *dst, vector signed __int128 src)
+{
+  *dst = (signed long) src[0];
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/pr110040-2.c 
b/gcc/testsuite/gcc.target/powerpc/pr110040-2.c
new file mode 100644
index ..8236f3cbe223
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr110040-2.c
@@ -0,0 +1,16 @@
+/* PR target/110040 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target powerpc_vsx } */
+/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
+
+/* builtin vec_xst_trunc requires power10.  */
+
+#include <altivec.h>
+
+void
+foo (signed int *dst, vector signed __int128 src)
+{
+  __builtin_vec_xst_trunc (src, 0, dst);
+}


[gcc r15-1894] RISC-V: Implement .SAT_TRUNC for vector unsigned int

2024-07-08 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:dafd63d7c5cddce1e00803606e742d75927b1a1e

commit r15-1894-gdafd63d7c5cddce1e00803606e742d75927b1a1e
Author: Pan Li 
Date:   Fri Jul 5 09:02:47 2024 +0800

RISC-V: Implement .SAT_TRUNC for vector unsigned int

This patch implements .SAT_TRUNC for the RISC-V backend with the help
of the RVV Vector Narrowing Fixed-Point Clip Instructions.  The
following SEW conversions are supported:

* e64 => e32
* e64 => e16
* e64 => e8
* e32 => e16
* e32 => e8
* e16 => e8

Take the example below to see the changes to the generated asm.
Form 1:
  #define DEF_VEC_SAT_U_TRUNC_FMT_1(NT, WT) \
  void __attribute__((noinline))\
  vec_sat_u_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
bool overflow = x > (WT)(NT)(-1);   \
out[i] = ((NT)x) | (NT)-overflow;   \
  } \
  }

DEF_VEC_SAT_U_TRUNC_FMT_1 (uint32_t, uint64_t)

Before this patch:
.L3:
  vsetvli  a5,a2,e64,m1,ta,ma
  vle64.v  v1,0(a1)
  vmsgtu.vv    v0,v1,v2
  vsetvli  zero,zero,e32,mf2,ta,ma
  vncvt.x.x.w  v1,v1
  vmerge.vim   v1,v1,-1,v0
  vse32.v  v1,0(a0)
  slli a4,a5,3
  add  a1,a1,a4
  slli a4,a5,2
  add  a0,a0,a4
  sub  a2,a2,a5
  bne  a2,zero,.L3

After this patch:
.L3:
  vsetvli  a5,a2,e32,mf2,ta,ma
  vle64.v  v1,0(a1)
  vnclipu.wi   v1,v1,0
  vse32.v  v1,0(a0)
  slli a4,a5,3
  add  a1,a1,a4
  slli a4,a5,2
  add  a0,a0,a4
  sub  a2,a2,a5
  bne  a2,zero,.L3

Passed the full rv64gcv regression tests.

gcc/ChangeLog:

* config/riscv/autovec.md (ustrunc<mode><v_double_trunc>2): Add
new pattern for double truncation.
(ustrunc<mode><v_quad_trunc>2): Ditto but for quad truncation.
(ustrunc<mode><v_oct_trunc>2): Ditto but for oct truncation.
* config/riscv/riscv-protos.h (expand_vec_double_ustrunc): Add
new func decl to expand double vec ustrunc.
(expand_vec_quad_ustrunc): Ditto but for quad.
(expand_vec_oct_ustrunc): Ditto but for oct.
* config/riscv/riscv-v.cc (expand_vec_double_ustrunc): Add new
func impl to expand vector double ustrunc.
(expand_vec_quad_ustrunc): Ditto but for quad.
(expand_vec_oct_ustrunc): Ditto but for oct.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add helper
test macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h: New 
test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/autovec.md|  35 ++
 gcc/config/riscv/riscv-protos.h|   4 +
 gcc/config/riscv/riscv-v.cc|  46 +++
 .../riscv/rvv/autovec/binop/vec_sat_arith.h|  22 ++
 .../riscv/rvv/autovec/unop/vec_sat_data.h  | 394 +
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c |  19 +
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c |  21 ++
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c |  23 ++
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c |  19 +
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c

[gcc r15-1893] fortran: Move definition of variable closer to its uses

2024-07-08 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:7183a8ca18d5889a1f66ec1edbda00200d700c6c

commit r15-1893-g7183a8ca18d5889a1f66ec1edbda00200d700c6c
Author: Mikael Morin 
Date:   Mon Jul 8 09:38:42 2024 +0200

fortran: Move definition of variable closer to its uses

No change of behaviour, this makes a variable easier to track.

gcc/fortran/ChangeLog:

* trans-array.cc (gfc_trans_preloop_setup): Use a separate variable
for iteration.  Use directly the value of variable I if it is known.
Move the definition of the variable to the branch where the
remaining uses are.

Diff:
---
 gcc/fortran/trans-array.cc | 33 +++--
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 510f429ef8ed..c7d244689393 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -4294,7 +4294,6 @@ gfc_trans_preloop_setup (gfc_loopinfo * loop, int dim, 
int flag,
   gfc_ss *ss, *pss;
   gfc_loopinfo *ploop;
   gfc_array_ref *ar;
-  int i;
 
   /* This code will be executed before entering the scalarization loop
  for this dimension.  */
@@ -4340,19 +4339,12 @@ gfc_trans_preloop_setup (gfc_loopinfo * loop, int dim, 
int flag,
  pss = ss;
}
 
-  if (dim == loop->dimen - 1)
-   i = 0;
-  else
-   i = dim + 1;
-
-  /* For the time being, there is no loop reordering.  */
-  gcc_assert (i == ploop->order[i]);
-  i = ploop->order[i];
-
   if (dim == loop->dimen - 1 && loop->parent == NULL)
{
+ gcc_assert (0 == ploop->order[0]);
+
  stride = gfc_conv_array_stride (info->descriptor,
- innermost_ss (ss)->dim[i]);
+ innermost_ss (ss)->dim[0]);
 
  /* Calculate the stride of the innermost loop.  Hopefully this will
 allow the backend optimizers to do their stuff more effectively.
@@ -4364,7 +4356,7 @@ gfc_trans_preloop_setup (gfc_loopinfo * loop, int dim, 
int flag,
 base offset of the array.  */
  if (info->ref)
{
- for (i = 0; i < ar->dimen; i++)
+ for (int i = 0; i < ar->dimen; i++)
{
  if (ar->dimen_type[i] != DIMEN_ELEMENT)
continue;
@@ -4374,8 +4366,21 @@ gfc_trans_preloop_setup (gfc_loopinfo * loop, int dim, 
int flag,
}
}
   else
-   /* Add the offset for the previous loop dimension.  */
-   add_array_offset (pblock, ploop, ss, ar, pss->dim[i], i);
+   {
+ int i;
+
+ if (dim == loop->dimen - 1)
+   i = 0;
+ else
+   i = dim + 1;
+
+ /* For the time being, there is no loop reordering.  */
+ gcc_assert (i == ploop->order[i]);
+ i = ploop->order[i];
+
+ /* Add the offset for the previous loop dimension.  */
+ add_array_offset (pblock, ploop, ss, ar, pss->dim[i], i);
+   }
 
   /* Remember this offset for the second loop.  */
   if (dim == loop->temp_dim - 1 && loop->parent == NULL)