date:20220531

[PATCH] Update {skylake, icelake, alderlake}_cost to add a bit preference to vector store.

2022-05-31 Thread Cui,Lili via Gcc-patches

This patch updates {skylake,icelake,alderlake}_cost to add a slight
preference for vector stores.
Since the integer vector construction cost has changed, we need to adjust the
load and store costs for Intel processors.

With the patch applied
538.imagick_r: gets ~6% improvement on ADL for multicopy.
525.x264_r   : gets ~2% improvement on ADL and ICX for multicopy.
with no measurable changes for other benchmarks.

Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. Ok for trunk?

Thanks,
Lili.

gcc/ChangeLog

PR target/105493
* config/i386/x86-tune-costs.h (skylake_cost): Raise the gpr load cost
from 4 to 6 and gpr store cost from 6 to 8. Change SSE loads and
unaligned loads cost from {6, 6, 6, 10, 20} to {8, 8, 8, 8, 16}.
(icelake_cost): Ditto.
(alderlake_cost): Raise the gpr store cost from 6 to 8 and SSE loads,
stores and unaligned stores cost from {6, 6, 6, 10, 15} to
{8, 8, 8, 10, 15}.

gcc/testsuite/

PR target/105493
* gcc.target/i386/pr91446.c: Adjust to expect vectorization.
* gcc.target/i386/pr99881.c: XFAIL.
---
 gcc/config/i386/x86-tune-costs.h| 26 -
 gcc/testsuite/gcc.target/i386/pr91446.c |  2 +-
 gcc/testsuite/gcc.target/i386/pr99881.c |  2 +-
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index ea34a939c68..6c9066c84cc 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1897,15 +1897,15 @@ struct processor_costs skylake_cost = {
   8,   /* "large" insn */
   17,  /* MOVE_RATIO */
   17,  /* CLEAR_RATIO */
-  {4, 4, 4},   /* cost of loading integer registers
+  {6, 6, 6},   /* cost of loading integer registers
   in QImode, HImode and SImode.
   Relative to reg-reg move (2).  */
-  {6, 6, 6},   /* cost of storing integer registers */
-  {6, 6, 6, 10, 20},   /* cost of loading SSE register
+  {8, 8, 8},   /* cost of storing integer registers */
+  {8, 8, 8, 8, 16},/* cost of loading SSE register
   in 32bit, 64bit, 128bit, 256bit and 
512bit */
   {8, 8, 8, 8, 16},/* cost of storing SSE register
   in 32bit, 64bit, 128bit, 256bit and 
512bit */
-  {6, 6, 6, 10, 20},   /* cost of unaligned loads.  */
+  {8, 8, 8, 8, 16},/* cost of unaligned loads.  */
   {8, 8, 8, 8, 16},/* cost of unaligned stores.  */
   2, 2, 4, /* cost of moving XMM,YMM,ZMM register 
*/
   6,   /* cost of moving SSE register to 
integer.  */
@@ -2023,15 +2023,15 @@ struct processor_costs icelake_cost = {
   8,   /* "large" insn */
   17,  /* MOVE_RATIO */
   17,  /* CLEAR_RATIO */
-  {4, 4, 4},   /* cost of loading integer registers
+  {6, 6, 6},   /* cost of loading integer registers
   in QImode, HImode and SImode.
   Relative to reg-reg move (2).  */
-  {6, 6, 6},   /* cost of storing integer registers */
-  {6, 6, 6, 10, 20},   /* cost of loading SSE register
+  {8, 8, 8},   /* cost of storing integer registers */
+  {8, 8, 8, 8, 16},/* cost of loading SSE register
   in 32bit, 64bit, 128bit, 256bit and 
512bit */
   {8, 8, 8, 8, 16},/* cost of storing SSE register
   in 32bit, 64bit, 128bit, 256bit and 
512bit */
-  {6, 6, 6, 10, 20},   /* cost of unaligned loads.  */
+  {8, 8, 8, 8, 16},/* cost of unaligned loads.  */
   {8, 8, 8, 8, 16},/* cost of unaligned stores.  */
   2, 2, 4, /* cost of moving XMM,YMM,ZMM register 
*/
   6,   /* cost of moving SSE register to 
integer.  */
@@ -2146,13 +2146,13 @@ struct processor_costs alderlake_cost = {
   {6, 6, 6},   /* cost of loading integer registers
   in QImode, HImode and SImode.
   Relative to reg-reg move (2).  */
-  {6, 6, 6},   /* cost of storing integer registers */
-  {6, 6, 6, 10, 15},   /* cost of loading SSE register
+  {8, 8, 8},

[PATCH v4 33/34] RISC-V: Add vssex.C

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/vssex.C: New test.

---
 gcc/testsuite/g++.target/riscv/rvv/vssex.C | 1704 
 1 file changed, 1704 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/vssex.C

diff --git a/gcc/testsuite/g++.target/riscv/rvv/vssex.C 
b/gcc/testsuite/g++.target/riscv/rvv/vssex.C
new file mode 100644
index 000..2f89aef1e64
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/vssex.C
@@ -0,0 +1,1704 @@
+/* { dg-do compile } */
+/* { dg-skip-if "test vector intrinsic" { *-*-* } { "*" } { "-march=rv*v*" } } 
*/
+/* { dg-final { check-function-bodies "**" "" } } */
+#include 
+#include 
+
+
+/*
+** test_vsse32_v_f32mf2:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*mf2,\s*t[au],\s*m[au]
+**  ...
+** 
vsse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])
+**  ...
+**  ret
+*/
+extern "C" void
+test_vsse32_v_f32mf2 (float32_t *base, ptrdiff_t bstride, vfloat32mf2_t v0, 
size_t vl)
+{
+  vsse32 (base, bstride, v0, vl);
+}
+
+
+/*
+** test_vsse32_v_f32mf2_m:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*mf2,\s*t[au],\s*m[au]
+**  ...
+** 
vsse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*v0\.t
+**  ...
+**  ret
+*/
+extern "C" void
+test_vsse32_v_f32mf2_m (vbool64_t mask, float32_t *base, ptrdiff_t bstride, 
vfloat32mf2_t v0, size_t vl)
+{
+  vsse32 (mask, base, bstride, v0, vl);
+}
+
+
+/*
+** test_vsse32_v_f32m1:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*m1,\s*t[au],\s*m[au]
+**  ...
+** 
vsse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])
+**  ...
+**  ret
+*/
+extern "C" void
+test_vsse32_v_f32m1 (float32_t *base, ptrdiff_t bstride, vfloat32m1_t v0, 
size_t vl)
+{
+  vsse32 (base, bstride, v0, vl);
+}
+
+
+/*
+** test_vsse32_v_f32m1_m:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*m1,\s*t[au],\s*m[au]
+**  ...
+** 
vsse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*v0\.t
+**  ...
+**  ret
+*/
+extern "C" void
+test_vsse32_v_f32m1_m (vbool32_t mask, float32_t *base, ptrdiff_t bstride, 
vfloat32m1_t v0, size_t vl)
+{
+  vsse32 (mask, base, bstride, v0, vl);
+}
+
+
+/*
+** test_vsse32_v_f32m2:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*m2,\s*t[au],\s*m[au]
+**  ...
+** 
vsse32\.v\s+(?:v[02468]|v[1-2][02468]|v30),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])
+**  ...
+**  ret
+*/
+extern "C" void
+test_vsse32_v_f32m2 (float32_t *base, ptrdiff_t bstride, vfloat32m2_t v0, 
size_t vl)
+{
+  vsse32 (base, bstride, v0, vl);
+}
+
+
+/*
+** test_vsse32_v_f32m2_m:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*m2,\s*t[au],\s*m[au]
+**  ...
+** 
vsse32\.v\s+(?:v[02468]|v[1-2][02468]|v30),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*v0\.t
+**  ...
+**  ret
+*/
+extern "C" void
+test_vsse32_v_f32m2_m (vbool16_t mask, float32_t *base, ptrdiff_t bstride, 
vfloat32m2_t v0, size_t vl)
+{
+  vsse32 (mask, base, bstride, v0, vl);
+}
+
+
+/*
+** test_vsse32_v_f32m4:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*m4,\s*t[au],\s*m[au]
+**  ...
+** 
vsse32\.v\s+(?:v[048]|v1[26]|v2[048]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])
+**  ...
+**  ret
+*/
+extern "C" void
+test_vsse32_v_f32m4 (float32_t *base, ptrdiff_t bstride, vfloat32m4_t v0, 
size_t vl)
+{
+  vsse32 (base, bstride, v0, vl);
+}
+
+
+/*
+** test_vsse32_v_f32m4_m:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*m4,\s*t[au],\s*m[au]
+**  ...
+** 
vsse32\.v\s+(?:v[048]|v1[26]|v2[048]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*v0\.t
+**  ...
+**  ret
+*/
+extern "C" void
+test_vsse32_v_f32m4_m (vbool8_t mask, float32_t *base, ptrdiff_t bstride, 
vfloat32m4_t v0, size_t vl)
+{
+  vsse32 (mask, base, bstride, v0, vl);
+}
+
+
+/*
+** test_vsse32_v_f32m8:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*m8,\s*t[au],\s*m[au]
+**  ...
+** 
vsse32\.v\s+(?:v[08]|v16|v24),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])
+**  ...
+**  ret
+*/
+extern "C" void
+test_vsse32_v_f32m8 (float32_t *base, ptrdiff_t bstride, vfloat32m8_t v0, 
size

[PATCH v4 30/34] RISC-V: Add vluxeix_4.C

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/vluxeix_4.C: New test.

---
 .../g++.target/riscv/rvv/vluxeix_4.C  | 2503 +
 1 file changed, 2503 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/vluxeix_4.C

diff --git a/gcc/testsuite/g++.target/riscv/rvv/vluxeix_4.C 
b/gcc/testsuite/g++.target/riscv/rvv/vluxeix_4.C
new file mode 100644
index 000..2ed5edd0a4d
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/vluxeix_4.C
@@ -0,0 +1,2503 @@
+/* { dg-do compile } */
+/* { dg-skip-if "test vector intrinsic" { *-*-* } { "*" } { "-march=rv*v*" } } 
*/
+/* { dg-final { check-function-bodies "**" "" } } */
+#include 
+#include 
+
+/*
+** test_vluxei8_v_u8mf8:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*mf8,\s*t[au],\s*m[au]
+**  ...
+** 
vluxei8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:v[0-9]|v[1-2][0-9]|v3[0-1])
+**  ...
+**  ret
+*/
+extern "C" vuint8mf8_t
+test_vluxei8_v_u8mf8 (uint8_t *base, vuint8mf8_t bindex, size_t vl)
+{
+  return vluxei8 (base, bindex, vl);
+}
+
+
+/*
+** test_vluxei8_v_u8mf8_m:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*mf8,\s*tu,\s*mu
+**  ...
+** 
vluxei8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*v0\.t
+**  ...
+**  ret
+*/
+extern "C" vuint8mf8_t
+test_vluxei8_v_u8mf8_m (vbool64_t mask, vuint8mf8_t dest, uint8_t *base, 
vuint8mf8_t bindex, size_t vl)
+{
+  return vluxei8 (mask, dest, base, bindex, vl);
+}
+
+
+/*
+** test_vluxei8_v_u16mf4:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e16,\s*mf4,\s*t[au],\s*m[au]
+**  ...
+** 
vluxei8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:v[0-9]|v[1-2][0-9]|v3[0-1])
+**  ...
+**  ret
+*/
+extern "C" vuint16mf4_t
+test_vluxei8_v_u16mf4 (uint16_t *base, vuint8mf8_t bindex, size_t vl)
+{
+  return vluxei8 (base, bindex, vl);
+}
+
+
+/*
+** test_vluxei8_v_u16mf4_m:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e16,\s*mf4,\s*tu,\s*mu
+**  ...
+** 
vluxei8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*v0\.t
+**  ...
+**  ret
+*/
+extern "C" vuint16mf4_t
+test_vluxei8_v_u16mf4_m (vbool64_t mask, vuint16mf4_t dest, uint16_t *base, 
vuint8mf8_t bindex, size_t vl)
+{
+  return vluxei8 (mask, dest, base, bindex, vl);
+}
+
+
+/*
+** test_vluxei8_v_u32mf2:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*mf2,\s*t[au],\s*m[au]
+**  ...
+** 
vluxei8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:v[0-9]|v[1-2][0-9]|v3[0-1])
+**  ...
+**  ret
+*/
+extern "C" vuint32mf2_t
+test_vluxei8_v_u32mf2 (uint32_t *base, vuint8mf8_t bindex, size_t vl)
+{
+  return vluxei8 (base, bindex, vl);
+}
+
+
+/*
+** test_vluxei8_v_u32mf2_m:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*mf2,\s*tu,\s*mu
+**  ...
+** 
vluxei8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*v0\.t
+**  ...
+**  ret
+*/
+extern "C" vuint32mf2_t
+test_vluxei8_v_u32mf2_m (vbool64_t mask, vuint32mf2_t dest, uint32_t *base, 
vuint8mf8_t bindex, size_t vl)
+{
+  return vluxei8 (mask, dest, base, bindex, vl);
+}
+
+
+/*
+** test_vluxei8_v_u64m1:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e64,\s*m1,\s*t[au],\s*m[au]
+**  ...
+** 
vluxei8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:v[0-9]|v[1-2][0-9]|v3[0-1])
+**  ...
+**  ret
+*/
+extern "C" vuint64m1_t
+test_vluxei8_v_u64m1 (uint64_t *base, vuint8mf8_t bindex, size_t vl)
+{
+  return vluxei8 (base, bindex, vl);
+}
+
+
+/*
+** test_vluxei8_v_u64m1_m:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e64,\s*m1,\s*tu,\s*mu
+**  ...
+** 
vluxei8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*v0\.t
+**  ...
+**  ret
+*/
+extern "C" vuint64m1_t
+test_vluxei8_v_u64m1_m (vbool64_t mask, vuint64m1_t dest, uint64_t *base, 
vuint8mf8_t bindex, size_t vl)
+{
+  return vluxei8 (mask, dest, base, bindex, vl);
+}
+
+
+/*
+** test_vluxei8_v_u8mf4:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*mf4,\s*t[au],\s*m[au]
+**  ...
+** 
vluxei8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:v[0-9]|v[1-2][0-9]|v3[0-1])
+**  ...
+**  ret
+*/
+extern "C" vuint8mf4_t
+test_vluxei8_v_u8mf4 (uint8_t *base, vuint8mf4_t bindex, size_t vl)
+{
+  return vluxei8 (base, binde

[PATCH v4 31/34] RISC-V: Add vsex.C

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/vsex.C: New test.

---
 gcc/testsuite/g++.target/riscv/rvv/vsex.C | 1704 +
 1 file changed, 1704 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/vsex.C

diff --git a/gcc/testsuite/g++.target/riscv/rvv/vsex.C 
b/gcc/testsuite/g++.target/riscv/rvv/vsex.C
new file mode 100644
index 000..88aac87922f
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/vsex.C
@@ -0,0 +1,1704 @@
+/* { dg-do compile } */
+/* { dg-skip-if "test vector intrinsic" { *-*-* } { "*" } { "-march=rv*v*" } } 
*/
+/* { dg-final { check-function-bodies "**" "" } } */
+#include 
+#include 
+
+
+/*
+** test_vse32_v_f32mf2:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*mf2,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+extern "C" void
+test_vse32_v_f32mf2 (float32_t *base, vfloat32mf2_t v0, size_t vl)
+{
+  vse32 (base, v0, vl);
+}
+
+
+/*
+** test_vse32_v_f32mf2_m:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*mf2,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+extern "C" void
+test_vse32_v_f32mf2_m (vbool64_t mask, float32_t *base, vfloat32mf2_t v0, 
size_t vl)
+{
+  vse32 (mask, base, v0, vl);
+}
+
+
+/*
+** test_vse32_v_f32m1:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*m1,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+extern "C" void
+test_vse32_v_f32m1 (float32_t *base, vfloat32m1_t v0, size_t vl)
+{
+  vse32 (base, v0, vl);
+}
+
+
+/*
+** test_vse32_v_f32m1_m:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*m1,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+extern "C" void
+test_vse32_v_f32m1_m (vbool32_t mask, float32_t *base, vfloat32m1_t v0, size_t 
vl)
+{
+  vse32 (mask, base, v0, vl);
+}
+
+
+/*
+** test_vse32_v_f32m2:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*m2,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[02468]|v[1-2][02468]|v30),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+extern "C" void
+test_vse32_v_f32m2 (float32_t *base, vfloat32m2_t v0, size_t vl)
+{
+  vse32 (base, v0, vl);
+}
+
+
+/*
+** test_vse32_v_f32m2_m:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*m2,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[02468]|v[1-2][02468]|v30),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+extern "C" void
+test_vse32_v_f32m2_m (vbool16_t mask, float32_t *base, vfloat32m2_t v0, size_t 
vl)
+{
+  vse32 (mask, base, v0, vl);
+}
+
+
+/*
+** test_vse32_v_f32m4:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*m4,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[048]|v1[26]|v2[048]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+extern "C" void
+test_vse32_v_f32m4 (float32_t *base, vfloat32m4_t v0, size_t vl)
+{
+  vse32 (base, v0, vl);
+}
+
+
+/*
+** test_vse32_v_f32m4_m:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*m4,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[048]|v1[26]|v2[048]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+extern "C" void
+test_vse32_v_f32m4_m (vbool8_t mask, float32_t *base, vfloat32m4_t v0, size_t 
vl)
+{
+  vse32 (mask, base, v0, vl);
+}
+
+
+/*
+** test_vse32_v_f32m8:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*m8,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[08]|v16|v24),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+extern "C" void
+test_vse32_v_f32m8 (float32_t *base, vfloat32m8_t v0, size_t vl)
+{
+  vse32 (base, v0, vl);
+}
+
+
+/*
+** test_vse32_v_f32m8_m:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*m8,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[08]|v16|v24),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+extern "C" void
+test_vse32_v_f32m8_m (vbool4_t mask, float32_t *base, vfloat32m8_t v0, size_t 
vl)
+{
+  vse32 (mask, base, v0, vl);
+}
+
+
+/*
+** test_vse64_v_f64m1:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e64,\s*m1,\s*t[au],\s*m[au]
+**  ...
+** 
vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ..

[PATCH v4 25/34] RISC-V: Add vloxeix_4.C

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/vloxeix_4.C: New test.

---
 .../g++.target/riscv/rvv/vloxeix_4.C  | 2503 +
 1 file changed, 2503 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/vloxeix_4.C

diff --git a/gcc/testsuite/g++.target/riscv/rvv/vloxeix_4.C 
b/gcc/testsuite/g++.target/riscv/rvv/vloxeix_4.C
new file mode 100644
index 000..38115441b71
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/vloxeix_4.C
@@ -0,0 +1,2503 @@
+/* { dg-do compile } */
+/* { dg-skip-if "test vector intrinsic" { *-*-* } { "*" } { "-march=rv*v*" } } 
*/
+/* { dg-final { check-function-bodies "**" "" } } */
+#include 
+#include 
+
+/*
+** test_vloxei8_v_u8mf8:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*mf8,\s*t[au],\s*m[au]
+**  ...
+** 
vloxei8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:v[0-9]|v[1-2][0-9]|v3[0-1])
+**  ...
+**  ret
+*/
+extern "C" vuint8mf8_t
+test_vloxei8_v_u8mf8 (uint8_t *base, vuint8mf8_t bindex, size_t vl)
+{
+  return vloxei8 (base, bindex, vl);
+}
+
+
+/*
+** test_vloxei8_v_u8mf8_m:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*mf8,\s*tu,\s*mu
+**  ...
+** 
vloxei8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*v0\.t
+**  ...
+**  ret
+*/
+extern "C" vuint8mf8_t
+test_vloxei8_v_u8mf8_m (vbool64_t mask, vuint8mf8_t dest, uint8_t *base, 
vuint8mf8_t bindex, size_t vl)
+{
+  return vloxei8 (mask, dest, base, bindex, vl);
+}
+
+
+/*
+** test_vloxei8_v_u16mf4:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e16,\s*mf4,\s*t[au],\s*m[au]
+**  ...
+** 
vloxei8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:v[0-9]|v[1-2][0-9]|v3[0-1])
+**  ...
+**  ret
+*/
+extern "C" vuint16mf4_t
+test_vloxei8_v_u16mf4 (uint16_t *base, vuint8mf8_t bindex, size_t vl)
+{
+  return vloxei8 (base, bindex, vl);
+}
+
+
+/*
+** test_vloxei8_v_u16mf4_m:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e16,\s*mf4,\s*tu,\s*mu
+**  ...
+** 
vloxei8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*v0\.t
+**  ...
+**  ret
+*/
+extern "C" vuint16mf4_t
+test_vloxei8_v_u16mf4_m (vbool64_t mask, vuint16mf4_t dest, uint16_t *base, 
vuint8mf8_t bindex, size_t vl)
+{
+  return vloxei8 (mask, dest, base, bindex, vl);
+}
+
+
+/*
+** test_vloxei8_v_u32mf2:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*mf2,\s*t[au],\s*m[au]
+**  ...
+** 
vloxei8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:v[0-9]|v[1-2][0-9]|v3[0-1])
+**  ...
+**  ret
+*/
+extern "C" vuint32mf2_t
+test_vloxei8_v_u32mf2 (uint32_t *base, vuint8mf8_t bindex, size_t vl)
+{
+  return vloxei8 (base, bindex, vl);
+}
+
+
+/*
+** test_vloxei8_v_u32mf2_m:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*mf2,\s*tu,\s*mu
+**  ...
+** 
vloxei8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*v0\.t
+**  ...
+**  ret
+*/
+extern "C" vuint32mf2_t
+test_vloxei8_v_u32mf2_m (vbool64_t mask, vuint32mf2_t dest, uint32_t *base, 
vuint8mf8_t bindex, size_t vl)
+{
+  return vloxei8 (mask, dest, base, bindex, vl);
+}
+
+
+/*
+** test_vloxei8_v_u64m1:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e64,\s*m1,\s*t[au],\s*m[au]
+**  ...
+** 
vloxei8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:v[0-9]|v[1-2][0-9]|v3[0-1])
+**  ...
+**  ret
+*/
+extern "C" vuint64m1_t
+test_vloxei8_v_u64m1 (uint64_t *base, vuint8mf8_t bindex, size_t vl)
+{
+  return vloxei8 (base, bindex, vl);
+}
+
+
+/*
+** test_vloxei8_v_u64m1_m:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e64,\s*m1,\s*tu,\s*mu
+**  ...
+** 
vloxei8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*v0\.t
+**  ...
+**  ret
+*/
+extern "C" vuint64m1_t
+test_vloxei8_v_u64m1_m (vbool64_t mask, vuint64m1_t dest, uint64_t *base, 
vuint8mf8_t bindex, size_t vl)
+{
+  return vloxei8 (mask, dest, base, bindex, vl);
+}
+
+
+/*
+** test_vloxei8_v_u8mf4:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*mf4,\s*t[au],\s*m[au]
+**  ...
+** 
vloxei8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:v[0-9]|v[1-2][0-9]|v3[0-1])
+**  ...
+**  ret
+*/
+extern "C" vuint8mf4_t
+test_vloxei8_v_u8mf4 (uint8_t *base, vuint8mf4_t bindex, size_t vl)
+{
+  return vloxei8 (base, binde

[PATCH v4 17/34] RISC-V: Add vsex.c

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/intrinsic/vsex.c: New test.

---
 .../gcc.target/riscv/rvv/intrinsic/vsex.c | 4776 +
 1 file changed, 4776 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vsex.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vsex.c 
b/gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vsex.c
new file mode 100644
index 000..bc6f4f34135
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vsex.c
@@ -0,0 +1,4776 @@
+/* { dg-do compile } */
+/* { dg-skip-if "test vector intrinsic" { *-*-* } { "*" } { "-march=rv*v*" } } 
*/
+/* { dg-final { check-function-bodies "**" "" } } */
+#include 
+#include 
+
+/*
+** test_vse32_v_f32mf2:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*mf2,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+void
+test_vse32_v_f32mf2 (float32_t *base, vfloat32mf2_t v0, size_t vl)
+{
+  vse32_v_f32mf2 (base, v0, vl);
+}
+
+/*
+** test_vse32_v_f32mf2_vl31:
+**  ...
+** vsetivli\s+zero,\s*31,\s*e32,\s*mf2,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+void
+test_vse32_v_f32mf2_vl31 (float32_t *base, vfloat32mf2_t v0)
+{
+  vse32_v_f32mf2 (base, v0, 31);
+}
+
+/*
+** test_vse32_v_f32mf2_m:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*mf2,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+void
+test_vse32_v_f32mf2_m (vbool64_t mask, float32_t *base, vfloat32mf2_t v0, 
size_t vl)
+{
+  vse32_v_f32mf2_m (mask, base, v0, vl);
+}
+
+/*
+** test_vse32_v_f32mf2_m_vl31:
+**  ...
+** vsetivli\s+zero,\s*31,\s*e32,\s*mf2,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+void
+test_vse32_v_f32mf2_m_vl31 (vbool64_t mask, float32_t *base, vfloat32mf2_t v0)
+{
+  vse32_v_f32mf2_m (mask, base, v0, 31);
+}
+
+/*
+** test_vse32_v_f32m1:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*m1,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+void
+test_vse32_v_f32m1 (float32_t *base, vfloat32m1_t v0, size_t vl)
+{
+  vse32_v_f32m1 (base, v0, vl);
+}
+
+/*
+** test_vse32_v_f32m1_vl31:
+**  ...
+** vsetivli\s+zero,\s*31,\s*e32,\s*m1,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+void
+test_vse32_v_f32m1_vl31 (float32_t *base, vfloat32m1_t v0)
+{
+  vse32_v_f32m1 (base, v0, 31);
+}
+
+/*
+** test_vse32_v_f32m1_m:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*m1,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+void
+test_vse32_v_f32m1_m (vbool32_t mask, float32_t *base, vfloat32m1_t v0, size_t 
vl)
+{
+  vse32_v_f32m1_m (mask, base, v0, vl);
+}
+
+/*
+** test_vse32_v_f32m1_m_vl31:
+**  ...
+** vsetivli\s+zero,\s*31,\s*e32,\s*m1,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+void
+test_vse32_v_f32m1_m_vl31 (vbool32_t mask, float32_t *base, vfloat32m1_t v0)
+{
+  vse32_v_f32m1_m (mask, base, v0, 31);
+}
+
+/*
+** test_vse32_v_f32m2:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*m2,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[02468]|v[1-2][02468]|v30),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+void
+test_vse32_v_f32m2 (float32_t *base, vfloat32m2_t v0, size_t vl)
+{
+  vse32_v_f32m2 (base, v0, vl);
+}
+
+/*
+** test_vse32_v_f32m2_vl31:
+**  ...
+** vsetivli\s+zero,\s*31,\s*e32,\s*m2,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[02468]|v[1-2][02468]|v30),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+void
+test_vse32_v_f32m2_vl31 (float32_t *base, vfloat32m2_t v0)
+{
+  vse32_v_f32m2 (base, v0, 31);
+}
+
+/*
+** test_vse32_v_f32m2_m:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e32,\s*m2,\s*t[au],\s*m[au]
+**  ...
+** 
vse32\.v\s+(?:v[02468]|v[1-2][02468]|v30),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+void
+test_vse32_v_f32m2_m (vbool16_t mask, float32_t *base, vfloat32m2_t v0, size_t 
vl)
+{
+  vse32_v_f32m2_m (mask, base, v0, vl);
+}
+
+/*
+** test_vse32_v_f32m2

[PATCH v4 12/34] RISC-V: Add vlsex_2.c

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/intrinsic/vlsex_2.c: New test.

---
 .../gcc.target/riscv/rvv/intrinsic/vlsex_2.c  | 1251 +
 1 file changed, 1251 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vlsex_2.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vlsex_2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vlsex_2.c
new file mode 100644
index 000..a3d8b4fd588
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vlsex_2.c
@@ -0,0 +1,1251 @@
+/* { dg-do compile } */
+/* { dg-skip-if "test vector intrinsic" { *-*-* } { "*" } { "-march=rv*v*" } } 
*/
+/* { dg-final { check-function-bodies "**" "" } } */
+#include 
+#include 
+
+/*
+** test_vlse8_v_i8mf2_m_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*mf2,\s*tu,\s*mu
+**  ...
+** 
vlse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*v0\.t
+**  ...
+**  ret
+*/
+vint8mf2_t
+test_vlse8_v_i8mf2_m_vl32 (vbool16_t mask, vint8mf2_t dest, int8_t *base, 
ptrdiff_t bstride)
+{
+  return vlse8_v_i8mf2_m (mask, dest, base, bstride, 32);
+}
+
+/*
+** test_vlse8_v_i8m1_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m1,\s*t[au],\s*m[au]
+**  ...
+** 
vlse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])
+**  ...
+**  ret
+*/
+vint8m1_t
+test_vlse8_v_i8m1_vl32 (int8_t *base, ptrdiff_t bstride)
+{
+  return vlse8_v_i8m1 (base, bstride, 32);
+}
+
+/*
+** test_vlse8_v_i8m1_m_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m1,\s*tu,\s*mu
+**  ...
+** 
vlse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*v0\.t
+**  ...
+**  ret
+*/
+vint8m1_t
+test_vlse8_v_i8m1_m_vl32 (vbool8_t mask, vint8m1_t dest, int8_t *base, 
ptrdiff_t bstride)
+{
+  return vlse8_v_i8m1_m (mask, dest, base, bstride, 32);
+}
+
+/*
+** test_vlse8_v_i8m2_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m2,\s*t[au],\s*m[au]
+**  ...
+** 
vlse8\.v\s+(?:v[02468]|v[1-2][02468]|v30),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])
+**  ...
+**  ret
+*/
+vint8m2_t
+test_vlse8_v_i8m2_vl32 (int8_t *base, ptrdiff_t bstride)
+{
+  return vlse8_v_i8m2 (base, bstride, 32);
+}
+
+/*
+** test_vlse8_v_i8m2_m_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m2,\s*tu,\s*mu
+**  ...
+** 
vlse8\.v\s+(?:v[02468]|v[1-2][02468]|v30),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*v0\.t
+**  ...
+**  ret
+*/
+vint8m2_t
+test_vlse8_v_i8m2_m_vl32 (vbool4_t mask, vint8m2_t dest, int8_t *base, 
ptrdiff_t bstride)
+{
+  return vlse8_v_i8m2_m (mask, dest, base, bstride, 32);
+}
+
+/*
+** test_vlse8_v_i8m4_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m4,\s*t[au],\s*m[au]
+**  ...
+** 
vlse8\.v\s+(?:v[048]|v1[26]|v2[048]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])
+**  ...
+**  ret
+*/
+vint8m4_t
+test_vlse8_v_i8m4_vl32 (int8_t *base, ptrdiff_t bstride)
+{
+  return vlse8_v_i8m4 (base, bstride, 32);
+}
+
+/*
+** test_vlse8_v_i8m4_m_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m4,\s*tu,\s*mu
+**  ...
+** 
vlse8\.v\s+(?:v[048]|v1[26]|v2[048]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*v0\.t
+**  ...
+**  ret
+*/
+vint8m4_t
+test_vlse8_v_i8m4_m_vl32 (vbool2_t mask, vint8m4_t dest, int8_t *base, 
ptrdiff_t bstride)
+{
+  return vlse8_v_i8m4_m (mask, dest, base, bstride, 32);
+}
+
+/*
+** test_vlse8_v_i8m8_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m8,\s*t[au],\s*m[au]
+**  ...
+** 
vlse8\.v\s+(?:v[08]|v16|v24),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])
+**  ...
+**  ret
+*/
+vint8m8_t
+test_vlse8_v_i8m8_vl32 (int8_t *base, ptrdiff_t bstride)
+{
+  return vlse8_v_i8m8 (base, bstride, 32);
+}
+
+/*
+** test_vlse8_v_i8m8_m_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m8,\s*tu,\s*mu
+**  ...
+** 
vlse8\.v\s+(?:v[08]|v16|v24),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*v0\.t
+**  ...
+**  ret
+*/
+vint8m8_t
+test_vlse8_v_i8m8_m_vl32 (vbool1_t mask, vint8m8_t dest, int8_t *base, 
ptrdiff_t bstride)
+{
+  return vlse8_v_i8m8_m (mask, dest, base, bstride, 32);
+}
+
+/*
+** test_

[PATCH v4 06/34] RISC-V: Add vlexff_2.c

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/intrinsic/vlexff_2.c: New test.

---
 .../gcc.target/riscv/rvv/intrinsic/vlexff_2.c | 1251 +
 1 file changed, 1251 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vlexff_2.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vlexff_2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vlexff_2.c
new file mode 100644
index 000..8cdc87dfaee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vlexff_2.c
@@ -0,0 +1,1251 @@
+/* { dg-do compile } */
+/* { dg-skip-if "test vector intrinsic" { *-*-* } { "*" } { "-march=rv*v*" } } 
*/
+/* { dg-final { check-function-bodies "**" "" } } */
+#include 
+#include 
+
+/*
+** test_vle8ff_v_i8mf2_m_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*mf2,\s*tu,\s*mu
+**  ...
+** 
vle8ff\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+vint8mf2_t
+test_vle8ff_v_i8mf2_m_vl32 (vbool16_t mask, vint8mf2_t dest, int8_t *base, 
size_t *new_vl)
+{
+  return vle8ff_v_i8mf2_m (mask, dest, base, new_vl, 32);
+}
+
+/*
+** test_vle8ff_v_i8m1_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m1,\s*t[au],\s*m[au]
+**  ...
+** 
vle8ff\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+vint8m1_t
+test_vle8ff_v_i8m1_vl32 (int8_t *base, size_t *new_vl)
+{
+  return vle8ff_v_i8m1 (base, new_vl, 32);
+}
+
+/*
+** test_vle8ff_v_i8m1_m_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m1,\s*tu,\s*mu
+**  ...
+** 
vle8ff\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+vint8m1_t
+test_vle8ff_v_i8m1_m_vl32 (vbool8_t mask, vint8m1_t dest, int8_t *base, size_t 
*new_vl)
+{
+  return vle8ff_v_i8m1_m (mask, dest, base, new_vl, 32);
+}
+
+/*
+** test_vle8ff_v_i8m2_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m2,\s*t[au],\s*m[au]
+**  ...
+** 
vle8ff\.v\s+(?:v[02468]|v[1-2][02468]|v30),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+vint8m2_t
+test_vle8ff_v_i8m2_vl32 (int8_t *base, size_t *new_vl)
+{
+  return vle8ff_v_i8m2 (base, new_vl, 32);
+}
+
+/*
+** test_vle8ff_v_i8m2_m_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m2,\s*tu,\s*mu
+**  ...
+** 
vle8ff\.v\s+(?:v[02468]|v[1-2][02468]|v30),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+vint8m2_t
+test_vle8ff_v_i8m2_m_vl32 (vbool4_t mask, vint8m2_t dest, int8_t *base, size_t 
*new_vl)
+{
+  return vle8ff_v_i8m2_m (mask, dest, base, new_vl, 32);
+}
+
+/*
+** test_vle8ff_v_i8m4_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m4,\s*t[au],\s*m[au]
+**  ...
+** 
vle8ff\.v\s+(?:v[048]|v1[26]|v2[048]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+vint8m4_t
+test_vle8ff_v_i8m4_vl32 (int8_t *base, size_t *new_vl)
+{
+  return vle8ff_v_i8m4 (base, new_vl, 32);
+}
+
+/*
+** test_vle8ff_v_i8m4_m_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m4,\s*tu,\s*mu
+**  ...
+** 
vle8ff\.v\s+(?:v[048]|v1[26]|v2[048]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+vint8m4_t
+test_vle8ff_v_i8m4_m_vl32 (vbool2_t mask, vint8m4_t dest, int8_t *base, size_t 
*new_vl)
+{
+  return vle8ff_v_i8m4_m (mask, dest, base, new_vl, 32);
+}
+
+/*
+** test_vle8ff_v_i8m8_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m8,\s*t[au],\s*m[au]
+**  ...
+** 
vle8ff\.v\s+(?:v[08]|v16|v24),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+vint8m8_t
+test_vle8ff_v_i8m8_vl32 (int8_t *base, size_t *new_vl)
+{
+  return vle8ff_v_i8m8 (base, new_vl, 32);
+}
+
+/*
+** test_vle8ff_v_i8m8_m_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m8,\s*tu,\s*mu
+**  ...
+** 
vle8ff\.v\s+(?:v[08]|v16|v24),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+vint8m8_t
+test_vle8ff_v_i8m8_m_vl32 (vbool1_t mask, vint8m8_t dest, int8_t *base, size_t 
*new_vl)
+{
+  return vle8ff_v_i8m8_m (mask, dest, base, new_vl, 32);
+}
+
+/*
+** test_vle16ff_v_i16mf4_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e16,\s*mf4,\s*t[au],\s*m[au]
+**  ...
+** 
vle16ff\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+vint16mf4_t
+test_vle16ff_v_i16mf4_vl32 (int16_t *base, size_t *new_vl)
+{
+  return vle16ff_v_i16mf4 (base, new_vl, 32);
+}
+
+/*
+** test_vle16ff_v_i16mf

[PATCH v4 04/34] RISC-V: Add mask load store testcases

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/intrinsic/mask_load_store.c: New test.
* gcc.target/riscv/rvv/intrinsic/mask_load_store_31.c: New test.
* gcc.target/riscv/rvv/intrinsic/mask_load_store_32.c: New test.

---
 .../riscv/rvv/intrinsic/mask_load_store.c | 77 +++
 .../riscv/rvv/intrinsic/mask_load_store_31.c  | 77 +++
 .../riscv/rvv/intrinsic/mask_load_store_32.c  | 77 +++
 3 files changed, 231 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/intrinsic/mask_load_store.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/intrinsic/mask_load_store_31.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/intrinsic/mask_load_store_32.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/intrinsic/mask_load_store.c 
b/gcc/testsuite/gcc.target/riscv/rvv/intrinsic/mask_load_store.c
new file mode 100644
index 000..01117233024
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/intrinsic/mask_load_store.c
@@ -0,0 +1,77 @@
+
+/* { dg-do compile } */
+/* { dg-skip-if "test vector intrinsic" { *-*-* } { "*" } { "-march=rv*v*" } } 
*/
+
+#include 
+#include 
+
+
+vbool1_t test_vlm_v_b1_vl(const uint8_t *base, size_t vl) {
+  return vlm_v_b1(base, vl);
+}
+/* { dg-final { scan-assembler-times 
{vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m8,\s*ta,\s*mu\s+vlm\.v\s+(?:v[08]|v16|v24),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)\n(?:
 test_vlm_v_b1 )?} 1 } } */
+
+void test_vsm_v_b1_vl(uint8_t *base, vbool1_t value, size_t vl) {
+  vsm_v_b1(base, value, vl);
+}
+/* { dg-final { scan-assembler-times 
{vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m8,\s*ta,\s*mu\s+vsm\.v\s+(?:v[08]|v16|v24),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)\n(?:
 test_vsm_v_b1 )?} 1 } } */
+
+vbool2_t test_vlm_v_b2_vl(const uint8_t *base, size_t vl) {
+  return vlm_v_b2(base, vl);
+}
+/* { dg-final { scan-assembler-times 
{vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m4,\s*ta,\s*mu\s+vlm\.v\s+(?:v[048]|v1[26]|v2[048]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)\n(?:
 test_vlm_v_b2 )?} 1 } } */
+
+void test_vsm_v_b2_vl(uint8_t *base, vbool2_t value, size_t vl) {
+  vsm_v_b2(base, value, vl);
+}
+/* { dg-final { scan-assembler-times 
{vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m4,\s*ta,\s*mu\s+vsm\.v\s+(?:v[048]|v1[26]|v2[048]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)\n(?:
 test_vsm_v_b2 )?} 1 } } */
+
+vbool4_t test_vlm_v_b4_vl(const uint8_t *base, size_t vl) {
+  return vlm_v_b4(base, vl);
+}
+/* { dg-final { scan-assembler-times 
{vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m2,\s*ta,\s*mu\s+vlm\.v\s+(?:v[02468]|v[1-2][02468]|v30),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)\n(?:
 test_vlm_v_b4 )?} 1 } } */
+
+void test_vsm_v_b4_vl(uint8_t *base, vbool4_t value, size_t vl) {
+  vsm_v_b4(base, value, vl);
+}
+/* { dg-final { scan-assembler-times 
{vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m2,\s*ta,\s*mu\s+vsm\.v\s+(?:v[02468]|v[1-2][02468]|v30),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)\n(?:
 test_vsm_v_b4 )?} 1 } } */
+
+vbool8_t test_vlm_v_b8_vl(const uint8_t *base, size_t vl) {
+  return vlm_v_b8(base, vl);
+}
+/* { dg-final { scan-assembler-times 
{vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m1,\s*ta,\s*mu\s+vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)\n(?:
 test_vlm_v_b8 )?} 1 } } */
+
+void test_vsm_v_b8_vl(uint8_t *base, vbool8_t value, size_t vl) {
+  vsm_v_b8(base, value, vl);
+}
+/* { dg-final { scan-assembler-times 
{vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m1,\s*ta,\s*mu\s+vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)\n(?:
 test_vsm_v_b8 )?} 1 } } */
+
+vbool16_t test_vlm_v_b16_vl(const uint8_t *base, size_t vl) {
+  return vlm_v_b16(base, vl);
+}
+/* { dg-final { scan-assembler-times 
{vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*mf2,\s*ta,\s*mu\s+vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)\n(?:
 test_vlm_v_b16 )?} 1 } } */
+
+void test_vsm_v_b16_vl(uint8_t *base, vbool16_t value, size_t vl) {
+  vsm_v_b16(base, value, vl);
+}
+/* { dg-final { scan-assembler-times 
{vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*mf2,\s*ta,\s*mu\s+vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)\n(?:
 test_vsm_v_b16 )?} 1 } } */
+
+vbool32_t test_vlm_v_b32_vl(const uint8_t *base, size_t vl) {
+  return vlm_v_b32(base, vl);
+}
+/* { dg-final { scan-assembler-times 
{vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*mf4,\s*ta,\s*mu\s+vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s1

[PATCH v4 02/34] RISC-V: Add vlex_2.c

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/intrinsic/vlex_2.c: New test.

---
 .../gcc.target/riscv/rvv/intrinsic/vlex_2.c   | 1251 +
 1 file changed, 1251 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vlex_2.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vlex_2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vlex_2.c
new file mode 100644
index 000..15fc3bfc2c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vlex_2.c
@@ -0,0 +1,1251 @@
+/* { dg-do compile } */
+/* { dg-skip-if "test vector intrinsic" { *-*-* } { "*" } { "-march=rv*v*" } } 
*/
+/* { dg-final { check-function-bodies "**" "" } } */
+#include 
+#include 
+
+/*
+** test_vle8_v_i8mf2_m_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*mf2,\s*tu,\s*mu
+**  ...
+** 
vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+vint8mf2_t
+test_vle8_v_i8mf2_m_vl32 (vbool16_t mask, vint8mf2_t dest, int8_t *base)
+{
+  return vle8_v_i8mf2_m (mask, dest, base, 32);
+}
+
+/*
+** test_vle8_v_i8m1_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m1,\s*t[au],\s*m[au]
+**  ...
+** 
vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+vint8m1_t
+test_vle8_v_i8m1_vl32 (int8_t *base)
+{
+  return vle8_v_i8m1 (base, 32);
+}
+
+/*
+** test_vle8_v_i8m1_m_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m1,\s*tu,\s*mu
+**  ...
+** 
vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+vint8m1_t
+test_vle8_v_i8m1_m_vl32 (vbool8_t mask, vint8m1_t dest, int8_t *base)
+{
+  return vle8_v_i8m1_m (mask, dest, base, 32);
+}
+
+/*
+** test_vle8_v_i8m2_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m2,\s*t[au],\s*m[au]
+**  ...
+** 
vle8\.v\s+(?:v[02468]|v[1-2][02468]|v30),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+vint8m2_t
+test_vle8_v_i8m2_vl32 (int8_t *base)
+{
+  return vle8_v_i8m2 (base, 32);
+}
+
+/*
+** test_vle8_v_i8m2_m_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m2,\s*tu,\s*mu
+**  ...
+** 
vle8\.v\s+(?:v[02468]|v[1-2][02468]|v30),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+vint8m2_t
+test_vle8_v_i8m2_m_vl32 (vbool4_t mask, vint8m2_t dest, int8_t *base)
+{
+  return vle8_v_i8m2_m (mask, dest, base, 32);
+}
+
+/*
+** test_vle8_v_i8m4_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m4,\s*t[au],\s*m[au]
+**  ...
+** 
vle8\.v\s+(?:v[048]|v1[26]|v2[048]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+vint8m4_t
+test_vle8_v_i8m4_vl32 (int8_t *base)
+{
+  return vle8_v_i8m4 (base, 32);
+}
+
+/*
+** test_vle8_v_i8m4_m_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m4,\s*tu,\s*mu
+**  ...
+** 
vle8\.v\s+(?:v[048]|v1[26]|v2[048]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+vint8m4_t
+test_vle8_v_i8m4_m_vl32 (vbool2_t mask, vint8m4_t dest, int8_t *base)
+{
+  return vle8_v_i8m4_m (mask, dest, base, 32);
+}
+
+/*
+** test_vle8_v_i8m8_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m8,\s*t[au],\s*m[au]
+**  ...
+** 
vle8\.v\s+(?:v[08]|v16|v24),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+vint8m8_t
+test_vle8_v_i8m8_vl32 (int8_t *base)
+{
+  return vle8_v_i8m8 (base, 32);
+}
+
+/*
+** test_vle8_v_i8m8_m_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e8,\s*m8,\s*tu,\s*mu
+**  ...
+** 
vle8\.v\s+(?:v[08]|v16|v24),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+vint8m8_t
+test_vle8_v_i8m8_m_vl32 (vbool1_t mask, vint8m8_t dest, int8_t *base)
+{
+  return vle8_v_i8m8_m (mask, dest, base, 32);
+}
+
+/*
+** test_vle16_v_i16mf4_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e16,\s*mf4,\s*t[au],\s*m[au]
+**  ...
+** 
vle16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**  ...
+**  ret
+*/
+vint16mf4_t
+test_vle16_v_i16mf4_vl32 (int16_t *base)
+{
+  return vle16_v_i16mf4 (base, 32);
+}
+
+/*
+** test_vle16_v_i16mf4_m_vl32:
+**  ...
+** 
vsetvli\s+zero,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*e16,\s*mf4,\s*tu,\s*mu
+**  ...
+** 
vle16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\),\s*v0\.t
+**  ...
+**  ret
+*/
+vint16mf4_t
+test_vle16_v_i16mf4_m_vl32 (vbool64_t mask, vint16mf4_t d

[PATCH 00/34] RISC-V: Add RVV (RISC-V 'V' Extension) support

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

This patch adds the testcases that were missed in v1.

*** BLURB HERE ***

zhongjuzhe (34):
  RISC-V: Add vlex_1.c
  RISC-V: Add vlex_2.c
  RISC-V: Add vlex_1.C
  RISC-V: Add mask load store testcases
  RISC-V: Add vlexff_1.c
  RISC-V: Add vlexff_2.c
  RISC-V: Add vloxeix_1.c
  RISC-V: Add vloxeix_2.c
  RISC-V: Add vloxeix_3.c
  RISC-V: Add vloxeix_4.c
  RISC-V: Add vlsex_1.c
  RISC-V: Add vlsex_2.c
  RISC-V: Add vluxeix_1.c
  RISC-V: Add vluxeix_2.c
  RISC-V: Add vluxeix_3.c
  RISC-V: Add vluxeix_4.c
  RISC-V: Add vsex.c
  RISC-V: Add vsoxeix.c
  RISC-V: Add vssex.c
  RISC-V: Add vsuxeix.c
  RISC-V: Add vlexff_1.C
  RISC-V: Add vloxeix_1.C
  RISC-V: Add vloxeix_2.C
  RISC-V: Add vloxeix_3.C
  RISC-V: Add vloxeix_4.C
  RISC-V: Add vlsex_1.C
  RISC-V: Add vluxeix_1.C
  RISC-V: Add vluxeix_2.C
  RISC-V: Add vluxeix_3.C
  RISC-V: Add vluxeix_4.C
  RISC-V: Add vsex.C
  RISC-V: Add vsoxeix.C
  RISC-V: Add vssex.C
  RISC-V: Add vsuxeix.C

 gcc/testsuite/g++.target/riscv/rvv/vlex_1.C   |  6792 ++
 gcc/testsuite/g++.target/riscv/rvv/vlexff_1.C |  6792 ++
 .../g++.target/riscv/rvv/vloxeix_1.C  |  8663 +++
 .../g++.target/riscv/rvv/vloxeix_2.C  |  7191 ++
 .../g++.target/riscv/rvv/vloxeix_3.C  |  6120 +
 .../g++.target/riscv/rvv/vloxeix_4.C  |  2503 +++
 gcc/testsuite/g++.target/riscv/rvv/vlsex_1.C  |  6792 ++
 .../g++.target/riscv/rvv/vluxeix_1.C  |  8663 +++
 .../g++.target/riscv/rvv/vluxeix_2.C  |  7191 ++
 .../g++.target/riscv/rvv/vluxeix_3.C  |  6120 +
 .../g++.target/riscv/rvv/vluxeix_4.C  |  2503 +++
 gcc/testsuite/g++.target/riscv/rvv/vsex.C |  1704 ++
 gcc/testsuite/g++.target/riscv/rvv/vsoxeix.C  |  6120 +
 gcc/testsuite/g++.target/riscv/rvv/vssex.C|  1704 ++
 gcc/testsuite/g++.target/riscv/rvv/vsuxeix.C  |  6120 +
 .../riscv/rvv/intrinsic/mask_load_store.c |77 +
 .../riscv/rvv/intrinsic/mask_load_store_31.c  |77 +
 .../riscv/rvv/intrinsic/mask_load_store_32.c  |77 +
 .../gcc.target/riscv/rvv/intrinsic/vlex_1.c   | 17840 +++
 .../gcc.target/riscv/rvv/intrinsic/vlex_2.c   |  1251 ++
 .../gcc.target/riscv/rvv/intrinsic/vlexff_1.c | 17840 +++
 .../gcc.target/riscv/rvv/intrinsic/vlexff_2.c |  1251 ++
 .../riscv/rvv/intrinsic/vloxeix_1.c   | 16220 +
 .../riscv/rvv/intrinsic/vloxeix_2.c   | 18755 
 .../riscv/rvv/intrinsic/vloxeix_3.c   | 18320 +++
 .../riscv/rvv/intrinsic/vloxeix_4.c   | 15486 +
 .../gcc.target/riscv/rvv/intrinsic/vlsex_1.c  | 17840 +++
 .../gcc.target/riscv/rvv/intrinsic/vlsex_2.c  |  1251 ++
 .../riscv/rvv/intrinsic/vluxeix_1.c   | 16220 +
 .../riscv/rvv/intrinsic/vluxeix_2.c   | 18755 
 .../riscv/rvv/intrinsic/vluxeix_3.c   | 18320 +++
 .../riscv/rvv/intrinsic/vluxeix_4.c   | 15486 +
 .../gcc.target/riscv/rvv/intrinsic/vsex.c |  4776 
 .../gcc.target/riscv/rvv/intrinsic/vsoxeix.c  | 17196 ++
 .../gcc.target/riscv/rvv/intrinsic/vssex.c|  4776 
 .../gcc.target/riscv/rvv/intrinsic/vsuxeix.c  | 17196 ++
 36 files changed, 323988 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/vlex_1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/vlexff_1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/vloxeix_1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/vloxeix_2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/vloxeix_3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/vloxeix_4.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/vlsex_1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/vluxeix_1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/vluxeix_2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/vluxeix_3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/vluxeix_4.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/vsex.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/vsoxeix.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/vssex.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/vsuxeix.C
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/intrinsic/mask_load_store.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/intrinsic/mask_load_store_31.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/intrinsic/mask_load_store_32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vlex_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vlex_2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vlexff_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vlexff_2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vloxeix_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vloxeix_2.c
 c

[PATCH v3] RISC-V: Add load and store intrinsics support for RVV support

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

This patch is a supplemental patch for [PATCH 14/21], which was missed in v1.

gcc/ChangeLog:

* config/riscv/constraints.md (vi): New constraint.
(vj): New constraint.
(vk): New constraint.
(vc): New constraint.
(Wn5): New constraint.
* config/riscv/predicates.md (vector_any_register_operand): New 
predicate.
(p_reg_or_0_operand): New predicate.
(vector_reg_or_const0_operand): New predicate.
(vector_move_operand): New predicate.
(p_reg_or_uimm5_operand): New predicate.
(reg_or_const_int_operand): New predicate.
(reg_or_uimm5_operand): New predicate.
(reg_or_neg_simm5_operand): New predicate.
(vector_const_simm5_operand): New predicate.
(vector_neg_const_simm5_operand): New predicate.
(vector_const_uimm5_operand): New predicate.
(vector_arith_operand): New predicate.
(vector_neg_arith_operand): New predicate.
(vector_shift_operand): New predicate.
(vector_constant_vector_operand): New predicate.
(vector_perm_operand): New predicate.
* config/riscv/riscv-vector-builtins-functions.cc (intrinsic_rename): 
New function.
(vector_scalar_operation_p): New function.
(readvl::call_properties): New function.
(readvl::assemble_name): New function.
(readvl::get_return_type): New function.
(readvl::get_argument_types): New function.
(readvl::expand): New function.
(vlse::call_properties): New function.
(vlse::get_return_type): New function.
(vlse::get_argument_types): New function.
(vlse::can_be_overloaded_p): New function.
(vlse::expand): New function.
(vsse::call_properties): New function.
(vsse::get_argument_types): New function.
(vsse::can_be_overloaded_p): New function.
(vsse::expand): New function.
(vlm::assemble_name): New function.
(vlm::call_properties): New function.
(vlm::get_return_type): New function.
(vlm::get_argument_types): New function.
(vlm::expand): New function.
(vsm::assemble_name): New function.
(vsm::call_properties): New function.
(vsm::get_argument_types): New function.
(vsm::expand): New function.
(indexedloadstore::assemble_name): New function.
(indexedloadstore::get_argument_types): New function.
(vlxei::call_properties): New function.
(vlxei::get_return_type): New function.
(vluxei::expand): New function.
(vloxei::expand): New function.
(vsuxei::call_properties): New function.
(vsuxei::expand): New function.
(vsoxei::call_properties): New function.
(vsoxei::expand): New function.
(vleff::call_properties): New function.
(vleff::assemble_name): New function.
(vleff::get_return_type): New function.
(vleff::get_argument_types): New function.
(vleff::can_be_overloaded_p): New function.
(vleff::fold): New function.
(vleff::expand): New function.
* config/riscv/riscv-vector-builtins-functions.def (readvl): New macro 
define.
(vlm): New macro define.
(vsm): New macro define.
(vlse): New macro define.
(vsse): New macro define.
(vluxei): New macro define.
(vloxei): New macro define.
(vsuxei): New macro define.
(vsoxei): New macro define.
(vleff): New macro define.
* config/riscv/riscv-vector-builtins-functions.h (class readvl): New 
class.
(class vlse): New class.
(class vsse): New class.
(class vlm): New class.
(class vsm): New class.
(class indexedloadstore): New class.
(class vlxei): New class.
(class vluxei): New class.
(class vloxei): New class.
(class vsuxei): New class.
(class vsoxei): New class.
(class vleff): New class.
* config/riscv/riscv-vector-builtins-iterators.def (VNOT64BITI): New 
iterator.
(V16): New iterator.
(VI16): New iterator.
(V2UNITS): New iterator.
(V4UNITS): New iterator.
(V8UNITS): New iterator.
(V16UNITS): New iterator.
(V32UNITS): New iterator.
(V64UNITS): New iterator.
(V2UNITSI): New iterator.
(V4UNITSI): New iterator.
(V8UNITSI): New iterator.
(V16UNITSI): New iterator.
(V32UNITSI): New iterator.
(V64UNITSI): New iterator.
(V128UNITSI): New iterator.
(VWI): New iterator.
(VWINOQI): New iterator.
(VWF): New iterator.
(VQWI): New iterator.
(VOWI): New iterator.
(VWREDI): New iterator.
(VWREDF): New iterator.
(VW): New iterator.
(VQW): New iterator.
(VOW): New iterator.
(VMAP): New iterator.
(VMAPI16): New iterator.
(VWMAP): New iterator.
(VWFMAP): New

Re: [PATCH v2, rs6000] Fix ICE on expand bcd__ [PR100736]

2022-05-31 Thread Segher Boessenkool

Hi!

On Mon, May 30, 2022 at 06:12:26PM +0800, Kewen.Lin wrote:
> on 2022/5/26 15:35, HAO CHEN GUI wrote:
> >   This patch fixes the ICE reported in PR100736. It removes the condition
> > check of finite math only flag not setting in "*_cc" pattern.
> > With or without this flag, we still can use "cror" to check if either
> > two bits of CC is set or not for "fp_two" codes. We don't need a reverse
> > comparison (implemented by crnot) here when the finite math flag is set,
> > as the latency of "cror" and "crnor" are the same.

> > --- a/gcc/config/rs6000/rs6000.md
> > +++ b/gcc/config/rs6000/rs6000.md
> > @@ -12995,9 +12995,9 @@ (define_insn_and_split "*_cc"
> >[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
> > (fp_two:GPR (match_operand:CCFP 1 "cc_reg_operand" "y")
> >   (const_int 0)))]
> > -  "!flag_finite_math_only"
> > +  ""
> >"#"
> > -  "&& 1"
> > +  ""
> 
> Segher added this hunk, not sure if he prefers to keep the condition unchanged
> and update the expansion side, looking forward to his comments.  :)

It's not clear to me how this can ever happen without finite_math_only?
The patch is safe, sure, but it may be that the real problem is elsewhere.

> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/powerpc/pr100736.c
> > @@ -0,0 +1,12 @@
> > +/* { dg-do compile } */
> > +/* { dg-require-effective-target powerpc_p8vector_ok } */
> > +/* { dg-options "-mdejagnu-cpu=power8 -O2 -ffinite-math-only" } */

The usual flag to use would be -ffast-math :-)

> > +/* { dg-final { scan-assembler {\mcror\M} } } */
> 
> The case of PR100736 fails with ICE as reported, maybe we can remove this 
> dg-final check,
> since as you noted in the description above either "cror" or "crnor" are 
> acceptable,
> this extra check could probably make this case fragile.

Check for \mcrn?or\M then?  But, is crnor something we want here ever?

The reason we do not have cror for finite-math-only is that comparisons
can only (validly :-) ) return LT, GT, or EQ then, and we can branch on
that without twiddling CRF bits first.  Is this not true for BCD
compares, is that what the problem is?  Or, is our builtin expansion
returning something invalid?  Or something else :-)


Segher

Re: [PATCH] Add a bit dislike for separate mem alternative when op is REG_P.

2022-05-31 Thread Hongtao Liu via Gcc-patches

On Wed, Jun 1, 2022 at 12:40 AM Richard Sandiford
 wrote:
>
> Vladimir Makarov via Gcc-patches  writes:
> > On 2022-05-29 23:05, Hongtao Liu wrote:
> >> On Fri, May 27, 2022 at 5:12 AM Vladimir Makarov via Gcc-patches
> >>  wrote:
> >>>
> >>> On 2022-05-24 23:39, liuhongt wrote:
>  Right now, mem_cost for separate mem alternative is 1 * frequency which
>  is pretty small and caused the unnecessary SSE spill in the PR, I've 
>  tried
>  to rework backend cost model, but RA still not happy with that(regress
>  somewhere else). I think the root cause of this is cost for separate 'm'
>  alternative cost is too small, especially considering that the mov cost
>  of gpr are 2(default for REGISTER_MOVE_COST). So this patch increase 
>  mem_cost
>  to 2*frequency, also increase 1 for reg_class cost when m alternative.
> 
> 
>  Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
>  Ok for trunk?
> >>> Thank you for addressing this problem. And sorry I can not approve this
> >>> patch at least w/o your additional work on benchmarking this change.
> >>>
> >>> This code is very old.  It is coming from older RA (former file
> >>> regclass.c) and existed practically since GCC day 1.  People tried many
> >>> times to improve this code.  The code also affects many targets.
> >> Yes, that's why I increased it as low as possible, so it won't regress
> >> #c6 in the PR.
> >>> I can approve this patch if you show that there is no regression at
> >>> least on x86-64 on some credible benchmark, e.g. SPEC2006 or SPEC2017.
> >>>
> >> I've tested the patch for SPEC2017 with both  -march=cascadelake
> >> -Ofast -flto and -O2 -mtune=generic.
> >> No obvious regression is observed, the binaries are all different from
> >> before, so I looked at 2 of them, the difference mainly comes from
> >> different choices of registers(xmm13 -> xmm12).
> >> Ok for trunk then?
> >
> > OK.
> >
> > Thank you for checking SPEC2017.
> >
> > I hope it will not create troubles for other targets.
>
> Can we hold off for a bit?  Like Alexander says, there seem to be
> some inconsistencies in the target patterns, so I think we should
> first rule out any changes being needed there.
Yes, i'm also testing another patch.
>
> Thanks,
> Richard



-- 
BR,
Hongtao

Re: [PATCH] libgo: Recognize off64_t / loff_t type definition of musl libc

2022-05-31 Thread Sören Tempel via Gcc-patches

PING.

If there is anything else that needs to be addressed please let me know.

Sören Tempel  wrote:
> I modified your patch to also define libgo_off_t_type (along with
> libgo_loff_t_type) and used that to define Offset_t in mksysinfo.sh.
> Furthermore, I fixed the include for the loff_t feature check.
> 
> With those two modifications your patch works for me (see attachment).
> 
> Greetings,
> Sören
> 
> Ian Lance Taylor  wrote:
> > On Thu, May 12, 2022 at 11:23 AM Sören Tempel via Gcc-patches
> >  wrote:
> > >
> > > The off64_t type is used for defining Offset_t:
> > >
> > > 
> > > https://github.com/golang/gofrontend/blob/4bdff733a0c2a9ddc3eff104b1be03df058a79c4/libgo/mksysinfo.sh#L406-L410
> > >
> > > On musl, _HAVE_OFF64_T is defined since autoconf doesn't mind it
> > > being defined as a macro but -fdump-go-spec does, hence you end up
> > > with the following compilation error (even with your patch applied):
> > 
> > Ah, thanks.
> > 
> > 
> > > Apart from off64_t stuff, there is only one minor issue (see below).
> > >
> > > > index 7e2b98ba6..487099a33 100644
> > > > --- a/libgo/configure.ac
> > > > +++ b/libgo/configure.ac
> > > > @@ -579,7 +579,7 @@ AC_C_BIGENDIAN
> > > > +
> > > > +CFLAGS_hold="$CFLAGS"
> > > > +CFLAGS="$OSCFLAGS $CFLAGS"
> > > >  AC_CHECK_TYPES([loff_t])
> > > > +CFLAGS="$CFLAGS_hold"
> > >
> > > The AC_CHECK_TYPES invocation is missing an include of fcntl.h (which
> > > defines loff_t in musl) and as such fails and causes libgo compilation
> > > to fail with "reference to undefined name '_libgo_loff_t_type'" as
> > > HAVE_LOFF_T is not defined. The invocation needs to be changed to:
> > >
> > > AC_CHECK_TYPES([loff_t], [], [], [[#include ]])
> > >
> > > and this needs to be adjusted accordingly in configure as well.
> > 
> > Hmmm, I added fcntl.h to AC_CHECK_HEADERS.  I thought that would be
> > enough to cause it to be included in future tests.  Perhaps not.
> > 
> > Ian
> 
> diff --git a/libgo/config.h.in b/libgo/config.h.in
> index 25b8ab8f9ee..2c3c7469675 100644
> --- a/libgo/config.h.in
> +++ b/libgo/config.h.in
> @@ -70,6 +70,9 @@
>  /* Define to 1 if you have the `fchownat' function. */
>  #undef HAVE_FCHOWNAT
>  
> +/* Define to 1 if you have the  header file. */
> +#undef HAVE_FCNTL_H
> +
>  /* Define to 1 if you have the `futimesat' function. */
>  #undef HAVE_FUTIMESAT
>  
> diff --git a/libgo/configure b/libgo/configure
> index ffe17c9be55..13e21d60c62 100755
> --- a/libgo/configure
> +++ b/libgo/configure
> @@ -15249,7 +15249,7 @@ $as_echo "#define HAVE_GETIPINFO 1" >>confdefs.h
>fi
>  
>  
> -for ac_header in port.h sched.h semaphore.h sys/file.h sys/mman.h syscall.h 
> sys/epoll.h sys/event.h sys/inotify.h sys/ptrace.h sys/syscall.h sys/sysctl.h 
> sys/user.h sys/utsname.h sys/select.h sys/socket.h net/bpf.h net/if.h 
> net/if_arp.h net/route.h netpacket/packet.h sys/prctl.h sys/mount.h sys/vfs.h 
> sys/statfs.h sys/timex.h sys/sysinfo.h utime.h linux/ether.h linux/fs.h 
> linux/ptrace.h linux/reboot.h netinet/in_syst.h netinet/ip.h 
> netinet/ip_mroute.h netinet/if_ether.h lwp.h
> +for ac_header in fcntl.h port.h sched.h semaphore.h sys/file.h sys/mman.h 
> syscall.h sys/epoll.h sys/event.h sys/inotify.h sys/ptrace.h sys/syscall.h 
> sys/sysctl.h sys/user.h sys/utsname.h sys/select.h sys/socket.h net/bpf.h 
> net/if.h net/if_arp.h net/route.h netpacket/packet.h sys/prctl.h sys/mount.h 
> sys/vfs.h sys/statfs.h sys/timex.h sys/sysinfo.h utime.h linux/ether.h 
> linux/fs.h linux/ptrace.h linux/reboot.h netinet/in_syst.h netinet/ip.h 
> netinet/ip_mroute.h netinet/if_ether.h lwp.h
>  do :
>as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
>  ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" 
> "$ac_includes_default"
> @@ -15546,7 +15546,10 @@ _ACEOF
>  
>  fi
>  
> -ac_fn_c_check_type "$LINENO" "loff_t" "ac_cv_type_loff_t" 
> "$ac_includes_default"
> +
> +CFLAGS_hold="$CFLAGS"
> +CFLAGS="$OSCFLAGS $CFLAGS"
> +ac_fn_c_check_type "$LINENO" "loff_t" "ac_cv_type_loff_t" "#include 
> "
>  if test "x$ac_cv_type_loff_t" = xyes; then :
>  
>  cat >>confdefs.h <<_ACEOF
> @@ -15556,6 +15559,7 @@ _ACEOF
>  
>  fi
>  
> +CFLAGS="$CFLAGS_hold"
>  
>  LIBS_hold="$LIBS"
>  LIBS="$LIBS -lm"
> diff --git a/libgo/configure.ac b/libgo/configure.ac
> index 7e2b98ba67c..bac58b07b41 100644
> --- a/libgo/configure.ac
> +++ b/libgo/configure.ac
> @@ -579,7 +579,7 @@ AC_C_BIGENDIAN
>  
>  GCC_CHECK_UNWIND_GETIPINFO
>  
> -AC_CHECK_HEADERS(port.h sched.h semaphore.h sys/file.h sys/mman.h syscall.h 
> sys/epoll.h sys/event.h sys/inotify.h sys/ptrace.h sys/syscall.h sys/sysctl.h 
> sys/user.h sys/utsname.h sys/select.h sys/socket.h net/bpf.h net/if.h 
> net/if_arp.h net/route.h netpacket/packet.h sys/prctl.h sys/mount.h sys/vfs.h 
> sys/statfs.h sys/timex.h sys/sysinfo.h utime.h linux/ether.h linux/fs.h 
> linux/ptrace.h linux/reboot.h netinet/in_syst.h netinet/ip.h 
> netinet/ip_mroute.h netinet/if_ether.h lwp.h)
> +AC_CHECK_HEADERS(fcntl.

[pushed] build: TAGS and .cc transition

2022-05-31 Thread Jason Merrill via Gcc-patches

A few globs missed in the .c -> .cc transition.  Some targets were looking
at both *.c and *.cc, but there are no longer any .c files to scan.

Applying as obvious.

gcc/ChangeLog:

* Makefile.in (TAGS): Look at libcpp/*.cc.

gcc/c/ChangeLog:

* Make-lang.in (c.tags): Look at *.cc.

gcc/cp/ChangeLog:

* Make-lang.in (c++.tags): Just look at *.cc.

gcc/d/ChangeLog:

* Make-lang.in (d.tags): Just look at *.cc.

gcc/fortran/ChangeLog:

* Make-lang.in (fortran.tags): Look at *.cc.

gcc/go/ChangeLog:

* Make-lang.in (go.tags): Look at *.cc.

gcc/objc/ChangeLog:

* Make-lang.in (objc.tags): Look at *.cc.

gcc/objcp/ChangeLog:

* Make-lang.in (obj-c++.tags): Look at *.cc.
---
 gcc/Makefile.in  | 6 +++---
 gcc/c/Make-lang.in   | 2 +-
 gcc/cp/Make-lang.in  | 2 +-
 gcc/d/Make-lang.in   | 2 +-
 gcc/fortran/Make-lang.in | 2 +-
 gcc/go/Make-lang.in  | 2 +-
 gcc/objc/Make-lang.in| 2 +-
 gcc/objcp/Make-lang.in   | 2 +-
 8 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 731d8dd2a69..020b3b13943 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -4338,10 +4338,10 @@ TAGS: lang.tags
incs="$$incs --include $$dir/TAGS.sub"; \
  fi;   \
done;   \
-   $(ETAGS) -o TAGS.sub c-family/*.h c-family/*.cc c-family/*.cc \
- *.h *.c *.cc \
+   $(ETAGS) -o TAGS.sub c-family/*.h c-family/*.cc \
+ *.h *.cc \
  ../include/*.h ../libiberty/*.c \
- ../libcpp/*.c ../libcpp/include/*.h \
+ ../libcpp/*.cc ../libcpp/include/*.h \
  --language=none --regex="/\(char\|unsigned 
int\|int\|bool\|void\|HOST_WIDE_INT\|enum [A-Za-z_0-9]+\) 
[*]?\([A-Za-z_0-9]+\)/\2/" common.opt\
  --language=none 
--regex="/\(DEF_RTL_EXPR\|DEFTREECODE\|DEFGSCODE\|DEFTIMEVAR\|DEFPARAM\|DEFPARAMENUM5\)[
 ]?(\([A-Za-z_0-9]+\)/\2/" rtl.def tree.def gimple.def timevar.def \
; \
diff --git a/gcc/c/Make-lang.in b/gcc/c/Make-lang.in
index 1367a10cee6..9bd9c0ea123 100644
--- a/gcc/c/Make-lang.in
+++ b/gcc/c/Make-lang.in
@@ -109,7 +109,7 @@ c.srcinfo:
 c.srcextra: gengtype-lex.cc
-cp -p $^ $(srcdir)
 c.tags: force
-   cd $(srcdir)/c; $(ETAGS) -o TAGS.sub *.c *.h; \
+   cd $(srcdir)/c; $(ETAGS) -o TAGS.sub *.cc *.h; \
$(ETAGS) --include TAGS.sub --include ../TAGS.sub
 c.man:
 c.srcman:
diff --git a/gcc/cp/Make-lang.in b/gcc/cp/Make-lang.in
index 2de4e47c659..23d98c897b5 100644
--- a/gcc/cp/Make-lang.in
+++ b/gcc/cp/Make-lang.in
@@ -174,7 +174,7 @@ c++.srcinfo:
 c++.srcextra:
 
 c++.tags: force
-   cd $(srcdir)/cp; $(ETAGS) -o TAGS.sub *.c *.cc *.h --language=none \
+   cd $(srcdir)/cp; $(ETAGS) -o TAGS.sub *.cc *.h --language=none \
  --regex='/DEFTREECODE [(]\([A-Z_]+\)/\1/' cp-tree.def; \
$(ETAGS) --include TAGS.sub --include ../TAGS.sub
 
diff --git a/gcc/d/Make-lang.in b/gcc/d/Make-lang.in
index f3e34c54015..9f134370218 100644
--- a/gcc/d/Make-lang.in
+++ b/gcc/d/Make-lang.in
@@ -279,7 +279,7 @@ d.srcextra:
 
 d.tags: force
cd $(srcdir)/d; \
-   $(ETAGS) -o TAGS.sub *.c *.cc *.h dmd/*.h dmd/root/*.h; \
+   $(ETAGS) -o TAGS.sub *.cc *.h dmd/*.h dmd/root/*.h; \
$(ETAGS) --include TAGS.sub --include ../TAGS.sub
 
 d.man: doc/gdc.1
diff --git a/gcc/fortran/Make-lang.in b/gcc/fortran/Make-lang.in
index a558fc886ea..1cb47cb1a52 100644
--- a/gcc/fortran/Make-lang.in
+++ b/gcc/fortran/Make-lang.in
@@ -113,7 +113,7 @@ fortran.srcinfo: doc/gfortran.info
-cp -p $^ $(srcdir)/fortran
 
 fortran.tags: force
-   cd $(srcdir)/fortran; $(ETAGS) -o TAGS.sub *.c *.h; \
+   cd $(srcdir)/fortran; $(ETAGS) -o TAGS.sub *.cc *.h; \
$(ETAGS) --include TAGS.sub --include ../TAGS.sub
 
 fortran.info: doc/gfortran.info doc/gfc-internals.info
diff --git a/gcc/go/Make-lang.in b/gcc/go/Make-lang.in
index 31c677366e4..0e81268ece3 100644
--- a/gcc/go/Make-lang.in
+++ b/gcc/go/Make-lang.in
@@ -133,7 +133,7 @@ go.srcinfo: doc/gccgo.info
 go.srcextra:
 go.tags: force
cd $(srcdir)/go; \
-   $(ETAGS) -o TAGS.sub *.c *.h gofrontend/*.h gofrontend/*.cc; \
+   $(ETAGS) -o TAGS.sub *.cc *.h gofrontend/*.h gofrontend/*.cc; \
$(ETAGS) --include TAGS.sub --include ../TAGS.sub
 go.man: doc/gccgo.1
 go.srcman: doc/gccgo.1
diff --git a/gcc/objc/Make-lang.in b/gcc/objc/Make-lang.in
index 6e4ebf58497..b2ebd869480 100644
--- a/gcc/objc/Make-lang.in
+++ b/gcc/objc/Make-lang.in
@@ -102,7 +102,7 @@ objc.srcman:
 objc.install-plugin:
 
 objc.tags: force
-   cd $(srcdir)/objc; $(ETAGS) -o TAGS.sub *.c *.h; \
+   cd $(srcdir)/objc; $(ETAGS) -o TAGS.sub *.cc *.h; \
$(ETAGS) --include TAGS.sub --include ../TAGS.sub
 
 lang_checks += check-objc
diff --git a/gcc/objcp/Make-lang.in b/gcc/objcp/Make-lang.in
index fc8e05befa1..bd2466c44

Re: [PATCH] c++: use auto_timevar instead of timevar_push/pop

2022-05-31 Thread Jason Merrill via Gcc-patches


On 5/31/22 13:47, Patrick Palka wrote:

On Tue, 31 May 2022, Patrick Palka wrote:


r12-5487-g9bf69a8558638c replaced uses of timevar_cond_push/pop with
auto_cond_timevar and removed now unnecessary wrapper functions.  This
patch does the same for timevar_push/pop and auto_timevar.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?


OK.


gcc/cp/ChangeLog:

* parser.cc:
* pt.cc:
Use auto_timevar instead of timevar_push/pop.
Remove wrapper functions.
---
  gcc/cp/parser.cc | 31 
  gcc/cp/pt.cc | 61 +++-
  2 files changed, 24 insertions(+), 68 deletions(-)

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 21066421a02..3acfbd43c5b 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -19106,7 +19106,7 @@ cp_parser_explicit_instantiation (cp_parser* parser)
cp_decl_specifier_seq decl_specifiers;
tree extension_specifier = NULL_TREE;
  
-  timevar_push (TV_TEMPLATE_INST);

+  auto_timevar time (TV_TEMPLATE_INST);


Er, I named the new variables 'time' to be consistent with the existing
uses of auto_timevar in constexpr/constraint/logic.cc, but on second
thought 'tv', as used by r12-5487-g9bf69a8558638c, seems better.
So consider the names changed to 'tv':

-- >8 --

Subject: [PATCH] c++: use auto_timevar instead of timevar_push/pop

r12-5487-g9bf69a8558638c replaced uses of timevar_cond_push/pop with
auto_cond_timevar and removed now unnecessary wrapper functions.  This
patch does the same with timevar_push/pop and auto_timevar.

gcc/cp/ChangeLog:

* parser.cc:
* pt.cc:
Use auto_timevar instead of timevar_push/pop.
Remove wrapper functions.
---
  gcc/cp/parser.cc | 37 +++--
  gcc/cp/pt.cc | 61 +++-
  2 files changed, 27 insertions(+), 71 deletions(-)

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 21066421a02..5a52c32f38b 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -19106,7 +19106,7 @@ cp_parser_explicit_instantiation (cp_parser* parser)
cp_decl_specifier_seq decl_specifiers;
tree extension_specifier = NULL_TREE;
  
-  timevar_push (TV_TEMPLATE_INST);

+  auto_timevar tv (TV_TEMPLATE_INST);
  
/* Look for an (optional) storage-class-specifier or

   function-specifier.  */
@@ -19207,8 +19207,6 @@ cp_parser_explicit_instantiation (cp_parser* parser)
  
cp_parser_consume_semicolon_at_end_of_statement (parser);
  
-  timevar_pop (TV_TEMPLATE_INST);

-
cp_finalize_omp_declare_simd (parser, &odsd);
  }
  
@@ -20966,7 +20964,8 @@ cp_parser_enum_specifier (cp_parser* parser)

   elaborated-type-specifier.  */
if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
  {
-  timevar_push (TV_PARSE_ENUM);
+  auto_timevar tv (TV_PARSE_ENUM);
+
if (nested_name_specifier
  && nested_name_specifier != error_mark_node)
{
@@ -21072,7 +21071,6 @@ cp_parser_enum_specifier (cp_parser* parser)
  
if (scoped_enum_p)

finish_scope ();
-  timevar_pop (TV_PARSE_ENUM);
  }
else
  {
@@ -25927,9 +25925,11 @@ pop_injected_parms (void)
  
 Returns the TREE_TYPE representing the class.  */
  
-static tree

-cp_parser_class_specifier_1 (cp_parser* parser)
+tree
+cp_parser_class_specifier (cp_parser* parser)
  {
+  auto_timevar tv (TV_PARSE_STRUCT);
+
tree type;
tree attributes = NULL_TREE;
bool nested_name_specifier_p;
@@ -26321,16 +26321,6 @@ cp_parser_class_specifier_1 (cp_parser* parser)
return type;
  }
  
-static tree

-cp_parser_class_specifier (cp_parser* parser)
-{
-  tree ret;
-  timevar_push (TV_PARSE_STRUCT);
-  ret = cp_parser_class_specifier_1 (parser);
-  timevar_pop (TV_PARSE_STRUCT);
-  return ret;
-}
-
  /* Parse a class-head.
  
 class-head:

@@ -31276,15 +31266,14 @@ 
cp_parser_function_definition_from_specifiers_and_declarator
  }
else
  {
-  timevar_id_t tv;
+  timevar_id_t tv_id;
if (DECL_DECLARED_INLINE_P (current_function_decl))
-tv = TV_PARSE_INLINE;
+   tv_id = TV_PARSE_INLINE;
else
-tv = TV_PARSE_FUNC;
-  timevar_push (tv);
+   tv_id = TV_PARSE_FUNC;
+  auto_timevar tv (tv_id);
fn = cp_parser_function_definition_after_declarator (parser,
 /*inline_p=*/false);
-  timevar_pop (tv);
  }
  
return fn;

@@ -32276,7 +32265,8 @@ cp_parser_enclosed_template_argument_list (cp_parser* 
parser)
  static void
  cp_parser_late_parsing_for_member (cp_parser* parser, tree member_function)
  {
-  timevar_push (TV_PARSE_INMETH);
+  auto_timevar tv (TV_PARSE_INMETH);
+
/* If this member is a template, get the underlying
   FUNCTION_DECL.  */
if (DECL_FUNCTION_TEMPLATE_P (member_function))
@@ -32346,7 +32336,6 @@ cp_parser_late_parsing_for_member (cp_parser* parser, 
tree member_functio

Re: [PATCH] c++: constexpr init of union sub-aggr w/ base [PR105491]

2022-05-31 Thread Jason Merrill via Gcc-patches


On 5/31/22 12:41, Patrick Palka wrote:

On Wed, 18 May 2022, Jason Merrill wrote:


On 5/17/22 12:34, Patrick Palka wrote:

On Sat, May 7, 2022 at 5:18 PM Jason Merrill  wrote:


On 5/6/22 16:46, Patrick Palka wrote:

On Fri, 6 May 2022, Jason Merrill wrote:


On 5/6/22 16:10, Patrick Palka wrote:

On Fri, 6 May 2022, Patrick Palka wrote:


On Fri, 6 May 2022, Jason Merrill wrote:


On 5/6/22 14:00, Patrick Palka wrote:

On Fri, 6 May 2022, Patrick Palka wrote:


On Fri, 6 May 2022, Jason Merrill wrote:


On 5/6/22 11:22, Patrick Palka wrote:

Here ever since r10-7313-gb599bf9d6d1e18,
reduced_constant_expression_p
in C++11/14 is rejecting the marked sub-aggregate
initializer
(of type
S)

W w = {.D.2445={.s={.D.2387={.m=0}, .b=0}}}
   ^

ultimately because said initializer has
CONSTRUCTOR_NO_CLEARING
set,
and
so the function proceeds to verify that all fields of S
are
initialized.
And before C++17 we don't expect to see base class
fields (since
next_initializable_field skips over them), so the base
class
initializer
causes r_c_e_p to return false.


That seems like the primary bug.  I guess r_c_e_p
shouldn't be
using
next_initializable_field.  Really that function should
only be
used for
aggregates.


I see, I'll try replacing it in r_c_e_p.  Would that be in
addition
to
or instead of the clear_no_implicit_zero approach?


I'm testing the following, which uses a custom predicate
instead of
next_initializable_field in r_c_e_p.


Let's make it a public predicate, not internal to r_c_e_p.
Maybe it
could be
next_subobject_field, and the current next_initializable_field
change to
next_aggregate_field?


Will do.




Looks like the inner initializer {.D.2387={.m=0}, .b=0} is
formed
during
the subobject constructor call:

  V::V (&((struct S *) this)->D.2120);

after the evaluation of which, 'result' in
cxx_eval_call_expression is
NULL
(presumably because it's a CALL_EXPR, not AGGR_INIT_EXPR?):

  /* This can be null for a subobject constructor call, in
 which case what we care about is the initialization
 side-effects rather than the value.  We could get at
the
 value by evaluating *this, but we don't bother;
there's
 no need to put such a call in the hash table.  */
  result = lval ? ctx->object : ctx->ctor;

so we end up not calling clear_no_implicit_zero for the inner
initializer
directly.  We only call clear_no_implicit_zero after
evaluating the
AGGR_INIT_EXPR for outermost initializer (of type W).


Maybe for constructors we could call it on ctx->ctor instead of
result,
or
call r_c_e_p in C++20+?


But both ctx->ctor and ->object are NULL during a subobject
constructor
call (since we apparently clear these fields when entering a
STATEMENT_LIST):

So I tried instead obtaining the constructor by evaluating new_obj
via

--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -2993,6 +2988,9 @@ cxx_eval_call_expression (const
constexpr_ctx *ctx,
tree t,
  in order to detect reading an unitialized object in
constexpr
instead
  of value-initializing it.  (reduced_constant_expression_p
is
expected to
  take care of clearing the flag.)  */
+  if (new_obj && DECL_CONSTRUCTOR_P (fun))
+result = cxx_eval_constant_expression (ctx, new_obj,
/*lval=*/false,
+  non_constant_p,
overflow_p);
   if (TREE_CODE (result) == CONSTRUCTOR
   && (cxx_dialect < cxx20
  || !DECL_CONSTRUCTOR_P (fun)))

but that seems to break e.g. g++.dg/cpp2a/constexpr-init12.C
because
after the subobject constructor call

  S::S (&((struct W *) this)->s, NON_LVALUE_EXPR <8>);

the constructor for the subobject a.s in new_obj is still
completely
missing (I suppose because S::S doesn't initialize any of its
members)
so trying to obtain it causes us to complain too soon from
cxx_eval_component_reference:

constexpr-init12.C:16:24:   in ‘constexpr’ expansion of ‘W(42)’
constexpr-init12.C:10:22:   in ‘constexpr’ expansion of
‘((W*)this)->W::s.S::S(8)’
constexpr-init12.C:16:24: error: accessing uninitialized member
‘W::s’
   16 | constexpr auto a = W(42); // { dg-error "not a constant
expression" }
  |^



It does seem dubious that we would clear the flag on an outer
ctor when
it's
still set on an inner ctor, should probably add an assert
somewhere.


Makes sense, not sure where the best place would be..


On second thought, if I'm understanding your suggestion correctly, I
don't think we can generally enforce such a property for
CONSTRUCTOR_NO_CLEARING, given how cxx_eval_store_expression uses it
for
unions:

  union U {
struct { int x, y; } a;
  } u;
  u.a.x = 0;

Here after evaluating the assignment, the outer ctor for the union
will
have CONSTRUCTOR_NO_CLEARING cleared to indicate we finished
activating
the union member, but the inner ctor is certainly not fully
initialized
so it'll have CONSTRUCTOR_NO_CLEARING set still.


Why c

Re: [PATCH] c++: squash cp_build_qualified_type/_real

2022-05-31 Thread Jason Merrill via Gcc-patches


On 5/31/22 13:13, Patrick Palka wrote:

This replaces the two differently named versions of the same function
with a single function using a default function argument.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK
for trunk?


OK.


gcc/cp/ChangeLog:

* cp-tree.h (cp_build_qualified_type_real): Rename to ...
(cp_build_qualified_type): ... this.  Give its last parameter
a default argument.  Remove macro of the same name.
* decl.cc (grokdeclarator): Adjust accordingly.
* pt.cc (tsubst_aggr_type): Likewise.
(rebuild_function_or_method_type): Likewise.
(tsubst): Likewise.
(maybe_dependent_member_ref): Likewise.
(unify): Likewise.
* tree.cc (cp_build_qualified_type_real): Rename to ...
(cp_build_qualified_type): ... this.  Adjust accordingly.
---
  gcc/cp/cp-tree.h |  5 ++---
  gcc/cp/decl.cc   |  2 +-
  gcc/cp/pt.cc | 52 
  gcc/cp/tree.cc   | 11 --
  4 files changed, 33 insertions(+), 37 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index da8898155e0..cc13809f38a 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7877,9 +7877,8 @@ extern tree make_ptrmem_cst   (tree, 
tree);
  extern tree cp_build_type_attribute_variant (tree, tree);
  extern tree cp_build_reference_type   (tree, bool);
  extern tree move  (tree);
-extern tree cp_build_qualified_type_real   (tree, int, tsubst_flags_t);
-#define cp_build_qualified_type(TYPE, QUALS) \
-  cp_build_qualified_type_real ((TYPE), (QUALS), tf_warning_or_error)
+extern tree cp_build_qualified_type(tree, int,
+tsubst_flags_t = 
tf_warning_or_error);
  extern bool cv_qualified_p(const_tree);
  extern tree cv_unqualified(tree);
  extern special_function_kind special_function_p (const_tree);
diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index 663a72fae15..e0d397d5a07 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -12375,7 +12375,7 @@ grokdeclarator (const cp_declarator *declarator,
  type = DECL_ORIGINAL_TYPE (TYPE_NAME (type));
  
type_quals |= cp_type_quals (type);

-  type = cp_build_qualified_type_real
+  type = cp_build_qualified_type
  (type, type_quals, typedef_decl && !DECL_ARTIFICIAL (typedef_decl))
  || declspecs->decltype_p)
 ? tf_ignore_bad_quals : 0) | tf_warning_or_error));
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 4a3f38b1f5a..759f119abc2 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -13767,7 +13767,7 @@ tsubst_aggr_type (tree t,
{
  r = lookup_template_class (t, argvec, in_decl, context,
 entering_scope, complain);
- r = cp_build_qualified_type_real (r, cp_type_quals (t), complain);
+ r = cp_build_qualified_type (r, cp_type_quals (t), complain);
}
  
  	  return r;

@@ -13963,7 +13963,7 @@ rebuild_function_or_method_type (tree t, tree 
return_type, tree arg_types,
  {
tree r = TREE_TYPE (TREE_VALUE (arg_types));
/* Don't pick up extra function qualifiers from the basetype.  */
-  r = cp_build_qualified_type_real (r, type_memfn_quals (t), complain);
+  r = cp_build_qualified_type (r, type_memfn_quals (t), complain);
if (! MAYBE_CLASS_TYPE_P (r))
{
  /* [temp.deduct]
@@ -15651,7 +15651,7 @@ tsubst (tree t, tree args, tsubst_flags_t complain, 
tree in_decl)
if (r)
{
  r = TREE_TYPE (r);
- r = cp_build_qualified_type_real
+ r = cp_build_qualified_type
(r, cp_type_quals (t) | cp_type_quals (r),
 complain | tf_ignore_bad_quals);
  return r;
@@ -15661,8 +15661,8 @@ tsubst (tree t, tree args, tsubst_flags_t complain, 
tree in_decl)
  /* We don't have an instantiation yet, so drop the typedef.  */
  int quals = cp_type_quals (t);
  t = DECL_ORIGINAL_TYPE (decl);
- t = cp_build_qualified_type_real (t, quals,
-   complain | tf_ignore_bad_quals);
+ t = cp_build_qualified_type (t, quals,
+  complain | tf_ignore_bad_quals);
}
  }
  
@@ -15809,7 +15809,7 @@ tsubst (tree t, tree args, tsubst_flags_t complain, tree in_decl)
  
  		quals = cp_type_quals (arg) | cp_type_quals (t);
  
-		return cp_build_qualified_type_real

+   return cp_build_qualified_type
  (arg, quals, complain | tf_ignore_bad_quals);
  }
else if (code == BOUND_TEMPLATE_TEMPLATE_PARM)
@@ -15874,7 +15874,7 @@ tsubst (tree t, tree args, tsubst_flags_t complain, 
tree in_decl)
   DECL_CONTEXT (arg),
/*en

Re: [PATCH] c++: non-dep call with empty TYPE_BINFO [PR105758]

2022-05-31 Thread Jason Merrill via Gcc-patches


On 5/31/22 12:28, Patrick Palka wrote:

Here the out-of-line definition of Z<T>::z causes duplicate_decls to
change z's type to use the implicit instantiation Z<T> rather than the
corresponding primary template type (which is also the type of the
injected class name), and the former, being a dependent specialization,
lacks a TYPE_BINFO (although its TYPE_CANONICAL was set by a special
case in lookup_template_class_1 to point to the latter).

Later, when processing the non-dependent call z->foo(0), build_over_call
relies on the object argument's TYPE_BINFO to build the templated form
for this call, which fails because the object argument type has empty
TYPE_BINFO due to the above.

It seems weird that the implicit instantiation Z<T> doesn't have the
same TYPE_BINFO as the primary template type Z<T>, despite them being
proclaimed equivalent via TYPE_CANONICAL.  So I tried also setting
TYPE_BINFO in the special case in lookup_template_class_1, but that led
to some problems with constrained partial specializations of the form
Z<T>.  I'm not sure what, if anything, we ought to do about the subtle
differences between these two kinds of the same type.

Fortunately it seems we don't need to rely on TYPE_BINFO at all in
build_over_call here -- the z_candidate struct already contains the
exact binfos we need to rebuild the BASELINK for the templated form.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk/12?


OK.


PR c++/105758

gcc/cp/ChangeLog:

* call.cc (build_over_call): Use z_candidate::conversion_path
and ::access_path instead of TYPE_BINFO to build the BASELINK
for the templated form.

gcc/testsuite/ChangeLog:

* g++.dg/template/non-dependent24.C: New test.
---
  gcc/cp/call.cc|  4 ++--
  .../g++.dg/template/non-dependent24.C | 19 +++
  2 files changed, 21 insertions(+), 2 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/template/non-dependent24.C

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 14c6037729f..85fe9b5ab85 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -9244,8 +9244,8 @@ build_over_call (struct z_candidate *cand, int flags, 
tsubst_flags_t complain)
}
else
{
- tree binfo = TYPE_BINFO (TREE_TYPE (first_arg));
- callee = build_baselink (binfo, binfo, fn, NULL_TREE);
+ callee = build_baselink (cand->conversion_path, cand->access_path,
+  fn, NULL_TREE);
  callee = build_min (COMPONENT_REF, TREE_TYPE (fn),
  first_arg, callee, NULL_TREE);
}
diff --git a/gcc/testsuite/g++.dg/template/non-dependent24.C 
b/gcc/testsuite/g++.dg/template/non-dependent24.C
new file mode 100644
index 000..0ddc75c78ee
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/non-dependent24.C
@@ -0,0 +1,19 @@
+// PR c++/105758
+
+struct A {
+  void foo(int);
+};
+
+template
+struct Z : A {
+  static Z *z;
+  void bar();
+};
+
+template
+Z *Z::z;
+
+template
+void Z::bar() {
+  z->foo(0);
+}

Re: [PATCH] c++: don't substitute TEMPLATE_PARM_CONSTRAINT [PR100374]

2022-05-31 Thread Jason Merrill via Gcc-patches


On 5/31/22 08:56, Patrick Palka wrote:

On Sun, 29 May 2022, Jason Merrill wrote:


On 5/29/22 22:10, Jason Merrill wrote:

On 5/27/22 14:05, Patrick Palka wrote:

This makes us avoid substituting into the TEMPLATE_PARM_CONSTRAINT of
each template parameter except as necessary for (friend) declaration
matching, like we already do for the overall TEMPLATE_PARMS_CONSTRAINTS
of a template parameter list.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk and perhaps 12.2?  Also tested on range-v3 and cmcstl2.


Are there already tests that cover the friend cases?


Yes, by cpp2a/concepts-friend{2,3,7}.C I think.



Also, don't you also need to handle specialization of partial instantiations?


Hmm, do you have an example?  IIUC we call tsubst_friend_function and
tsubst_friend_class only from instantiate_class_template_1, which always
uses the most general template and full template argument set to
instantiate any friend declarations.  So friend declarations are never
partially instantiated I think.  (And IIUC non-friends are irrelevant
here since we don't ever want to substitute their constraints outside of
satisfaction.)


From C++20 CA104:

  template <class T> struct A {
    template <class U> U f(U) requires C<typename T::type>;
    template <class U> U f(U) requires C<T>;
  };

  // Substitute int for T in above requirements to find match.
  template <> template <class U> U A<int>::f(U) requires C<int>  { }


 PR c++/100374

gcc/cp/ChangeLog:

 * pt.cc (tsubst_each_template_parm_constraint): Define.
 (tsubst_friend_function): Use it.
 (tsubst_friend_class): Use it.
 (tsubst_template_parm): Don't substitute TEMPLATE_PARM_CONSTRAINT.

gcc/testsuite/ChangeLog:

 * g++.dg/cpp2a/concepts-template-parm11.C: New test.
---
   gcc/cp/pt.cc  | 35 ---
   .../g++.dg/cpp2a/concepts-template-parm11.C   | 16 +
   2 files changed, 47 insertions(+), 4 deletions(-)
   create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-template-parm11.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 24bbe2f4060..ec168234325 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -184,6 +184,7 @@ static int unify_pack_expansion (tree, tree, tree,
    tree, unification_kind_t, bool, bool);
   static tree copy_template_args (tree);
   static tree tsubst_template_parms (tree, tree, tsubst_flags_t);
+static void tsubst_each_template_parm_constraint (tree, tree,
tsubst_flags_t);
   tree most_specialized_partial_spec (tree, tsubst_flags_t);
   static tree tsubst_aggr_type (tree, tree, tsubst_flags_t, tree, int);
   static tree tsubst_arg_types (tree, tree, tree, tsubst_flags_t, tree);
@@ -11254,7 +11255,12 @@ tsubst_friend_function (tree decl, tree args)
     tree parms = DECL_TEMPLATE_PARMS (new_friend);
     tree treqs = TEMPLATE_PARMS_CONSTRAINTS (parms);
     treqs = maybe_substitute_reqs_for (treqs, new_friend);
-  TEMPLATE_PARMS_CONSTRAINTS (parms) = treqs;
+  if (treqs != TEMPLATE_PARMS_CONSTRAINTS (parms))
+    {
+  TEMPLATE_PARMS_CONSTRAINTS (parms) = treqs;
+  /* As well as each TEMPLATE_PARM_CONSTRAINT.  */
+  tsubst_each_template_parm_constraint (parms, args,
tf_warning_or_error);
+    }
   }
     /* The mangled name for the NEW_FRIEND is incorrect.  The function
@@ -11500,6 +11506,8 @@ tsubst_friend_class (tree friend_tmpl, tree args)
   {
     tree parms = tsubst_template_parms (DECL_TEMPLATE_PARMS
(friend_tmpl),
     args, tf_warning_or_error);
+  tsubst_each_template_parm_constraint (parms, args,
+    tf_warning_or_error);
     location_t saved_input_location = input_location;
     input_location = DECL_SOURCE_LOCATION (friend_tmpl);
     tree cons = get_constraints (tmpl);
@@ -11534,6 +11542,8 @@ tsubst_friend_class (tree friend_tmpl, tree args)
  DECL_FRIEND_CONTEXT (friend_tmpl));
     --processing_template_decl;
     set_constraints (tmpl, ci);
+  tsubst_each_template_parm_constraint (DECL_TEMPLATE_PARMS
(tmpl),
+    args, tf_warning_or_error);
   }
     /* Inject this template into the enclosing namspace scope.  */
@@ -13656,7 +13666,6 @@ tsubst_template_parm (tree t, tree args,
tsubst_flags_t complain)
     default_value = TREE_PURPOSE (t);
     parm_decl = TREE_VALUE (t);
-  tree constraint = TEMPLATE_PARM_CONSTRAINTS (t);
     parm_decl = tsubst (parm_decl, args, complain, NULL_TREE);
     if (TREE_CODE (parm_decl) == PARM_DECL
@@ -13664,13 +13673,31 @@ tsubst_template_parm (tree t, tree args,
tsubst_flags_t complain)
   parm_decl = error_mark_node;
     default_value = tsubst_template_arg (default_value, args,
  complain, NULL_TREE);
-  constraint = tsubst_constraint (constraint, args, complain, NULL_TREE);
     tree r = build_tree_list (default_value, parm_decl);
-  TEMPLATE_PARM_CONSTRAINTS (r) = constraint;
+  TEMPLATE_PARM_CONSTRAINTS (r)

Re: [PATCH] c++: use auto_timevar instead of timevar_push/pop

2022-05-31 Thread Patrick Palka via Gcc-patches

On Tue, 31 May 2022, Patrick Palka wrote:

> r12-5487-g9bf69a8558638c replaced uses of timevar_cond_push/pop with
> auto_cond_timevar and removed now unnecessary wrapper functions.  This
> patch does the same for timevar_push/pop and auto_timevar.
> 
> Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
> trunk?
> 
> gcc/cp/ChangeLog:
> 
>   * parser.cc:
>   * pt.cc:
>   Use auto_timevar instead of timevar_push/pop.
>   Remove wrapper functions.
> ---
>  gcc/cp/parser.cc | 31 
>  gcc/cp/pt.cc | 61 +++-
>  2 files changed, 24 insertions(+), 68 deletions(-)
> 
> diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
> index 21066421a02..3acfbd43c5b 100644
> --- a/gcc/cp/parser.cc
> +++ b/gcc/cp/parser.cc
> @@ -19106,7 +19106,7 @@ cp_parser_explicit_instantiation (cp_parser* parser)
>cp_decl_specifier_seq decl_specifiers;
>tree extension_specifier = NULL_TREE;
>  
> -  timevar_push (TV_TEMPLATE_INST);
> +  auto_timevar time (TV_TEMPLATE_INST);

Er, I named the new variables 'time' to be consistent with the existing
uses of auto_timevar in constexpr/constraint/logic.cc, but on second
thought 'tv', as used by r12-5487-g9bf69a8558638c, seems better.
So consider the names changed to 'tv':

-- >8 --

Subject: [PATCH] c++: use auto_timevar instead of timevar_push/pop

r12-5487-g9bf69a8558638c replaced uses of timevar_cond_push/pop with
auto_cond_timevar and removed now unnecessary wrapper functions.  This
patch does the same with timevar_push/pop and auto_timevar.

gcc/cp/ChangeLog:

* parser.cc:
* pt.cc:
Use auto_timevar instead of timevar_push/pop.
Remove wrapper functions.
---
 gcc/cp/parser.cc | 37 +++--
 gcc/cp/pt.cc | 61 +++-
 2 files changed, 27 insertions(+), 71 deletions(-)

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 21066421a02..5a52c32f38b 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -19106,7 +19106,7 @@ cp_parser_explicit_instantiation (cp_parser* parser)
   cp_decl_specifier_seq decl_specifiers;
   tree extension_specifier = NULL_TREE;
 
-  timevar_push (TV_TEMPLATE_INST);
+  auto_timevar tv (TV_TEMPLATE_INST);
 
   /* Look for an (optional) storage-class-specifier or
  function-specifier.  */
@@ -19207,8 +19207,6 @@ cp_parser_explicit_instantiation (cp_parser* parser)
 
   cp_parser_consume_semicolon_at_end_of_statement (parser);
 
-  timevar_pop (TV_TEMPLATE_INST);
-
   cp_finalize_omp_declare_simd (parser, &odsd);
 }
 
@@ -20966,7 +20964,8 @@ cp_parser_enum_specifier (cp_parser* parser)
  elaborated-type-specifier.  */
   if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
 {
-  timevar_push (TV_PARSE_ENUM);
+  auto_timevar tv (TV_PARSE_ENUM);
+
   if (nested_name_specifier
  && nested_name_specifier != error_mark_node)
{
@@ -21072,7 +21071,6 @@ cp_parser_enum_specifier (cp_parser* parser)
 
   if (scoped_enum_p)
finish_scope ();
-  timevar_pop (TV_PARSE_ENUM);
 }
   else
 {
@@ -25927,9 +25925,11 @@ pop_injected_parms (void)
 
Returns the TREE_TYPE representing the class.  */
 
-static tree
-cp_parser_class_specifier_1 (cp_parser* parser)
+tree
+cp_parser_class_specifier (cp_parser* parser)
 {
+  auto_timevar tv (TV_PARSE_STRUCT);
+
   tree type;
   tree attributes = NULL_TREE;
   bool nested_name_specifier_p;
@@ -26321,16 +26321,6 @@ cp_parser_class_specifier_1 (cp_parser* parser)
   return type;
 }
 
-static tree
-cp_parser_class_specifier (cp_parser* parser)
-{
-  tree ret;
-  timevar_push (TV_PARSE_STRUCT);
-  ret = cp_parser_class_specifier_1 (parser);
-  timevar_pop (TV_PARSE_STRUCT);
-  return ret;
-}
-
 /* Parse a class-head.
 
class-head:
@@ -31276,15 +31266,14 @@ 
cp_parser_function_definition_from_specifiers_and_declarator
 }
   else
 {
-  timevar_id_t tv;
+  timevar_id_t tv_id;
   if (DECL_DECLARED_INLINE_P (current_function_decl))
-tv = TV_PARSE_INLINE;
+   tv_id = TV_PARSE_INLINE;
   else
-tv = TV_PARSE_FUNC;
-  timevar_push (tv);
+   tv_id = TV_PARSE_FUNC;
+  auto_timevar tv (tv_id);
   fn = cp_parser_function_definition_after_declarator (parser,
 /*inline_p=*/false);
-  timevar_pop (tv);
 }
 
   return fn;
@@ -32276,7 +32265,8 @@ cp_parser_enclosed_template_argument_list (cp_parser* 
parser)
 static void
 cp_parser_late_parsing_for_member (cp_parser* parser, tree member_function)
 {
-  timevar_push (TV_PARSE_INMETH);
+  auto_timevar tv (TV_PARSE_INMETH);
+
   /* If this member is a template, get the underlying
  FUNCTION_DECL.  */
   if (DECL_FUNCTION_TEMPLATE_P (member_function))
@@ -32346,7 +32336,6 @@ cp_parser_late_parsing_for_member (cp_parser* parser, 
tree member_function)
 
   /* Restore the queue.  */
   pop_unparsed_function_queues (par

[PATCH] c++: use auto_timevar instead of timevar_push/pop

2022-05-31 Thread Patrick Palka via Gcc-patches

r12-5487-g9bf69a8558638c replaced uses of timevar_cond_push/pop with
auto_cond_timevar and removed now unnecessary wrapper functions.  This
patch does the same for timevar_push/pop and auto_timevar.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

gcc/cp/ChangeLog:

* parser.cc:
* pt.cc:
Use auto_timevar instead of timevar_push/pop.
Remove wrapper functions.
---
 gcc/cp/parser.cc | 31 
 gcc/cp/pt.cc | 61 +++-
 2 files changed, 24 insertions(+), 68 deletions(-)

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 21066421a02..3acfbd43c5b 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -19106,7 +19106,7 @@ cp_parser_explicit_instantiation (cp_parser* parser)
   cp_decl_specifier_seq decl_specifiers;
   tree extension_specifier = NULL_TREE;
 
-  timevar_push (TV_TEMPLATE_INST);
+  auto_timevar time (TV_TEMPLATE_INST);
 
   /* Look for an (optional) storage-class-specifier or
  function-specifier.  */
@@ -19207,8 +19207,6 @@ cp_parser_explicit_instantiation (cp_parser* parser)
 
   cp_parser_consume_semicolon_at_end_of_statement (parser);
 
-  timevar_pop (TV_TEMPLATE_INST);
-
   cp_finalize_omp_declare_simd (parser, &odsd);
 }
 
@@ -20966,7 +20964,8 @@ cp_parser_enum_specifier (cp_parser* parser)
  elaborated-type-specifier.  */
   if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
 {
-  timevar_push (TV_PARSE_ENUM);
+  auto_timevar time (TV_PARSE_ENUM);
+
   if (nested_name_specifier
  && nested_name_specifier != error_mark_node)
{
@@ -21072,7 +21071,6 @@ cp_parser_enum_specifier (cp_parser* parser)
 
   if (scoped_enum_p)
finish_scope ();
-  timevar_pop (TV_PARSE_ENUM);
 }
   else
 {
@@ -25927,9 +25925,11 @@ pop_injected_parms (void)
 
Returns the TREE_TYPE representing the class.  */
 
-static tree
-cp_parser_class_specifier_1 (cp_parser* parser)
+tree
+cp_parser_class_specifier (cp_parser* parser)
 {
+  auto_timevar time (TV_PARSE_STRUCT);
+
   tree type;
   tree attributes = NULL_TREE;
   bool nested_name_specifier_p;
@@ -26321,16 +26321,6 @@ cp_parser_class_specifier_1 (cp_parser* parser)
   return type;
 }
 
-static tree
-cp_parser_class_specifier (cp_parser* parser)
-{
-  tree ret;
-  timevar_push (TV_PARSE_STRUCT);
-  ret = cp_parser_class_specifier_1 (parser);
-  timevar_pop (TV_PARSE_STRUCT);
-  return ret;
-}
-
 /* Parse a class-head.
 
class-head:
@@ -31281,10 +31271,9 @@ 
cp_parser_function_definition_from_specifiers_and_declarator
 tv = TV_PARSE_INLINE;
   else
 tv = TV_PARSE_FUNC;
-  timevar_push (tv);
+  auto_timevar time (tv);
   fn = cp_parser_function_definition_after_declarator (parser,
 /*inline_p=*/false);
-  timevar_pop (tv);
 }
 
   return fn;
@@ -32276,7 +32265,8 @@ cp_parser_enclosed_template_argument_list (cp_parser* 
parser)
 static void
 cp_parser_late_parsing_for_member (cp_parser* parser, tree member_function)
 {
-  timevar_push (TV_PARSE_INMETH);
+  auto_timevar time (TV_PARSE_INMETH);
+
   /* If this member is a template, get the underlying
  FUNCTION_DECL.  */
   if (DECL_FUNCTION_TEMPLATE_P (member_function))
@@ -32346,7 +32336,6 @@ cp_parser_late_parsing_for_member (cp_parser* parser, 
tree member_function)
 
   /* Restore the queue.  */
   pop_unparsed_function_queues (parser);
-  timevar_pop (TV_PARSE_INMETH);
 }
 
 /* If DECL contains any default args, remember it on the unparsed
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 759f119abc2..ffa3a05d8c8 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -9813,10 +9813,12 @@ maybe_get_template_decl_from_type_decl (tree decl)
that we want to avoid. It also causes some problems with argument
coercion (see convert_nontype_argument for more information on this).  */
 
-static tree
-lookup_template_class_1 (tree d1, tree arglist, tree in_decl, tree context,
-int entering_scope, tsubst_flags_t complain)
+tree
+lookup_template_class (tree d1, tree arglist, tree in_decl, tree context,
+  int entering_scope, tsubst_flags_t complain)
 {
+  auto_timevar time (TV_TEMPLATE_INST);
+
   tree templ = NULL_TREE, parmlist;
   tree t;
   spec_entry **slot;
@@ -10354,20 +10356,6 @@ lookup_template_class_1 (tree d1, tree arglist, tree 
in_decl, tree context,
 }
 }
 
-/* Wrapper for lookup_template_class_1.  */
-
-tree
-lookup_template_class (tree d1, tree arglist, tree in_decl, tree context,
-   int entering_scope, tsubst_flags_t complain)
-{
-  tree ret;
-  timevar_push (TV_TEMPLATE_INST);
-  ret = lookup_template_class_1 (d1, arglist, in_decl, context,
- entering_scope, complain);
-  timevar_pop (TV_TEMPLATE_INST);
-  return ret;
-}
-
 /* Return a TEMPLATE_ID_EXPR for the given variable template and ARGLIST.  */
 
 t

[PATCH] c++: squash cp_build_qualified_type/_real

2022-05-31 Thread Patrick Palka via Gcc-patches

This replaces the two differently named versions of the same function
with a single function using a default function argument.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK
for trunk?

gcc/cp/ChangeLog:

* cp-tree.h (cp_build_qualified_type_real): Rename to ...
(cp_build_qualified_type): ... this.  Give its last parameter
a default argument.  Remove macro of the same name.
* decl.cc (grokdeclarator): Adjust accordingly.
* pt.cc (tsubst_aggr_type): Likewise.
(rebuild_function_or_method_type): Likewise.
(tsubst): Likewise.
(maybe_dependent_member_ref): Likewise.
(unify): Likewise.
* tree.cc (cp_build_qualified_type_real): Rename to ...
(cp_build_qualified_type): ... this.  Adjust accordingly.
---
 gcc/cp/cp-tree.h |  5 ++---
 gcc/cp/decl.cc   |  2 +-
 gcc/cp/pt.cc | 52 
 gcc/cp/tree.cc   | 11 --
 4 files changed, 33 insertions(+), 37 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index da8898155e0..cc13809f38a 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7877,9 +7877,8 @@ extern tree make_ptrmem_cst   (tree, 
tree);
 extern tree cp_build_type_attribute_variant (tree, tree);
 extern tree cp_build_reference_type(tree, bool);
 extern tree move   (tree);
-extern tree cp_build_qualified_type_real   (tree, int, tsubst_flags_t);
-#define cp_build_qualified_type(TYPE, QUALS) \
-  cp_build_qualified_type_real ((TYPE), (QUALS), tf_warning_or_error)
+extern tree cp_build_qualified_type(tree, int,
+tsubst_flags_t = 
tf_warning_or_error);
 extern bool cv_qualified_p (const_tree);
 extern tree cv_unqualified (tree);
 extern special_function_kind special_function_p (const_tree);
diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index 663a72fae15..e0d397d5a07 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -12375,7 +12375,7 @@ grokdeclarator (const cp_declarator *declarator,
 type = DECL_ORIGINAL_TYPE (TYPE_NAME (type));
 
   type_quals |= cp_type_quals (type);
-  type = cp_build_qualified_type_real
+  type = cp_build_qualified_type
 (type, type_quals, typedef_decl && !DECL_ARTIFICIAL (typedef_decl))
  || declspecs->decltype_p)
 ? tf_ignore_bad_quals : 0) | tf_warning_or_error));
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 4a3f38b1f5a..759f119abc2 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -13767,7 +13767,7 @@ tsubst_aggr_type (tree t,
{
  r = lookup_template_class (t, argvec, in_decl, context,
 entering_scope, complain);
- r = cp_build_qualified_type_real (r, cp_type_quals (t), complain);
+ r = cp_build_qualified_type (r, cp_type_quals (t), complain);
}
 
  return r;
@@ -13963,7 +13963,7 @@ rebuild_function_or_method_type (tree t, tree 
return_type, tree arg_types,
 {
   tree r = TREE_TYPE (TREE_VALUE (arg_types));
   /* Don't pick up extra function qualifiers from the basetype.  */
-  r = cp_build_qualified_type_real (r, type_memfn_quals (t), complain);
+  r = cp_build_qualified_type (r, type_memfn_quals (t), complain);
   if (! MAYBE_CLASS_TYPE_P (r))
{
  /* [temp.deduct]
@@ -15651,7 +15651,7 @@ tsubst (tree t, tree args, tsubst_flags_t complain, 
tree in_decl)
   if (r)
{
  r = TREE_TYPE (r);
- r = cp_build_qualified_type_real
+ r = cp_build_qualified_type
(r, cp_type_quals (t) | cp_type_quals (r),
 complain | tf_ignore_bad_quals);
  return r;
@@ -15661,8 +15661,8 @@ tsubst (tree t, tree args, tsubst_flags_t complain, 
tree in_decl)
  /* We don't have an instantiation yet, so drop the typedef.  */
  int quals = cp_type_quals (t);
  t = DECL_ORIGINAL_TYPE (decl);
- t = cp_build_qualified_type_real (t, quals,
-   complain | tf_ignore_bad_quals);
+ t = cp_build_qualified_type (t, quals,
+  complain | tf_ignore_bad_quals);
}
 }
 
@@ -15809,7 +15809,7 @@ tsubst (tree t, tree args, tsubst_flags_t complain, 
tree in_decl)
 
quals = cp_type_quals (arg) | cp_type_quals (t);
 
-   return cp_build_qualified_type_real
+   return cp_build_qualified_type
  (arg, quals, complain | tf_ignore_bad_quals);
  }
else if (code == BOUND_TEMPLATE_TEMPLATE_PARM)
@@ -15874,7 +15874,7 @@ tsubst (tree t, tree args, tsubst_flags_t complain, 
tree in_decl)
   DECL_CONTEXT (arg),
/*entering_scope=*/0,

Re: [PATCH v4] DSE: Use the constant store source if possible

2022-05-31 Thread H.J. Lu via Gcc-patches

On Mon, May 30, 2022 at 09:35:43AM +0100, Richard Sandiford wrote:
> "H.J. Lu"  writes:
> > ---
> > RTL DSE tracks redundant constant stores within a basic block.  When RTL
> > loop invariant motion hoists a constant initialization out of the loop
> > into a separate basic block, the constant store value becomes unknown
> > within the original basic block.  When recording store for RTL DSE, check
> > if the source register is set only once to a constant by a non-partial
> > unconditional load.  If yes, record the constant as the constant store
> > source.  It eliminates unrolled zero stores after memset 0 in a loop
> > where a vector register is used as the zero store source.
> >
> > Extract find_single_def_src from loop-iv.cc and move it to df-core.cc:
> >
> > 1. Rename to df_find_single_def_src.
> > 2. Change the argument to rtx and use rtx_equal_p.
> > 3. Return null for partial or conditional defs.
> >
> > gcc/
> >
> > PR rtl-optimization/105638
> > * df-core.cc (df_find_single_def_src): Moved and renamed from
> > find_single_def_src in loop-iv.cc.  Change the argument to rtx
> > and use rtx_equal_p.  Return null for partial or conditional
> > defs.
> > * df.h (df_find_single_def_src): New prototype.
> > * dse.cc (record_store): Use the constant source if the source
> > register is set only once.
> > * loop-iv.cc (find_single_def_src): Moved to df-core.cc.
> > (replace_single_def_regs): Replace find_single_def_src with
> > df_find_single_def_src.
> >
> > gcc/testsuite/
> >
> > PR rtl-optimization/105638
> > * g++.target/i386/pr105638.C: New test.
> > ---
> >  gcc/df-core.cc   | 44 +++
> >  gcc/df.h |  1 +
> >  gcc/dse.cc   | 14 
> >  gcc/loop-iv.cc   | 45 +---
> >  gcc/testsuite/g++.target/i386/pr105638.C | 44 +++
> >  5 files changed, 104 insertions(+), 44 deletions(-)
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr105638.C
> >
> > diff --git a/gcc/df-core.cc b/gcc/df-core.cc
> > index a901b84878f..f9b4de8eb7a 100644
> > --- a/gcc/df-core.cc
> > +++ b/gcc/df-core.cc
> > @@ -2009,6 +2009,50 @@ df_reg_used (rtx_insn *insn, rtx reg)
> >return df_find_use (insn, reg) != NULL;
> >  }
> >  
> > +/* If REG has a single definition, return its known value, otherwise return
> > +   null.  */
> > +
> > +rtx
> > +df_find_single_def_src (rtx reg)
> > +{
> > +  rtx src = NULL_RTX;
> > +
> > +  /* Don't look through unbounded number of single definition REG copies,
> > + there might be loops for sources with uninitialized variables.  */
> > +  for (int cnt = 0; cnt < 128; cnt++)
> > +{
> > +  df_ref adef = DF_REG_DEF_CHAIN (REGNO (reg));
> > +  if (adef == NULL || DF_REF_NEXT_REG (adef) != NULL
> > + || DF_REF_IS_ARTIFICIAL (adef)
> > + || (DF_REF_FLAGS (adef)
> > + & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
> > +   return NULL_RTX;
> > +
> > +  rtx set = single_set (DF_REF_INSN (adef));
> > +  if (set == NULL || !rtx_equal_p (SET_DEST (set), reg))
> > +   return NULL_RTX;
> > +
> > +  rtx note = find_reg_equal_equiv_note (DF_REF_INSN (adef));
> > +  if (note && function_invariant_p (XEXP (note, 0)))
> > +   {
> > + src = XEXP (note, 0);
> > + break;
> > +   }
> 
> Seems simpler to return this directly, rather than break and then
> check function_invariant_p again.

Fixed.

> 
> > +  src = SET_SRC (set);
> > +
> > +  if (REG_P (src))
> > +   {
> > + reg = src;
> > + continue;
> > +   }
> > +  break;
> > +}
> > +  if (!function_invariant_p (src))
> > +return NULL_RTX;
> > +
> > +  return src;
> > +}
> > +
> >
> >  
> > /*
> > Debugging and printing functions.
> > diff --git a/gcc/df.h b/gcc/df.h
> > index bd329205d08..71e249ad20a 100644
> > --- a/gcc/df.h
> > +++ b/gcc/df.h
> > @@ -991,6 +991,7 @@ extern df_ref df_find_def (rtx_insn *, rtx);
> >  extern bool df_reg_defined (rtx_insn *, rtx);
> >  extern df_ref df_find_use (rtx_insn *, rtx);
> >  extern bool df_reg_used (rtx_insn *, rtx);
> > +extern rtx df_find_single_def_src (rtx);
> >  extern void df_worklist_dataflow (struct dataflow *,bitmap, int *, int);
> >  extern void df_print_regset (FILE *file, const_bitmap r);
> >  extern void df_print_word_regset (FILE *file, const_bitmap r);
> > diff --git a/gcc/dse.cc b/gcc/dse.cc
> > index 30c11cee034..c915266f025 100644
> > --- a/gcc/dse.cc
> > +++ b/gcc/dse.cc
> > @@ -1508,6 +1508,20 @@ record_store (rtx body, bb_info_t bb_info)
> >  
> >   if (tem && CONSTANT_P (tem))
> > const_rhs = tem;
> > + else
> > +   {
> > + /* If RHS is set only once to a constant, set CONST_RHS
> > +to the constant.  */
> > + rtx def_src = df_find_single_def_src (rhs);
> > + if (def_src !=

RE: [ping][vect-patterns] Refactor widen_plus/widen_minus as internal_fns

2022-05-31 Thread Tamar Christina via Gcc-patches

> Just checking there is still interest in this

Definitely,  I am waiting for this to be able to send a new patch upstream 😊

Cheers,
Tamar.

> -Original Message-
> From: Gcc-patches  bounces+tamar.christina=arm@gcc.gnu.org> On Behalf Of Joel Hutton
> via Gcc-patches
> Sent: Tuesday, May 31, 2022 11:08 AM
> To: Richard Biener 
> Cc: Richard Sandiford ; gcc-
> patc...@gcc.gnu.org
> Subject: RE: [ping][vect-patterns] Refactor widen_plus/widen_minus as
> internal_fns
> 
> > Can you post an updated patch (after the .cc renaming, and code_helper
> > now already moved to tree.h).
> >
> > Thanks,
> > Richard.
> 
> Patches attached. They already incorporated the .cc rename, now rebased to
> be after the change to tree.h
> 
> Joel

Re: [PATCH] c++: constexpr init of union sub-aggr w/ base [PR105491]

2022-05-31 Thread Patrick Palka via Gcc-patches

On Wed, 18 May 2022, Jason Merrill wrote:

> On 5/17/22 12:34, Patrick Palka wrote:
> > On Sat, May 7, 2022 at 5:18 PM Jason Merrill  wrote:
> > > 
> > > On 5/6/22 16:46, Patrick Palka wrote:
> > > > On Fri, 6 May 2022, Jason Merrill wrote:
> > > > 
> > > > > On 5/6/22 16:10, Patrick Palka wrote:
> > > > > > On Fri, 6 May 2022, Patrick Palka wrote:
> > > > > > 
> > > > > > > On Fri, 6 May 2022, Jason Merrill wrote:
> > > > > > > 
> > > > > > > > On 5/6/22 14:00, Patrick Palka wrote:
> > > > > > > > > On Fri, 6 May 2022, Patrick Palka wrote:
> > > > > > > > > 
> > > > > > > > > > On Fri, 6 May 2022, Jason Merrill wrote:
> > > > > > > > > > 
> > > > > > > > > > > On 5/6/22 11:22, Patrick Palka wrote:
> > > > > > > > > > > > Here ever since r10-7313-gb599bf9d6d1e18,
> > > > > > > > > > > > reduced_constant_expression_p
> > > > > > > > > > > > in C++11/14 is rejecting the marked sub-aggregate
> > > > > > > > > > > > initializer
> > > > > > > > > > > > (of type
> > > > > > > > > > > > S)
> > > > > > > > > > > > 
> > > > > > > > > > > >W w = {.D.2445={.s={.D.2387={.m=0}, .b=0}}}
> > > > > > > > > > > >   ^
> > > > > > > > > > > > 
> > > > > > > > > > > > ultimately because said initializer has
> > > > > > > > > > > > CONSTRUCTOR_NO_CLEARING
> > > > > > > > > > > > set,
> > > > > > > > > > > > and
> > > > > > > > > > > > so the function proceeds to verify that all fields of S
> > > > > > > > > > > > are
> > > > > > > > > > > > initialized.
> > > > > > > > > > > > And before C++17 we don't expect to see base class
> > > > > > > > > > > > fields (since
> > > > > > > > > > > > > next_initializable_field skips over them), so the base
> > > > > > > > > > > > class
> > > > > > > > > > > > initializer
> > > > > > > > > > > > causes r_c_e_p to return false.
> > > > > > > > > > > 
> > > > > > > > > > > That seems like the primary bug.  I guess r_c_e_p
> > > > > > > > > > > shouldn't be
> > > > > > > > > > > using
> > > > > > > > > > > next_initializable_field.  Really that function should
> > > > > > > > > > > only be
> > > > > > > > > > > used for
> > > > > > > > > > > aggregates.
> > > > > > > > > > 
> > > > > > > > > > I see, I'll try replacing it in r_c_e_p.  Would that be in
> > > > > > > > > > addition
> > > > > > > > > > to
> > > > > > > > > > or instead of the clear_no_implicit_zero approach?
> > > > > > > > > 
> > > > > > > > > I'm testing the following, which uses a custom predicate
> > > > > > > > > instead of
> > > > > > > > > next_initializable_field in r_c_e_p.
> > > > > > > > 
> > > > > > > > Let's make it a public predicate, not internal to r_c_e_p.
> > > > > > > > Maybe it
> > > > > > > > could be
> > > > > > > > next_subobject_field, and the current next_initializable_field
> > > > > > > > change to
> > > > > > > > next_aggregate_field?
> > > > > > > 
> > > > > > > Will do.
> > > > > > > 
> > > > > > > > 
> > > > > > > > > Looks like the inner initializer {.D.2387={.m=0}, .b=0} is
> > > > > > > > > formed
> > > > > > > > > during
> > > > > > > > > the subobject constructor call:
> > > > > > > > > 
> > > > > > > > >  V::V (&((struct S *) this)->D.2120);
> > > > > > > > > 
> > > > > > > > > after the evaluation of which, 'result' in
> > > > > > > > > cxx_eval_call_expression is
> > > > > > > > > NULL
> > > > > > > > > (presumably because it's a CALL_EXPR, not AGGR_INIT_EXPR?):
> > > > > > > > > 
> > > > > > > > >  /* This can be null for a subobject constructor call, in
> > > > > > > > > which case what we care about is the initialization
> > > > > > > > > side-effects rather than the value.  We could get at
> > > > > > > > > the
> > > > > > > > > value by evaluating *this, but we don't bother;
> > > > > > > > > there's
> > > > > > > > > no need to put such a call in the hash table.  */
> > > > > > > > >  result = lval ? ctx->object : ctx->ctor;
> > > > > > > > > 
> > > > > > > > > so we end up not calling clear_no_implicit_zero for the inner
> > > > > > > > > initializer
> > > > > > > > > directly.  We only call clear_no_implicit_zero after
> > > > > > > > > evaluating the
> > > > > > > > > AGGR_INIT_EXPR for outermost initializer (of type W).
> > > > > > > > 
> > > > > > > > Maybe for constructors we could call it on ctx->ctor instead of
> > > > > > > > result,
> > > > > > > > or
> > > > > > > > call r_c_e_p in C++20+?
> > > > > > > 
> > > > > > > But both ctx->ctor and ->object are NULL during a subobject
> > > > > > > constructor
> > > > > > > call (since we apparently clear these fields when entering a
> > > > > > > STATEMENT_LIST):
> > > > > > > 
> > > > > > > So I tried instead obtaining the constructor by evaluating new_obj
> > > > > > > via
> > > > > > > 
> > > > > > > --- a/gcc/cp/constexpr.cc
> > > > > > > +++ b/gcc/cp/constexpr.cc
> > > > > > > @@ -2993,6 +2988,9 @@ cxx_eval_call_expression (const
> > > > > > > constexpr_ctx *ctx,
> > > > > > > tree t,
> > > > > > >  in order to detect reading an unitialized object

Re: [PATCH 2/5] Implement generic range temporaries.

2022-05-31 Thread Andrew MacLeod via Gcc-patches


On 5/31/22 02:21, Aldy Hernandez wrote:

On Mon, May 30, 2022 at 4:56 PM Andrew MacLeod  wrote:

On 5/30/22 09:27, Aldy Hernandez wrote:

Now that we have generic ranges, we need a way to define generic local
temporaries on the stack for intermediate calculations in the ranger
and elsewhere.  We need temporaries analogous to int_range_max, but
for any of the supported types (currently just integers, but soon
integers, pointers, and floats).

The tmp_range object is such a temporary.  It is designed to be
transparently used as a vrange.  It shares vrange's abstract API, and
implicitly casts itself to a vrange when passed around.

The ultimate name will be value_range, but we need to remove legacy
first for that to happen.  Until then, tmp_range will do.


I was going to suggest maybe renaming value_range to legacy_range or
something, and then start using value_range for ranges of any type.
Then it occurred to me that numerous places which use value_range
will/can continue to use value_range going forward.. ie

value_range vr;
   if (!rvals->range_of_expr (vr, name, stmt))
 return -1;

would be unaffected, so it would be pointless turmoil to rename that and
then rename it back to value_range.

I also notice there are already a few instances of local variables named
tmp_range, which makes renames annoying.   Perhaps we should use
Value_Range or something like that in the interim for the multi-type
ranges?   Then the rename is trivial down the road, formatting will be
unaffected, and then we're kinda sorta using the end_goal name?

OMG that is so ugly!  Although I guess it would be temporary.

Speaking of which, how far away are we from enabling ranger in VRP1?
Because once we do that, we can start nuking legacy and cleaning all
this up.

Aldy

I'm thinking about making the switch mid-June-ish...  I still have a few
verifications to make.  We want to leave legacy for at least a while so
we can manually switch back to it for investigation of any issues that
come up during the transition.  So I'd expect an early August time frame
before trying to remove legacy, or at least legacy VRP.


Those are my thoughts anyway.

Andrew

Andrew

Re: [PATCH] Add a bit dislike for separate mem alternative when op is REG_P.

2022-05-31 Thread Richard Sandiford via Gcc-patches

Vladimir Makarov via Gcc-patches  writes:
> On 2022-05-29 23:05, Hongtao Liu wrote:
>> On Fri, May 27, 2022 at 5:12 AM Vladimir Makarov via Gcc-patches
>>  wrote:
>>>
>>> On 2022-05-24 23:39, liuhongt wrote:
 Right now, mem_cost for separate mem alternative is 1 * frequency which
 is pretty small and caused the unnecessary SSE spill in the PR, I've tried
 to rework backend cost model, but RA still not happy with that(regress
 somewhere else). I think the root cause of this is cost for separate 'm'
 alternative cost is too small, especially considering that the mov cost
 of gpr are 2(default for REGISTER_MOVE_COST). So this patch increase 
 mem_cost
 to 2*frequency, also increase 1 for reg_class cost when m alternative.


 Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
 Ok for trunk?
>>> Thank you for addressing this problem. And sorry I can not approve this
>>> patch at least w/o your additional work on benchmarking this change.
>>>
>>> This code is very old.  It is coming from older RA (former file
>>> regclass.c) and existed practically since GCC day 1.  People tried many
>>> times to improve this code.  The code also affects many targets.
>> Yes, that's why I increased it as low as possible, so it won't regress
>> #c6 in the PR.
>>> I can approve this patch if you show that there is no regression at
>>> least on x86-64 on some credible benchmark, e.g. SPEC2006 or SPEC2017.
>>>
>> I've tested the patch for SPEC2017 with both  -march=cascadelake
>> -Ofast -flto and -O2 -mtune=generic.
>> No obvious regression is observed, the binaries are all different from
>> before, so I looked at 2 of them, the difference mainly comes from
>> different choices of registers(xmm13 -> xmm12).
>> Ok for trunk then?
>
> OK.
>
> Thank you for checking SPEC2017.
>
> I hope it will not create troubles for other targets.

Can we hold off for a bit?  Like Alexander says, there seem to be
some inconsistencies in the target patterns, so I think we should
first rule out any changes being needed there.

Thanks,
Richard

[GCC-12][committed] d: Fix D lexer sometimes fails to compile code read from stdin

2022-05-31 Thread Iain Buclaw via Gcc-patches

Hi,

As of gdc-12, the lexer expects there to be 4 bytes of zero padding at the end
of the source buffer to mark the end of input.  Sometimes when reading
from stdin, the data at the end of input is garbage rather than zeroes.
Fix that by explicitly calling memset past the end of the buffer.

Bootstrapped and regression tested on x86_64-linux-gnu, committed to
mainline and backported to the releases/gcc-12 branch.

Regards,
Iain.
---
PR d/105544

gcc/d/ChangeLog:

* d-lang.cc (d_parse_file): Zero padding past the end of the stdin
buffer so the D lexer has a sentinel to stop parsing at.
---
 gcc/d/d-lang.cc | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/d/d-lang.cc b/gcc/d/d-lang.cc
index ef0fe0b8adb..b7c8685f779 100644
--- a/gcc/d/d-lang.cc
+++ b/gcc/d/d-lang.cc
@@ -1077,6 +1077,10 @@ d_parse_file (void)
  global.params.dihdr.doOutput);
  modules.push (m);
 
+ /* Zero the padding past the end of the buffer so the D lexer has a
+sentinel.  The lexer only reads up to 4 bytes at a time.  */
+ memset (buffer + len, '\0', 16);
+
  /* Overwrite the source file for the module, the one created by
 Module::create would have a forced a `.d' suffix.  */
  m->src.length = len;
-- 
2.34.1

Re: [PATCH] Add a bit dislike for separate mem alternative when op is REG_P.

2022-05-31 Thread Vladimir Makarov via Gcc-patches




On 2022-05-29 23:05, Hongtao Liu wrote:

On Fri, May 27, 2022 at 5:12 AM Vladimir Makarov via Gcc-patches
 wrote:


On 2022-05-24 23:39, liuhongt wrote:

Right now, mem_cost for separate mem alternative is 1 * frequency which
is pretty small and caused the unnecessary SSE spill in the PR, I've tried
to rework backend cost model, but RA still not happy with that(regress
somewhere else). I think the root cause of this is cost for separate 'm'
alternative cost is too small, especially considering that the mov cost
of gpr are 2(default for REGISTER_MOVE_COST). So this patch increase mem_cost
to 2*frequency, also increase 1 for reg_class cost when m alternative.


Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

Thank you for addressing this problem. And sorry I can not approve this
patch at least w/o your additional work on benchmarking this change.

This code is very old.  It is coming from older RA (former file
regclass.c) and existed practically since GCC day 1.  People tried many
times to improve this code.  The code also affects many targets.

Yes, that's why I increased it as low as possible, so it won't regress
#c6 in the PR.

I can approve this patch if you show that there is no regression at
least on x86-64 on some credible benchmark, e.g. SPEC2006 or SPEC2017.


I've tested the patch for SPEC2017 with both  -march=cascadelake
-Ofast -flto and -O2 -mtune=generic.
No obvious regression is observed, the binaries are all different from
before, so I looked at 2 of them, the difference mainly comes from
different choices of registers(xmm13 -> xmm12).
Ok for trunk then?


OK.

Thank you for checking SPEC2017.

I hope it will not create troubles for other targets.

[PATCH] c++: non-dep call with empty TYPE_BINFO [PR105758]

2022-05-31 Thread Patrick Palka via Gcc-patches

Here the out-of-line definition of Z<T>::z causes duplicate_decls to
change z's type to use the implicit instantiation Z<T> rather than the
corresponding primary template type (which is also the type of the
injected class name), and the former, being a dependent specialization,
lacks a TYPE_BINFO (although its TYPE_CANONICAL was set by a special
case in lookup_template_class_1 to point to the latter).

Later, when processing the non-dependent call z->foo(0), build_over_call
relies on the object argument's TYPE_BINFO to build the templated form
for this call, which fails because the object argument type has empty
TYPE_BINFO due to the above.

It seems weird that the implicit instantiation Z<T> doesn't have the
same TYPE_BINFO as the primary template type Z, despite them being
proclaimed equivalent via TYPE_CANONICAL.  So I tried also setting
TYPE_BINFO in the special case in lookup_template_class_1, but that led
to some problems with constrained partial specializations of the form
Z<T>.  I'm not sure what, if anything, we ought to do about the subtle
differences between these two kinds of the same type.

Fortunately it seems we don't need to rely on TYPE_BINFO at all in
build_over_call here -- the z_candidate struct already contains the
exact binfos we need to rebuild the BASELINK for the templated form.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk/12?

PR c++/105758

gcc/cp/ChangeLog:

* call.cc (build_over_call): Use z_candidate::conversion_path
and ::access_path instead of TYPE_BINFO to build the BASELINK
for the templated form.

gcc/testsuite/ChangeLog:

* g++.dg/template/non-dependent24.C: New test.
---
 gcc/cp/call.cc|  4 ++--
 .../g++.dg/template/non-dependent24.C | 19 +++
 2 files changed, 21 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/template/non-dependent24.C

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 14c6037729f..85fe9b5ab85 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -9244,8 +9244,8 @@ build_over_call (struct z_candidate *cand, int flags, 
tsubst_flags_t complain)
}
   else
{
- tree binfo = TYPE_BINFO (TREE_TYPE (first_arg));
- callee = build_baselink (binfo, binfo, fn, NULL_TREE);
+ callee = build_baselink (cand->conversion_path, cand->access_path,
+  fn, NULL_TREE);
  callee = build_min (COMPONENT_REF, TREE_TYPE (fn),
  first_arg, callee, NULL_TREE);
}
diff --git a/gcc/testsuite/g++.dg/template/non-dependent24.C 
b/gcc/testsuite/g++.dg/template/non-dependent24.C
new file mode 100644
index 000..0ddc75c78ee
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/non-dependent24.C
@@ -0,0 +1,19 @@
+// PR c++/105758
+
+struct A {
+  void foo(int);
+};
+
+template<class>
+struct Z : A {
+  static Z *z;
+  void bar();
+};
+
+template<class T>
+Z<T> *Z<T>::z;
+
+template<class T>
+void Z<T>::bar() {
+  z->foo(0);
+}
-- 
2.36.1.203.g1bcf4f6271

[PATCH] aarch64: Fix build with gcc-4.8

2022-05-31 Thread Christophe Lyon via Gcc-patches

My r13-680-g0dc8e1e7026d9b commit to add support for Decimal Floating
Point introduced:
  case SDmode:
  case DDmode:
  case TDmode:
which are rejected by gcc-4.8 as build compiler.

This patch replaces them with E_SDmode, E_DDmode and E_TDmode.

Committed as obvious.

* config/aarch64/aarch64.cc (aarch64_gimplify_va_arg_expr):
Prefix mode names with E_.
---
 gcc/config/aarch64/aarch64.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index c1d072fec4c..43bb93db133 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -19805,15 +19805,15 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, 
gimple_seq *pre_p,
  field_t = long_double_type_node;
  field_ptr_t = long_double_ptr_type_node;
  break;
-   case SDmode:
+   case E_SDmode:
  field_t = dfloat32_type_node;
  field_ptr_t = build_pointer_type (dfloat32_type_node);
  break;
-   case DDmode:
+   case E_DDmode:
  field_t = dfloat64_type_node;
  field_ptr_t = build_pointer_type (dfloat64_type_node);
  break;
-   case TDmode:
+   case E_TDmode:
  field_t = dfloat128_type_node;
  field_ptr_t = build_pointer_type (dfloat128_type_node);
  break;
-- 
2.25.1

Correct spelling of DW_AT_namelist_item

2022-05-31 Thread Alan Modra via Gcc-patches

This typo was fixed a little while ago in binutils-gdb with commit
e951225303.  I noticed the difference today when importing libiberty
from gcc.  Committed as obvious.

include/
* dwarf2.def: Correct spelling of DW_AT_namelist_item.
gcc/
* dwarf2out.cc (gen_namelist_decl): Adjust to suit correct
spelling of DW_AT_namelist_item.

diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index fccf59e8ec3..29f32ec6939 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -27479,7 +27479,7 @@ gen_namelist_decl (tree name, dw_die_ref scope_die, 
tree item_decls)
nml_item_ref_die = force_decl_die (value);
 
   nml_item_die = new_die (DW_TAG_namelist_item, nml_die, NULL);
-  add_AT_die_ref (nml_item_die, DW_AT_namelist_items, nml_item_ref_die);
+  add_AT_die_ref (nml_item_die, DW_AT_namelist_item, nml_item_ref_die);
 }
   return nml_die;
 }
diff --git a/include/dwarf2.def b/include/dwarf2.def
index 4214c80907a..530c6f849f9 100644
--- a/include/dwarf2.def
+++ b/include/dwarf2.def
@@ -289,7 +289,7 @@ DW_AT (DW_AT_frame_base, 0x40)
 DW_AT (DW_AT_friend, 0x41)
 DW_AT (DW_AT_identifier_case, 0x42)
 DW_AT (DW_AT_macro_info, 0x43)
-DW_AT (DW_AT_namelist_items, 0x44)
+DW_AT (DW_AT_namelist_item, 0x44)
 DW_AT (DW_AT_priority, 0x45)
 DW_AT (DW_AT_segment, 0x46)
 DW_AT (DW_AT_specification, 0x47)

-- 
Alan Modra
Australia Development Lab, IBM

Re: [PATCH] [PR105665] ivopts: check defs of names in base for undefs

2022-05-31 Thread Alexandre Oliva via Gcc-patches

On May 30, 2022, Richard Biener  wrote:

> I don't think you can rely on TREE_VISITED not set at the start of the
> pass (and you don't clear it either).

I don't clear it, but I loop over all SSA names and set TREE_VISITED to
either true or false, so that's covered.

I even had a test patch that checked that TREE_VISITED remains unchanged
and still matched the expected value, with a recursive verification.

I could switch to an sbitmap if that's preferred, though.

> I also wonder how you decide that tracking PHIs with (one) uninit arg
> is "enough"?

It's a conservative assumption, granted.  One could imagine cases in
which an uninit def is never actually used, say because of conditionals
forced by external circumstances the compiler cannot possibly know
about.  But then, just as this sort of bug shows, sometimes even when an
uninit is not actually used, the fact that it is uninit and thus
undefined may end up percolating onto stuff that is actually used, so I
figured we'd be better off leaving alone whatever is potentially derived
from an uninit value.

> Is it important which edge of the PHI the undef appears in?

At some point, I added recursion to find_ssa_undef, at PHI nodes and
assignments, and pondered whether to recurse at PHI nodes only for defs
that were "earlier" ones, rather than coming from back edges.  I ended
up reasoning that back edges would affect step and rule out an IV
candidate even sooner.  But the forward propagation of maybe-undef
obviated that reasoning.  Now, almost tautologically, if circumstances
are such that the compiler could only tell that an ssa name is defined
with external knowledge, then, since such external knowledge is not
available to the compiler, it has to assume that the ssa name may be
undefined.

> I presume the testcase might have it on the loop entry edge?

The original testcase did.  The modified one (the added increment) shows
it can be an earlier edge that has the maybe-undef name. 

> I presume only PHIs in loop headers are to be considered?

As in the modified testcase, earlier PHIs that are entirely outside the
loop can still trigger the bug.  Adding more increments of g guarded by
conditionals involving other global variables pushes the undef ssa name
further and further away from the inner loop, while still rendering g an
unsuitable IV.

>> +int a, b, c[1], d[2], *e = c;
>> +int main() {
>> +  int f = 0;
>> +  for (; b < 2; b++) {
>> +int g;
>> +if (f)
>> +  g++, b = 40;
>> +a = d[b * b];
>> +for (f = 0; f < 3; f++) {
>> +  if (e)
>> +break;
>> +  g--;
>> +  if (a)
>> +a = g;
>> +}
>> +  }
>> +  return 0;
>> +}

-- 
Alexandre Oliva, happy hacker    https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about

Re: [PATCH] c++: don't substitute TEMPLATE_PARM_CONSTRAINT [PR100374]

2022-05-31 Thread Patrick Palka via Gcc-patches

On Sun, 29 May 2022, Jason Merrill wrote:

> On 5/29/22 22:10, Jason Merrill wrote:
> > On 5/27/22 14:05, Patrick Palka wrote:
> > > This makes us avoid substituting into the TEMPLATE_PARM_CONSTRAINT of
> > > each template parameter except as necessary for (friend) declaration
> > > matching, like we already do for the overall TEMPLATE_PARMS_CONSTRAINTS
> > > of a template parameter list.
> > > 
> > > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
> > > trunk and perhaps 12.2?  Also tested on range-v3 and cmcstl2.
> > 
> > Are there already tests that cover the friend cases?

Yes, by cpp2a/concepts-friend{2,3,7}.C I think.

> 
> Also, don't you also need to handle specialization of partial instantiations?

Hmm, do you have an example?  IIUC we call tsubst_friend_function and
tsubst_friend_class only from instantiate_class_template_1, which always
uses the most general template and full template argument set to
instantiate any friend declarations.  So friend declarations are never
partially instantiated I think.  (And IIUC non-friends are irrelevant
here since we don't ever want to substitute their constraints outside of
satisfaction.)

> 
> > > PR c++/100374
> > > 
> > > gcc/cp/ChangeLog:
> > > 
> > > * pt.cc (tsubst_each_template_parm_constraint): Define.
> > > (tsubst_friend_function): Use it.
> > > (tsubst_friend_class): Use it.
> > > (tsubst_template_parm): Don't substitute TEMPLATE_PARM_CONSTRAINT.
> > > 
> > > gcc/testsuite/ChangeLog:
> > > 
> > > * g++.dg/cpp2a/concepts-template-parm11.C: New test.
> > > ---
> > >   gcc/cp/pt.cc  | 35 ---
> > >   .../g++.dg/cpp2a/concepts-template-parm11.C   | 16 +
> > >   2 files changed, 47 insertions(+), 4 deletions(-)
> > >   create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-template-parm11.C
> > > 
> > > diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
> > > index 24bbe2f4060..ec168234325 100644
> > > --- a/gcc/cp/pt.cc
> > > +++ b/gcc/cp/pt.cc
> > > @@ -184,6 +184,7 @@ static int unify_pack_expansion (tree, tree, tree,
> > >    tree, unification_kind_t, bool, bool);
> > >   static tree copy_template_args (tree);
> > >   static tree tsubst_template_parms (tree, tree, tsubst_flags_t);
> > > +static void tsubst_each_template_parm_constraint (tree, tree,
> > > tsubst_flags_t);
> > >   tree most_specialized_partial_spec (tree, tsubst_flags_t);
> > >   static tree tsubst_aggr_type (tree, tree, tsubst_flags_t, tree, int);
> > >   static tree tsubst_arg_types (tree, tree, tree, tsubst_flags_t, tree);
> > > @@ -11254,7 +11255,12 @@ tsubst_friend_function (tree decl, tree args)
> > >     tree parms = DECL_TEMPLATE_PARMS (new_friend);
> > >     tree treqs = TEMPLATE_PARMS_CONSTRAINTS (parms);
> > >     treqs = maybe_substitute_reqs_for (treqs, new_friend);
> > > -  TEMPLATE_PARMS_CONSTRAINTS (parms) = treqs;
> > > +  if (treqs != TEMPLATE_PARMS_CONSTRAINTS (parms))
> > > +    {
> > > +  TEMPLATE_PARMS_CONSTRAINTS (parms) = treqs;
> > > +  /* As well as each TEMPLATE_PARM_CONSTRAINT.  */
> > > +  tsubst_each_template_parm_constraint (parms, args,
> > > tf_warning_or_error);
> > > +    }
> > >   }
> > >     /* The mangled name for the NEW_FRIEND is incorrect.  The function
> > > @@ -11500,6 +11506,8 @@ tsubst_friend_class (tree friend_tmpl, tree args)
> > >   {
> > >     tree parms = tsubst_template_parms (DECL_TEMPLATE_PARMS
> > > (friend_tmpl),
> > >     args, tf_warning_or_error);
> > > +  tsubst_each_template_parm_constraint (parms, args,
> > > +    tf_warning_or_error);
> > >     location_t saved_input_location = input_location;
> > >     input_location = DECL_SOURCE_LOCATION (friend_tmpl);
> > >     tree cons = get_constraints (tmpl);
> > > @@ -11534,6 +11542,8 @@ tsubst_friend_class (tree friend_tmpl, tree args)
> > >  DECL_FRIEND_CONTEXT (friend_tmpl));
> > >     --processing_template_decl;
> > >     set_constraints (tmpl, ci);
> > > +  tsubst_each_template_parm_constraint (DECL_TEMPLATE_PARMS
> > > (tmpl),
> > > +    args, tf_warning_or_error);
> > >   }
> > >     /* Inject this template into the enclosing namspace scope.  */
> > > @@ -13656,7 +13666,6 @@ tsubst_template_parm (tree t, tree args,
> > > tsubst_flags_t complain)
> > >     default_value = TREE_PURPOSE (t);
> > >     parm_decl = TREE_VALUE (t);
> > > -  tree constraint = TEMPLATE_PARM_CONSTRAINTS (t);
> > >     parm_decl = tsubst (parm_decl, args, complain, NULL_TREE);
> > >     if (TREE_CODE (parm_decl) == PARM_DECL
> > > @@ -13664,13 +13673,31 @@ tsubst_template_parm (tree t, tree args,
> > > tsubst_flags_t complain)
> > >   parm_decl = error_mark_node;
> > >     default_value = tsubst_template_arg (default_value, args,
> > >  complain, NULL_TREE);
> > > -  co

Re: [2/2] PR96463 -- changes to type checking vec_perm_expr in middle end

2022-05-31 Thread Prathamesh Kulkarni via Gcc-patches

On Mon, 23 May 2022 at 22:57, Prathamesh Kulkarni
 wrote:
>
> On Mon, 9 May 2022 at 21:21, Prathamesh Kulkarni
>  wrote:
> >
> > On Mon, 9 May 2022 at 19:22, Richard Sandiford
> >  wrote:
> > >
> > > Prathamesh Kulkarni  writes:
> > > > On Tue, 3 May 2022 at 18:25, Richard Sandiford
> > > >  wrote:
> > > >>
> > > >> Prathamesh Kulkarni  writes:
> > > >> > On Tue, 4 Jan 2022 at 19:12, Richard Sandiford
> > > >> >  wrote:
> > > >> >>
> > > >> >> Richard Biener  writes:
> > > >> >> > On Tue, 4 Jan 2022, Richard Sandiford wrote:
> > > >> >> >
> > > >> >> >> Richard Biener  writes:
> > > >> >> >> > On Fri, 17 Dec 2021, Richard Sandiford wrote:
> > > >> >> >> >
> > > >> >> >> >> Prathamesh Kulkarni  writes:
> > > >> >> >> >> > Hi,
> > > >> >> >> >> > The attached patch rearranges order of type-check for 
> > > >> >> >> >> > vec_perm_expr
> > > >> >> >> >> > and relaxes type checking for
> > > > >> >> >> >> > lhs = vec_perm_expr<rhs1, rhs2, mask>
> > > >> >> >> >> >
> > > >> >> >> >> > when:
> > > >> >> >> >> > rhs1 == rhs2,
> > > >> >> >> >> > lhs is variable length vector,
> > > >> >> >> >> > rhs1 is fixed length vector,
> > > >> >> >> >> > TREE_TYPE (lhs) == TREE_TYPE (rhs1)
> > > >> >> >> >> >
> > > >> >> >> >> > I am not sure tho if this check is correct ? My intent was 
> > > >> >> >> >> > to capture
> > > >> >> >> >> > case when vec_perm_expr is used to "extend" fixed length 
> > > >> >> >> >> > vector to
> > > > >> >> >> >> > its VLA equivalent.
> > > >> >> >> >>
> > > >> >> >> >> VLAness isn't really the issue.  We want the same thing to 
> > > >> >> >> >> work for
> > > >> >> >> >> -msve-vector-bits=256, -msve-vector-bits=512, etc., even 
> > > >> >> >> >> though the
> > > >> >> >> >> vectors are fixed-length in that case.
> > > >> >> >> >>
> > > >> >> >> >> The principle is that for:
> > > >> >> >> >>
> > > > >> >> >> >>   A = VEC_PERM_EXPR <B, C, D>;
> > > >> >> >> >>
> > > >> >> >> >> the requirements are:
> > > >> >> >> >>
> > > >> >> >> >> - A, B, C and D must be vectors
> > > >> >> >> >> - A, B and C must have the same element type
> > > >> >> >> >> - D must have an integer element type
> > > >> >> >> >> - A and D must have the same number of elements (NA)
> > > >> >> >> >> - B and C must have the same number of elements (NB)
> > > >> >> >> >>
> > > >> >> >> >> The semantics are that we create a joined vector BC (all 
> > > >> >> >> >> elements of B
> > > > >> >> >> >> followed by all elements of C) and that:
> > > >> >> >> >>
> > > >> >> >> >>   A[i] = BC[D[i] % (NB+NB)]
> > > >> >> >> >>
> > > >> >> >> >> for 0 ≤ i < NA.
> > > >> >> >> >>
> > > >> >> >> >> This operation makes sense even if NA != NB.
> > > >> >> >> >
> > > >> >> >> > But note that we don't currently expect NA != NB and the optab 
> > > >> >> >> > just
> > > >> >> >> > has a single mode.
> > > >> >> >>
> > > >> >> >> True, but we only need this for constant permutes.  They are 
> > > >> >> >> already
> > > >> >> >> special in that they allow the index elements to be wider than 
> > > >> >> >> the data
> > > >> >> >> elements.
> > > >> >> >
> > > >> >> > OK, then we should reflect this in the stmt verification and only 
> > > >> >> > relax
> > > >> >> > the constant permute vector case and also amend the
> > > >> >> > TARGET_VECTORIZE_VEC_PERM_CONST accordingly.
> > > >> >>
> > > >> >> Sounds good.
> > > >> >>
> > > >> >> > For non-constant permutes the docs say the mode of vec_perm is
> > > >> >> > the common mode of operands 1 and 2 whilst the mode of operand 0
> > > >> >> > is unspecified - even unconstrained by the docs.  I'm not sure
> > > >> >> > if vec_perm expansion is expected to eventually FAIL.  Updating 
> > > >> >> > the
> > > >> >> > docs of vec_perm would be appreciated as well.
> > > >> >>
> > > >> >> Yeah, I guess de facto operand 0 has to be the same mode as operands
> > > >> >> 1 and 2.  Maybe that was just an oversight, or maybe it seemed 
> > > >> >> obvious
> > > >> >> or self-explanatory at the time. :-)
> > > >> >>
> > > >> >> > As said I prefer to not mangle the existing stmt checking too much
> > > > >> >> > at this stage so minimal adjustment is preferred there.
> > > >> >>
> > > >> >> The PR is only an enhancement request rather than a bug, so I think 
> > > >> >> the
> > > >> >> patch would need to wait for GCC 13 whatever happens.
> > > >> > Hi,
> > > >> > In attached patch, the type checking is relaxed only if mask is 
> > > >> > constant.
> > > >> > Does this look OK ?
> > > >> >
> > > >> > Thanks,
> > > >> > Prathamesh
> > > >> >>
> > > >> >> Thanks,
> > > >> >> Richard
> > > >> >
> > > >> > diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
> > > >> > index e321d929fd0..02b88f67855 100644
> > > >> > --- a/gcc/tree-cfg.cc
> > > >> > +++ b/gcc/tree-cfg.cc
> > > >> > @@ -4307,6 +4307,24 @@ verify_gimple_assign_ternary (gassign *stmt)
> > > >> >break;
> > > >> >
> > > >> >  case VEC_PERM_EXPR:
> > > >> > +  /* If permute is constant, then we allow for lhs and rhs
> > > >> > +  to have different vector types, provided:
> >

Re: [1/2] PR96463 - aarch64 specific changes

2022-05-31 Thread Prathamesh Kulkarni via Gcc-patches

On Thu, 12 May 2022 at 16:15, Richard Sandiford
 wrote:
>
> Prathamesh Kulkarni  writes:
> > On Wed, 11 May 2022 at 12:44, Richard Sandiford
> >  wrote:
> >>
> >> Prathamesh Kulkarni  writes:
> >> > On Fri, 6 May 2022 at 16:00, Richard Sandiford
> >> >  wrote:
> >> >>
> >> >> Prathamesh Kulkarni  writes:
> >> >> > diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
> >> >> > b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> >> >> > index c24c0548724..1ef4ea2087b 100644
> >> >> > --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> >> >> > +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> >> >> > @@ -44,6 +44,14 @@
> >> >> >  #include "aarch64-sve-builtins-shapes.h"
> >> >> >  #include "aarch64-sve-builtins-base.h"
> >> >> >  #include "aarch64-sve-builtins-functions.h"
> >> >> > +#include "aarch64-builtins.h"
> >> >> > +#include "gimple-ssa.h"
> >> >> > +#include "tree-phinodes.h"
> >> >> > +#include "tree-ssa-operands.h"
> >> >> > +#include "ssa-iterators.h"
> >> >> > +#include "stringpool.h"
> >> >> > +#include "value-range.h"
> >> >> > +#include "tree-ssanames.h"
> >> >>
> >> >> Minor, but: I think the preferred approach is to include "ssa.h"
> >> >> rather than include some of these headers directly.
> >> >>
> >> >> >
> >> >> >  using namespace aarch64_sve;
> >> >> >
> >> >> > @@ -1207,6 +1215,56 @@ public:
> >> >> >  insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0));
> >> >> >  return e.use_contiguous_load_insn (icode);
> >> >> >}
> >> >> > +
> >> >> > +  gimple *
> >> >> > +  fold (gimple_folder &f) const OVERRIDE
> >> >> > +  {
> >> >> > +tree arg0 = gimple_call_arg (f.call, 0);
> >> >> > +tree arg1 = gimple_call_arg (f.call, 1);
> >> >> > +
> >> >> > +/* Transform:
> >> >> > +   lhs = svld1rq ({-1, -1, ... }, arg1)
> >> >> > +   into:
> >> >> > +   tmp = mem_ref [(int * {ref-all}) arg1]
> >> >> > +   lhs = vec_perm_expr<tmp, tmp, {0, 1, 2, 3, ...}>.
> >> >> > +   on little endian target.  */
> >> >> > +
> >> >> > +if (!BYTES_BIG_ENDIAN
> >> >> > + && integer_all_onesp (arg0))
> >> >> > +  {
> >> >> > + tree lhs = gimple_call_lhs (f.call);
> >> >> > + auto simd_type = aarch64_get_simd_info_for_type (Int32x4_t);
> >> >>
> >> >> Does this work for other element sizes?  I would have expected it
> >> >> to be the (128-bit) Advanced SIMD vector associated with the same
> >> >> element type as the SVE vector.
> >> >>
> >> >> The testcase should cover more than just int32x4_t -> svint32_t,
> >> >> just to be sure.
> >> > In the attached patch, it obtains corresponding advsimd type with:
> >> >
> >> > tree eltype = TREE_TYPE (lhs_type);
> >> > unsigned nunits = 128 / TREE_INT_CST_LOW (TYPE_SIZE (eltype));
> >> > tree vectype = build_vector_type (eltype, nunits);
> >> >
> >> > While this seems to work with different element sizes, I am not sure if 
> >> > it's
> >> > the correct approach ?
> >>
> >> Yeah, that looks correct.  Other SVE code uses aarch64_vq_mode
> >> to get the vector mode associated with a .Q “element”, so an
> >> alternative would be:
> >>
> >> machine_mode vq_mode = aarch64_vq_mode (TYPE_MODE (eltype)).require ();
> >> tree vectype = build_vector_type_for_mode (eltype, vq_mode);
> >>
> >> which is more explicit about wanting an Advanced SIMD vector.
> >>
> >> >> > +
> >> >> > + tree elt_ptr_type
> >> >> > +   = build_pointer_type_for_mode (simd_type.eltype, VOIDmode, 
> >> >> > true);
> >> >> > + tree zero = build_zero_cst (elt_ptr_type);
> >> >> > +
> >> >> > + /* Use element type alignment.  */
> >> >> > + tree access_type
> >> >> > +   = build_aligned_type (simd_type.itype, TYPE_ALIGN 
> >> >> > (simd_type.eltype));
> >> >> > +
> >> >> > + tree tmp = make_ssa_name_fn (cfun, access_type, 0);
> >> >> > + gimple *mem_ref_stmt
> >> >> > +   = gimple_build_assign (tmp, fold_build2 (MEM_REF, 
> >> >> > access_type, arg1, zero));
> >> >>
> >> >> Long line.  Might be easier to format by assigning the fold_build2 
> >> >> result
> >> >> to a temporary variable.
> >> >>
> >> >> > + gsi_insert_before (f.gsi, mem_ref_stmt, GSI_SAME_STMT);
> >> >> > +
> >> >> > + tree mem_ref_lhs = gimple_get_lhs (mem_ref_stmt);
> >> >> > + tree vectype = TREE_TYPE (mem_ref_lhs);
> >> >> > + tree lhs_type = TREE_TYPE (lhs);
> >> >>
> >> >> Is this necessary?  The code above supplied the types and I wouldn't
> >> >> have expected them to change during the build process.
> >> >>
> >> >> > +
> >> >> > + int source_nelts = TYPE_VECTOR_SUBPARTS (vectype).to_constant 
> >> >> > ();
> >> >> > + vec_perm_builder sel (TYPE_VECTOR_SUBPARTS (lhs_type), 
> >> >> > source_nelts, 1);
> >> >> > + for (int i = 0; i < source_nelts; i++)
> >> >> > +   sel.quick_push (i);
> >> >> > +
> >> >> > + vec_perm_indices indices (sel, 1, source_nelts);
> >> >> > + gcc_checking_assert (can_vec_perm_const_p (TYPE_MODE 
> >> >> > (lhs_type), indices));
> >> >> > + tree mask = vec_perm_indice

[PATCH v2 0/1] RISC-V: Add RVV (RISC-V 'V' Extension) support

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

This patch adds the implementation that was missing from the V1 patch.

*** BLURB HERE ***

zhongjuzhe (1):
  Add unit-stride load store intrinsics

 .../riscv/riscv-vector-builtins-functions.cc  | 80 +++
 .../riscv/riscv-vector-builtins-functions.def |  7 ++
 .../riscv/riscv-vector-builtins-functions.h   | 42 ++
 gcc/config/riscv/riscv-vector-builtins.cc | 14 
 4 files changed, 143 insertions(+)

-- 
2.36.1

[PATCH v2 1/1] Add unit-stride load store intrinsics

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-functions.cc 
(loadstore::assemble_name): New function.
(loadstore::get_argument_types): New function.
(vle::call_properties): New function.
(vle::get_return_type): New function.
(vle::can_be_overloaded_p): New function.
(vle::expand): New function.
(vse::call_properties): New function.
(vse::can_be_overloaded_p): New function.
(vse::expand): New function.
* config/riscv/riscv-vector-builtins-functions.def (vle): New macro 
define.
(vse): New macro define.
* config/riscv/riscv-vector-builtins-functions.h (class loadstore): New 
class.
(class vle): New class.
(class vse): New class.
* config/riscv/riscv-vector-builtins.cc (init_def_variables): New local 
constant declare.

---
 .../riscv/riscv-vector-builtins-functions.cc  | 80 +++
 .../riscv/riscv-vector-builtins-functions.def |  7 ++
 .../riscv/riscv-vector-builtins-functions.h   | 42 ++
 gcc/config/riscv/riscv-vector-builtins.cc | 14 
 4 files changed, 143 insertions(+)

diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.cc 
b/gcc/config/riscv/riscv-vector-builtins-functions.cc
index a25f167f40e..fa39eedcd86 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.cc
@@ -1510,6 +1510,86 @@ vundefined::expand (const function_instance &, tree, rtx 
target) const
   return target;
 }
 
+/* A function implementation for loadstore functions.  */
+char *
+loadstore::assemble_name (function_instance &instance)
+{
+  machine_mode mode = instance.get_arg_pattern ().arg_list[0];
+  bool unsigned_p = instance.get_data_type_list ()[0] == DT_unsigned;
+  int sew = GET_MODE_BITSIZE (GET_MODE_INNER (mode));
+  char name[8];
+  snprintf (name, 8, "%s%d", instance.get_base_name (), sew);
+  const char *op = get_operation_str (instance.get_operation ());
+  const char *dt = mode2data_type_str (mode, unsigned_p, false);
+  const char *pred = get_pred_str (instance.get_pred ());
+  snprintf (instance.function_name, NAME_MAXLEN, "%s%s%s%s", name, op, dt, 
pred);
+  if (this->can_be_overloaded_p (instance))
+{
+  append_name (name);
+  append_name (get_pred_str (instance.get_pred (), true));
+  return finish_name ();
+}
+  return nullptr;
+}
+
+void
+loadstore::get_argument_types (const function_instance &instance,
+   vec &argument_types) const
+{
+  for (unsigned int i = 1; i < instance.get_arg_pattern ().arg_len; i++)
+argument_types.quick_push (get_dt_t_with_index (instance, i));
+}
+
+/* A function implementation for vle functions.  */
+unsigned int
+vle::call_properties () const
+{
+  return CP_READ_MEMORY;
+}
+
+tree
+vle::get_return_type (const function_instance &instance) const
+{
+  return get_dt_t_with_index (instance, 0);
+}
+
+bool
+vle::can_be_overloaded_p (const function_instance &instance) const
+{
+  return instance.get_pred () == PRED_m || instance.get_pred () == PRED_tu ||
+ instance.get_pred () == PRED_tamu ||
+ instance.get_pred () == PRED_tuma || instance.get_pred () == 
PRED_tumu;
+}
+
+rtx
+vle::expand (const function_instance &instance, tree exp, rtx target) const
+{
+  machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
+  enum insn_code icode = code_for_vle (mode);
+  return expand_builtin_insn (icode, exp, target, instance);
+}
+
+/* A function implementation for vse functions.  */
+unsigned int
+vse::call_properties () const
+{
+  return CP_WRITE_MEMORY;
+}
+
+bool
+vse::can_be_overloaded_p (const function_instance &) const
+{
+  return true;
+}
+
+rtx
+vse::expand (const function_instance &instance, tree exp, rtx target) const
+{
+  machine_mode mode = instance.get_arg_pattern ().arg_list[0];
+  enum insn_code icode = code_for_vse (mode);
+  return expand_builtin_insn (icode, exp, target, instance);
+}
+
 } // end namespace riscv_vector
 
 using namespace riscv_vector;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index 86130e02381..deb32ccd031 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -56,6 +56,13 @@ DEF_RVV_FUNCTION(vlmul_trunc, vlmul_trunc, (2, 
VITER(VLMULTRUNC, signed), VITER(
 DEF_RVV_FUNCTION(vundefined, vundefined, (1, VITER(VI, signed)), PAT_none, 
PRED_none, OP_none)
 DEF_RVV_FUNCTION(vundefined, vundefined, (1, VITER(VI, unsigned)), PAT_none, 
PRED_none, OP_none)
 DEF_RVV_FUNCTION(vundefined, vundefined, (1, VITER(VF, signed)), PAT_none, 
PRED_none, OP_none)
+/* 7. Vector Loads and Stores. */
+DEF_RVV_FUNCTION(vle, vle, (2, VITER(VI, signed), VATTR(0, VSUB, c_ptr)), 
pat_mask_tail, pred_all, OP_v)
+DEF_RVV_FUNCTION(vle, vle, (2, VITER(VI, unsigned), VATTR(0, VSUB, c_uptr)), 
pat_mask_tail, pred_all, OP_v)
+DEF_RVV_FUNCTI

RE: [ping][vect-patterns] Refactor widen_plus/widen_minus as internal_fns

2022-05-31 Thread Joel Hutton via Gcc-patches

> Can you post an updated patch (after the .cc renaming, and code_helper
> now already moved to tree.h)?
> 
> Thanks,
> Richard.

Patches attached.  They already incorporated the .cc rename and are now rebased
on top of the change to tree.h.

Joel


0001-Refactor-to-allow-internal_fn-s.patch
Description: 0001-Refactor-to-allow-internal_fn-s.patch


0002-Refactor-widen_plus-as-internal_fn.patch
Description: 0002-Refactor-widen_plus-as-internal_fn.patch


0003-Remove-widen_plus-minus_expr-tree-codes.patch
Description: 0003-Remove-widen_plus-minus_expr-tree-codes.patch

Re: [PATCH, OpenMP, v2] Implement uses_allocators clause for target regions

2022-05-31 Thread Jakub Jelinek via Gcc-patches

On Mon, May 30, 2022 at 07:23:55PM +0200, Jakub Jelinek via Gcc-patches wrote:
> On Mon, May 30, 2022 at 10:43:30PM +0800, Chung-Lin Tang wrote:
> > > This feels like you only accept a single allocator in the new syntax,
> > > but that isn't my reading of the spec, I'd understand it as:
> > > uses_allocators (memspace(omp_high_bw_mem_space), traits(foo_traits) : 
> > > bar, baz, qux)
> > > being valid too.
> > 
> > This patch now allows multiple allocators to be specified in new syntax, 
> > although I have
> > to note that the 5.2 specification of uses_allocators (page 181) 
> > specifically says
> > "allocator: expression of allocator_handle_type" for the "Arguments" 
> > description,
> > not a "list" like the allocate clause.
> 
> I guess we should then ask on omp-lang what we really want.
> Because the 5.1 syntax definitely allowed multiple allocators.

The response I got on omp-lang is that it is intentional that in the new
syntax only a single allocator is allowed.
So I'd suggest to implement:
1) if has_modifiers (i.e. certainly new syntax), only allow a single
   enumerator / identifier for a variable and no ()s after it
2) if !has_modifiers and there is exactly one allocator without ()s,
   treat it like new syntax
3) otherwise, it is the old (5.1) syntax, which allows a list and that
   list can contain ()s for traits, but in the light of the 5.2 wording,
   I'd even for that case avoid diagnosing missing traits for non-predefined
   allocators
4) omp_null_allocator should be diagnosed as invalid,
   private (omp_null_allocator) is rejected...
5) for C++, we should handle FIELD_DECLs, but it shouldn't be hard, just
   look how it is handled for private too

Jakub

[committed] openmp: Add support for firstprivate and allocate clauses on scope construct

2022-05-31 Thread Jakub Jelinek via Gcc-patches

Hi!

OpenMP 5.2 adds support for firstprivate and allocate clauses on the scope
construct and this patch adds that support to GCC.
5.2 unfortunately (IMNSHO mistakenly) marked scope construct as worksharing,
which implies that it isn't possible to nest inside of it other scope,
worksharing loop, sections, explicit barriers, single etc. which would
make scope far less useful.  I'm not implementing that part, keeping the
5.1 behavior here, and will file an issue to revert that for OpenMP 6.0.
But, for firstprivate it keeps the restriction that is now implied from
worksharing construct that listed var can't be private in outer context,
where for reduction 5.1 had similar restriction explicit even for scope
and 5.2 has it implicitly through worksharing construct.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2022-05-31  Jakub Jelinek  

gcc/
* omp-low.cc (build_outer_var_ref): For code == OMP_CLAUSE_ALLOCATE
allow var to be private in the outer context.
(lower_private_allocate): Pass OMP_CLAUSE_ALLOCATE as last argument
to build_outer_var_ref.
gcc/c/
* c-parser.cc (OMP_SCOPE_CLAUSE_MASK): Add firstprivate and allocate
clauses.
gcc/cp/
* parser.cc (OMP_SCOPE_CLAUSE_MASK): Add firstprivate and allocate
clauses.
gcc/testsuite/
* c-c++-common/gomp/scope-5.c: New test.
* c-c++-common/gomp/scope-6.c: New test.
* g++.dg/gomp/attrs-1.C (bar): Add firstprivate and allocate clauses
to scope construct.
* g++.dg/gomp/attrs-2.C (bar): Likewise.
libgomp/
* testsuite/libgomp.c-c++-common/allocate-1.c (foo): Add testcase for
scope construct with allocate clause.
* testsuite/libgomp.c-c++-common/allocate-3.c (foo): Likewise.
* testsuite/libgomp.c-c++-common/scope-2.c: New test.

--- gcc/omp-low.cc.jj   2022-05-30 14:07:02.126305076 +0200
+++ gcc/omp-low.cc  2022-05-30 15:39:25.378822038 +0200
@@ -683,6 +683,7 @@ build_outer_var_ref (tree var, omp_conte
   else if ((gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
&& gimple_omp_for_kind (ctx->stmt) == GF_OMP_FOR_KIND_SIMD)
   || ctx->loop_p
+  || code == OMP_CLAUSE_ALLOCATE
   || (code == OMP_CLAUSE_PRIVATE
   && (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
   || gimple_code (ctx->stmt) == GIMPLE_OMP_SECTIONS
@@ -4849,7 +4850,7 @@ lower_private_allocate (tree var, tree n
   allocator = TREE_PURPOSE (allocator);
 }
   if (TREE_CODE (allocator) != INTEGER_CST)
-allocator = build_outer_var_ref (allocator, ctx);
+allocator = build_outer_var_ref (allocator, ctx, OMP_CLAUSE_ALLOCATE);
   allocator = fold_convert (pointer_sized_int_node, allocator);
   if (TREE_CODE (allocator) != INTEGER_CST)
 {
--- gcc/c/c-parser.cc.jj2022-05-30 14:07:11.877200823 +0200
+++ gcc/c/c-parser.cc   2022-05-30 14:15:31.887994854 +0200
@@ -20413,7 +20413,9 @@ c_parser_omp_single (location_t loc, c_p
 
 #define OMP_SCOPE_CLAUSE_MASK  \
( (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_PRIVATE)  \
+   | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_FIRSTPRIVATE) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_REDUCTION)\
+   | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ALLOCATE) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NOWAIT))
 
 static tree
--- gcc/cp/parser.cc.jj 2022-05-30 14:07:12.063198834 +0200
+++ gcc/cp/parser.cc2022-05-30 14:18:13.990334122 +0200
@@ -43747,7 +43747,9 @@ cp_parser_omp_single (cp_parser *parser,
 
 #define OMP_SCOPE_CLAUSE_MASK  \
( (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_PRIVATE)  \
+   | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_FIRSTPRIVATE) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_REDUCTION)\
+   | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ALLOCATE) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NOWAIT))
 
 static tree
--- gcc/testsuite/c-c++-common/gomp/scope-5.c.jj2022-05-30 
14:56:03.467452993 +0200
+++ gcc/testsuite/c-c++-common/gomp/scope-5.c   2022-05-30 14:57:06.921648755 
+0200
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+
+void
+foo ()
+{
+  int f = 0;
+  #pragma omp scope firstprivate(f)/* { dg-error "firstprivate variable 
'f' is private in outer context" } */
+  f++;
+}
--- gcc/testsuite/c-c++-common/gomp/scope-6.c.jj2022-05-30 
15:45:16.286996461 +0200
+++ gcc/testsuite/c-c++-common/gomp/scope-6.c   2022-05-30 15:40:07.250364770 
+0200
@@ -0,0 +1,31 @@
+typedef enum omp_allocator_handle_t
+#if __cplusplus >= 201103L
+: __UINTPTR_TYPE__
+#endif
+{
+  omp_null_allocator = 0,
+  omp_default_mem_alloc = 1,
+  omp_large_cap_mem_alloc = 2,
+  omp_const_mem_alloc = 3,
+  omp_high_bw_mem_alloc = 4,
+  omp_low_lat_mem_alloc = 5,
+  omp_cgroup_mem_alloc = 6,
+  omp_pteam_mem_alloc = 7,
+  omp_thread_mem_alloc = 8,
+  __omp_allocator_handle_t_max__ = __UINTPTR_MAX__
+} omp_allocat

Re: [PATCH] unswitch: Fold case label lo/hi values to index type [PR105770]

2022-05-31 Thread Richard Biener via Gcc-patches




> Am 31.05.2022 um 11:33 schrieb Jakub Jelinek via Gcc-patches 
> :
> 
> Hi!
> 
> The following testcase ICEs because we use different types in comparison,
> idx has int type, while CASE_LOW has char type.
> 
> While I believe all CASE_{LOW,HIGH} in the same switch have to use the same
> or compatible type, the index expression can have a promoted type as happens
> in this testcase.  Other spots that handle switches do such foldings too.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Richard 

> 2022-05-31  Jakub Jelinek  
> 
>PR tree-optimization/105770
>* tree-ssa-loop-unswitch.cc (find_unswitching_predicates_for_bb): Cast
>CASE_LOW and CASE_HIGH to TREE_TYPE (idx) before comparisons with idx.
> 
>* gcc.dg/pr105770.c: New test.
> 
> --- gcc/tree-ssa-loop-unswitch.cc.jj2022-05-25 11:07:29.754185772 +0200
> +++ gcc/tree-ssa-loop-unswitch.cc2022-05-30 10:57:23.165131441 +0200
> @@ -494,6 +494,7 @@ find_unswitching_predicates_for_bb (basi
> {
>   unsigned nlabels = gimple_switch_num_labels (stmt);
>   tree idx = gimple_switch_index (stmt);
> +  tree idx_type = TREE_TYPE (idx);
>   if (TREE_CODE (idx) != SSA_NAME
>  || nlabels < 1)
>return;
> @@ -526,16 +527,18 @@ find_unswitching_predicates_for_bb (basi
>  if (CASE_HIGH (lab) != NULL_TREE)
>{
>  tree cmp1 = fold_build2 (GE_EXPR, boolean_type_node, idx,
> -   CASE_LOW (lab));
> +   fold_convert (idx_type,
> + CASE_LOW (lab)));
>  tree cmp2 = fold_build2 (LE_EXPR, boolean_type_node, idx,
> -   CASE_HIGH (lab));
> +   fold_convert (idx_type,
> + CASE_HIGH (lab)));
>  cmp = fold_build2 (BIT_AND_EXPR, boolean_type_node, cmp1, cmp2);
>  lab_range.set (CASE_LOW (lab), CASE_HIGH (lab));
>}
>  else
>{
>  cmp = fold_build2 (EQ_EXPR, boolean_type_node, idx,
> - CASE_LOW (lab));
> + fold_convert (idx_type, CASE_LOW (lab)));
>  lab_range.set (CASE_LOW (lab));
>}
> 
> --- gcc/testsuite/gcc.dg/pr105770.c.jj2022-05-30 11:08:30.603530499 +0200
> +++ gcc/testsuite/gcc.dg/pr105770.c2022-05-30 11:07:12.066406193 +0200
> @@ -0,0 +1,19 @@
> +/* PR tree-optimization/105770 */
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -funswitch-loops -fno-tree-forwprop" } */
> +
> +char a;
> +
> +void
> +foo (void)
> +{
> +  while (a)
> +switch (a)
> +  {
> +  case ' ':
> +  case '\t':
> +return;
> +  }
> +
> +  __builtin_unreachable ();
> +}
> 
>Jakub
>

[PATCH] unswitch: Fold case label lo/hi values to index type [PR105770]

2022-05-31 Thread Jakub Jelinek via Gcc-patches

Hi!

The following testcase ICEs because we use different types in comparison,
idx has int type, while CASE_LOW has char type.

While I believe all CASE_{LOW,HIGH} in the same switch have to use the same
or compatible type, the index expression can have a promoted type as happens
in this testcase.  Other spots that handle switches do such foldings too.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-05-31  Jakub Jelinek  

PR tree-optimization/105770
* tree-ssa-loop-unswitch.cc (find_unswitching_predicates_for_bb): Cast
CASE_LOW and CASE_HIGH to TREE_TYPE (idx) before comparisons with idx.

* gcc.dg/pr105770.c: New test.

--- gcc/tree-ssa-loop-unswitch.cc.jj2022-05-25 11:07:29.754185772 +0200
+++ gcc/tree-ssa-loop-unswitch.cc   2022-05-30 10:57:23.165131441 +0200
@@ -494,6 +494,7 @@ find_unswitching_predicates_for_bb (basi
 {
   unsigned nlabels = gimple_switch_num_labels (stmt);
   tree idx = gimple_switch_index (stmt);
+  tree idx_type = TREE_TYPE (idx);
   if (TREE_CODE (idx) != SSA_NAME
  || nlabels < 1)
return;
@@ -526,16 +527,18 @@ find_unswitching_predicates_for_bb (basi
  if (CASE_HIGH (lab) != NULL_TREE)
{
  tree cmp1 = fold_build2 (GE_EXPR, boolean_type_node, idx,
-  CASE_LOW (lab));
+  fold_convert (idx_type,
+CASE_LOW (lab)));
  tree cmp2 = fold_build2 (LE_EXPR, boolean_type_node, idx,
-  CASE_HIGH (lab));
+  fold_convert (idx_type,
+CASE_HIGH (lab)));
  cmp = fold_build2 (BIT_AND_EXPR, boolean_type_node, cmp1, cmp2);
  lab_range.set (CASE_LOW (lab), CASE_HIGH (lab));
}
  else
{
  cmp = fold_build2 (EQ_EXPR, boolean_type_node, idx,
-CASE_LOW (lab));
+fold_convert (idx_type, CASE_LOW (lab)));
  lab_range.set (CASE_LOW (lab));
}
 
--- gcc/testsuite/gcc.dg/pr105770.c.jj  2022-05-30 11:08:30.603530499 +0200
+++ gcc/testsuite/gcc.dg/pr105770.c 2022-05-30 11:07:12.066406193 +0200
@@ -0,0 +1,19 @@
+/* PR tree-optimization/105770 */
+/* { dg-do compile } */
+/* { dg-options "-O1 -funswitch-loops -fno-tree-forwprop" } */
+
+char a;
+
+void
+foo (void)
+{
+  while (a)
+switch (a)
+  {
+  case ' ':
+  case '\t':
+   return;
+  }
+
+  __builtin_unreachable ();
+}

Jakub

[PATCH 13/21] Adjust scalable frame and full testcases

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/ChangeLog:

* config/riscv/riscv-vector.cc (rvv_adjust_frame): Adjust frame 
manipulation for RVV scalable vector.
* config/riscv/riscv-vector.h (rvv_adjust_frame): Adjust frame 
manipulation for RVV scalable vector.
* config/riscv/riscv.cc (riscv_compute_frame_info): Adjust frame 
manipulation for RVV scalable vector.
(riscv_first_stack_step): Adjust frame manipulation for RVV scalable 
vector.
(riscv_expand_prologue): Adjust frame manipulation for RVV scalable 
vector.
(riscv_expand_epilogue): Adjust frame manipulation for RVV scalable 
vector.
(riscv_dwarf_poly_indeterminate_value): New function.
(riscv_estimated_poly_value): New function.
(TARGET_DWARF_POLY_INDETERMINATE_VALUE): New target hook.
(TARGET_ESTIMATED_POLY_VALUE): New target hook.
* config/riscv/riscv.h (RISCV_PROLOGUE_TEMP2_REGNUM): New macro define.
(RISCV_PROLOGUE_TEMP2): New macro define.
(RISCV_DWARF_VLENB): New macro define.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/stack/rvv-stack.exp: New test.
* gcc.target/riscv/rvv/stack/stack-check-alloca-scalar.c: New test.
* gcc.target/riscv/rvv/stack/stack-check-alloca-vector.c: New test.
* gcc.target/riscv/rvv/stack/stack-check-save-restore-scalar.c: New 
test.
* gcc.target/riscv/rvv/stack/stack-check-save-restore-vector.c: New 
test.
* gcc.target/riscv/rvv/stack/stack-check-scalar.c: New test.
* gcc.target/riscv/rvv/stack/stack-check-vararg-scalar.c: New test.
* gcc.target/riscv/rvv/stack/stack-check-vector_1.c: New test.
* gcc.target/riscv/rvv/stack/stack-check-vector_2.c: New test.

---
 gcc/config/riscv/riscv-vector.cc  |  33 +++
 gcc/config/riscv/riscv-vector.h   |   1 +
 gcc/config/riscv/riscv.cc | 275 -
 gcc/config/riscv/riscv.h  |   4 +
 .../gcc.target/riscv/rvv/stack/rvv-stack.exp  |  47 +++
 .../rvv/stack/stack-check-alloca-scalar.c |  53 
 .../rvv/stack/stack-check-alloca-vector.c |  45 +++
 .../stack/stack-check-save-restore-scalar.c   |  48 +++
 .../stack/stack-check-save-restore-vector.c   |  62 
 .../riscv/rvv/stack/stack-check-scalar.c  | 205 +
 .../rvv/stack/stack-check-vararg-scalar.c |  33 +++
 .../riscv/rvv/stack/stack-check-vector_1.c| 277 ++
 .../riscv/rvv/stack/stack-check-vector_2.c| 141 +
 13 files changed, 1143 insertions(+), 81 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/stack/rvv-stack.exp
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/stack/stack-check-alloca-scalar.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/stack/stack-check-alloca-vector.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/stack/stack-check-save-restore-scalar.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/stack/stack-check-save-restore-vector.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/stack/stack-check-scalar.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/stack/stack-check-vararg-scalar.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/stack/stack-check-vector_1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/stack/stack-check-vector_2.c

diff --git a/gcc/config/riscv/riscv-vector.cc b/gcc/config/riscv/riscv-vector.cc
index d09fc1b8e49..4cb5e79421d 100644
--- a/gcc/config/riscv/riscv-vector.cc
+++ b/gcc/config/riscv/riscv-vector.cc
@@ -846,6 +846,39 @@ rvv_expand_poly_move (machine_mode mode, rtx dest, rtx 
clobber, rtx src)
 emit_insn (gen_rtx_SET (dest, riscv_add_offset (clobber, dest, constant)));
 }
 
+/* Adjust frame of vector for prologue && epilogue. */
+void
+rvv_adjust_frame (rtx target, poly_int64 offset, bool epilogue)
+{
+  rtx clobber = RISCV_PROLOGUE_TEMP (Pmode);
+  rtx space = RISCV_PROLOGUE_TEMP2 (Pmode);
+  rtx insn, dwarf, adjust_frame_rtx;
+
+  rvv_expand_poly_move (Pmode, space, clobber, gen_int_mode (offset, Pmode));
+
+  if (epilogue)
+{
+  insn = gen_add3_insn (target, target, space);
+}
+  else
+{
+  insn = gen_sub3_insn (target, target, space);
+}
+
+  insn = emit_insn (insn);
+
+  RTX_FRAME_RELATED_P (insn) = 1;
+
+  adjust_frame_rtx =
+gen_rtx_SET (target,
+ plus_constant (Pmode, target, epilogue ? offset : -offset));
+
+  dwarf = alloc_reg_note (REG_FRAME_RELATED_EXPR,
+  copy_rtx (adjust_frame_rtx), NULL_RTX);
+
+  REG_NOTES (insn) = dwarf;
+}
+
 /* Helper functions for handling sew=64 on RV32 system. */
 bool
 imm32_p (rtx a)
diff --git a/gcc/config/riscv/riscv-vector.h b/gcc/config/riscv/riscv-vector.h
index b70cf676e26..98f47ea0ec1 100644
--- a/gcc/config/riscv/riscv-vector.h
+++ b/gcc/config/riscv/riscv-vector.h
@@ -22,4 +22,5 @@
 #define GCC_RISCV_VECTOR_H
 void rvv_report_required (void);
 void rvv_expand_poly_move (machine_mode

[PATCH 12/21] Add set get intrinsic support

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-functions.cc 
(vset::assemble_name): New function.
(vset::get_argument_types): New function.
(vset::expand): New function.
(vget::assemble_name): New function.
(vget::get_argument_types): New function.
(vget::expand): New function.
* config/riscv/riscv-vector-builtins-functions.def (vset): New macro 
define.
(vget): New macro define.
* config/riscv/riscv-vector-builtins-functions.h (class vset): New 
class.
(class vget): New class.

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/set-get.C: New test.
* gcc.target/riscv/rvv/intrinsic/set-get.c: New test.

---
 .../riscv/riscv-vector-builtins-functions.cc  |  73 ++
 .../riscv/riscv-vector-builtins-functions.def |   6 +
 .../riscv/riscv-vector-builtins-functions.h   |  28 +
 gcc/testsuite/g++.target/riscv/rvv/set-get.C  | 730 ++
 .../gcc.target/riscv/rvv/intrinsic/set-get.c  | 730 ++
 5 files changed, 1567 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/set-get.C
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/intrinsic/set-get.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.cc 
b/gcc/config/riscv/riscv-vector-builtins-functions.cc
index fa39eedcd86..9d2895c3d3e 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.cc
@@ -1510,6 +1510,79 @@ vundefined::expand (const function_instance &, tree, rtx 
target) const
   return target;
 }
 
+/* A function implementation for vset functions.  */
+char *
+vset::assemble_name (function_instance &instance)
+{
+  machine_mode tmode = instance.get_arg_pattern ().arg_list[0];
+  machine_mode smode = instance.get_arg_pattern ().arg_list[2];
+  if (GET_MODE_INNER (tmode) != GET_MODE_INNER (smode))
+return nullptr;
+
+  if (tmode == smode)
+return nullptr;
+  
+  if (known_lt (GET_MODE_SIZE (tmode), GET_MODE_SIZE (smode)))
+return nullptr;
+
+  intrinsic_rename (instance, 0, 2);
+  append_name (instance.get_base_name ());
+  return finish_name ();
+}
+
+void
+vset::get_argument_types (const function_instance &instance,
+  vec &argument_types) const
+{
+  misc::get_argument_types (instance, argument_types);
+  argument_types.quick_push (size_type_node);
+  argument_types.quick_push (get_dt_t_with_index (instance, 2));
+}
+
+rtx
+vset::expand (const function_instance &instance, tree exp, rtx target) const
+{
+  enum insn_code icode = code_for_vset (instance.get_arg_pattern 
().arg_list[0]);
+  return expand_builtin_insn (icode, exp, target, instance);
+}
+
+/* A function implementation for vget functions.  */
+char *
+vget::assemble_name (function_instance &instance)
+{
+  machine_mode tmode = instance.get_arg_pattern ().arg_list[0];
+  machine_mode smode = instance.get_arg_pattern ().arg_list[1];
+  if (GET_MODE_INNER (tmode) != GET_MODE_INNER (smode))
+return nullptr;
+
+  if (tmode == smode)
+return nullptr;
+  
+  if (known_gt (GET_MODE_SIZE (tmode), GET_MODE_SIZE (smode)))
+return nullptr;
+  
+  bool unsigned_p = instance.get_data_type_list ()[0] == DT_unsigned;
+  intrinsic_rename (instance, 0, 1);
+  append_name (instance.get_base_name ());
+  append_name (mode2data_type_str (tmode, unsigned_p, false));
+  return finish_name ();
+}
+
+void
+vget::get_argument_types (const function_instance &instance,
+  vec &argument_types) const
+{
+  misc::get_argument_types (instance, argument_types);
+  argument_types.quick_push (size_type_node);
+}
+
+rtx
+vget::expand (const function_instance &instance, tree exp, rtx target) const
+{
+  enum insn_code icode = code_for_vget (instance.get_arg_pattern 
().arg_list[0]);
+  return expand_builtin_insn (icode, exp, target, instance);
+}
+
 /* A function implementation for loadstore functions.  */
 char *
 loadstore::assemble_name (function_instance &instance)
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index deb32ccd031..739ae60fff5 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -56,6 +56,12 @@ DEF_RVV_FUNCTION(vlmul_trunc, vlmul_trunc, (2, 
VITER(VLMULTRUNC, signed), VITER(
 DEF_RVV_FUNCTION(vundefined, vundefined, (1, VITER(VI, signed)), PAT_none, 
PRED_none, OP_none)
 DEF_RVV_FUNCTION(vundefined, vundefined, (1, VITER(VI, unsigned)), PAT_none, 
PRED_none, OP_none)
 DEF_RVV_FUNCTION(vundefined, vundefined, (1, VITER(VF, signed)), PAT_none, 
PRED_none, OP_none)
+DEF_RVV_FUNCTION(vset, vset, (3, VITER(VSETI, signed), VATTR(0, VSETI, 
signed), VITER(VFULL, signed)), PAT_none, PRED_none, OP_v)
+DEF_RVV_FUNCTION(vset, vset, (3, VITER(VSETI, unsigned), VATTR(0, VSETI, 
unsigned), VITER(VFULL, unsigned)), PAT_none, PRED_none, OP_v)
+DEF_RVV_FUNCTION

[PATCH 11/21] Add calling function support

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/ChangeLog:

* config/riscv/riscv.cc (struct riscv_arg_info): Add calling convention 
support.
(riscv_get_arg_info): Add calling convention support.
(riscv_function_arg_advance): Add calling convention support.
(riscv_pass_by_reference): Add calling convention support.
* config/riscv/riscv.h (GCC_RISCV_H): include .
(V_RETURN): New macro define.
(MAX_ARGS_IN_VECTOR_REGISTERS): New macro define.
(MAX_ARGS_IN_MASK_REGISTERS): New macro define.
(V_ARG_FIRST): New macro define.
(V_ARG_LAST): New macro define.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/custom/calling_convention_1.c: New test.
* gcc.target/riscv/rvv/custom/rvv-custom.exp: New test.

---
 gcc/config/riscv/riscv.cc | 90 +++
 gcc/config/riscv/riscv.h  | 14 +++
 .../riscv/rvv/custom/calling_convention_1.c   | 46 ++
 .../riscv/rvv/custom/rvv-custom.exp   | 47 ++
 4 files changed, 197 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/custom/calling_convention_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/custom/rvv-custom.exp

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index e88057e992a..832c1754002 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -181,6 +181,18 @@ struct riscv_arg_info {
 
   /* The offset of the first register used, provided num_fprs is nonzero.  */
   unsigned int fpr_offset;
+
+  /* The number of vector registers allocated to this argument.  */
+  unsigned int num_vrs;
+
+  /* The offset of the first register used, provided num_vrs is nonzero.  */
+  unsigned int vr_offset;
+
+  /* The number of mask registers allocated to this argument.  */
+  unsigned int num_mrs;
+
+  /* The offset of the first register used, provided num_mrs is nonzero.  */
+  unsigned int mr_offset;
 };
 
 /* Information about an address described by riscv_address_type.
@@ -3225,11 +3237,13 @@ riscv_get_arg_info (struct riscv_arg_info *info, const 
CUMULATIVE_ARGS *cum,
   unsigned num_bytes, num_words;
   unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST;
   unsigned gpr_base = return_p ? GP_RETURN : GP_ARG_FIRST;
+   unsigned vr_base = return_p ? V_RETURN : V_ARG_FIRST;
   unsigned alignment = riscv_function_arg_boundary (mode, type);
 
   memset (info, 0, sizeof (*info));
   info->gpr_offset = cum->num_gprs;
   info->fpr_offset = cum->num_fprs;
+   info->mr_offset = cum->num_mrs;
 
   if (named)
 {
@@ -3292,6 +3306,67 @@ riscv_get_arg_info (struct riscv_arg_info *info, const 
CUMULATIVE_ARGS *cum,
  gregno, TYPE_MODE (fields[1].type),
  fields[1].offset);
}
+  /*  Pass vectors in VRs. For the argument contain scalable vectors,
+  for example: foo (vint8m1_t a), we pass this in VRs to reduce
+  redundant register spills. The maximum vector arg registers is
+  MAX_ARGS_IN_VECTOR_REGISTERS. */
+  if (rvv_mode_p (mode))
+   {
+ /*  For return vector register, we use V_RETURN as default. */
+ if (return_p)
+   {
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+   return gen_rtx_REG (mode, V_REG_FIRST);
+ else
+   return gen_rtx_REG (mode, vr_base);
+   }
+ /* The first mask register in argument we use is v0, the rest of them
+we use v8, v9, etc., same as vector registers.  */
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+   {
+ info->num_mrs = 1;
+   
+ if (info->mr_offset + info->num_mrs <= MAX_ARGS_IN_MASK_REGISTERS)
+   return gen_rtx_REG (mode, V_REG_FIRST);
+   }
+ /*  The number of vectors to pass in the function arg.
+ When the mode size is less than a full vector, we
+ use 1 vector to pass. */
+ int nvecs;
+ nvecs = known_le (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR) ? 1 :
+ exact_div (GET_MODE_SIZE (mode), 
BYTES_PER_RISCV_VECTOR).to_constant ();
+ int align = rvv_regsize (mode);
+ for (int i = 0; i + nvecs <= MAX_ARGS_IN_VECTOR_REGISTERS; i += 1)
+   {
+ if (!cum->used_vrs[i] && (i + 8) % align == 0)
+   {
+ bool find_space = true;
+ int j = 1;
+ for (; j < nvecs; j += 1)
+   {
+ if (cum->used_vrs[i + j])
+   {
+ find_space = false;
+ break;
+   }
+   }
+ if (find_space)
+   {
+ info->num_vrs = nvecs;
+ info->vr_offset = i;
+ return gen_rtx_REG(mode, vr_base + i);
+

[PATCH 07/21] Add register spilling support

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/ChangeLog:

* config/riscv/riscv-protos.h (rvv_expand_const_vector): New function.
(rvv_expand_const_mask): New function.
(rvv_const_vec_all_same_in_range_p): New function.
* config/riscv/riscv-vector.cc (classify_vtype_field): Move codes 
location.
(get_lmulx8): New function. Move codes location.
(force_reg_for_over_uimm): New function. Move codes location.
(gen_vlx2): New function. Move codes location.
(emit_int64_to_vector_32bit): Move codes location.
(rvv_expand_const_vector): New function.
(rvv_expand_const_mask): New function.
(rvv_const_vec_all_same_in_range_p): New function.
* config/riscv/riscv.cc (riscv_const_insns): Add const vector cost.
* config/riscv/vector-iterators.md: New iterators and attributes.
* config/riscv/vector.md (mov): New pattern.
(*mov): New pattern.
(*mov_reg): New pattern.
(@vmclr_m): New pattern.
(@vmset_m): New pattern.

---
 gcc/config/riscv/riscv-protos.h  |   3 +
 gcc/config/riscv/riscv-vector.cc | 349 ---
 gcc/config/riscv/riscv.cc|  67 -
 gcc/config/riscv/vector-iterators.md |  24 ++
 gcc/config/riscv/vector.md   | 201 +++
 5 files changed, 502 insertions(+), 142 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 9a7e120854a..618eb746eaa 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -128,6 +128,9 @@ extern int rvv_regsize (machine_mode);
 extern rtx rvv_gen_policy (unsigned int rvv_policy = 0);
 extern opt_machine_mode rvv_get_mask_mode (machine_mode);
 extern machine_mode rvv_translate_attr_mode (rtx_insn *);
+extern bool rvv_expand_const_vector (rtx, rtx);
+extern bool rvv_expand_const_mask (rtx, rtx);
+extern bool rvv_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, 
HOST_WIDE_INT);
 extern void
 emit_op5 (
   unsigned int unspec,
diff --git a/gcc/config/riscv/riscv-vector.cc b/gcc/config/riscv/riscv-vector.cc
index 426490945dd..4b2fe2a8d11 100644
--- a/gcc/config/riscv/riscv-vector.cc
+++ b/gcc/config/riscv/riscv-vector.cc
@@ -71,7 +71,165 @@
 #include "target-def.h"
 
 #include 
-/* Helper functions for RVV */
+
+/* Internal helper functions for RVV */
+
+/* Return the vtype field for a specific machine mode. */
+static unsigned int
+classify_vtype_field (machine_mode mode)
+{
+  unsigned int vlmul = rvv_classify_vlmul_field (mode);
+  unsigned int vsew = rvv_classify_vsew_field (mode);
+  unsigned int vtype = (vsew << 3) | (vlmul & 0x7) | 0x40;
+  return vtype;
+}
+
+/* lmul = real_lmul * 8
+   guarantee integer
+   e.g.
+ 1  => 1/8
+ 2  => 1/4
+ 4  => 1/2
+ 8  => 1
+ 16 => 2
+ 32 => 4
+ 64 => 8
+ */
+static unsigned int
+get_lmulx8 (machine_mode mode)
+{
+  unsigned int vlmul = rvv_classify_vlmul_field (mode);
+  switch (vlmul)
+{
+  case VLMUL_FIELD_000:
+return 8;
+  case VLMUL_FIELD_001:
+return 16;
+  case VLMUL_FIELD_010:
+return 32;
+  case VLMUL_FIELD_011:
+return 64;
+  case VLMUL_FIELD_101:
+return 1;
+  case VLMUL_FIELD_110:
+return 2;
+  case VLMUL_FIELD_111:
+return 4;
+  default:
+gcc_unreachable ();
+}
+}
+
+/* Helper functions for handling sew=64 on RV32 system. */
+static rtx
+force_reg_for_over_uimm (rtx vl)
+{
+  if (CONST_SCALAR_INT_P (vl) && INTVAL (vl) >= 32)
+{
+  return force_reg (Pmode, vl);
+}
+
+  return vl;
+}
+
+/* Helper functions for handling sew=64 on RV32 system. */
+static rtx
+gen_vlx2 (rtx avl, machine_mode Vmode, machine_mode VSImode)
+{
+  if (rtx_equal_p (avl, gen_rtx_REG (Pmode, X0_REGNUM)))
+{
+  return avl;
+}
+  rtx i32vl = NULL_RTX;
+  if (CONST_SCALAR_INT_P (avl))
+{
+  unsigned int vlen_max;
+  unsigned int vlen_min;
+  if (riscv_vector_chunks.is_constant ())
+{
+  vlen_max = riscv_vector_chunks.to_constant () * 64;
+  vlen_min = vlen_max;
+}
+  else
+{
+  /* TODO: vlen_max will be supported as 65536 in the future. */ 
+  vlen_max = RVV_4096;
+  vlen_min = RVV_128;
+}
+  unsigned int max_vlmax = (vlen_max / GET_MODE_UNIT_BITSIZE (Vmode) * 
get_lmulx8 (Vmode)) / 8;
+  unsigned int min_vlmax = (vlen_min / GET_MODE_UNIT_BITSIZE (Vmode) * 
get_lmulx8 (Vmode)) / 8;
+  
+  unsigned HOST_WIDE_INT avl_int = INTVAL (avl);
+  if (avl_int <= min_vlmax)
+{
+  i32vl = gen_int_mode (2 * avl_int, SImode);
+}
+  else if (avl_int >= 2 * max_vlmax)
+{
+  // Just set i32vl to VLMAX in this situation
+  i32vl = gen_reg_rtx (Pmode);
+  unsigned int vtype = classify_vtype_field (VSImode);
+  emit_insn (gen_vsetvl (Pmode, i32vl, gen_rtx_REG (Pmode, X0_REGNUM), 
GEN_INT (vtype)));
+

[PATCH 08/21] Add poly manipulation

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/ChangeLog:

* config/riscv/riscv-protos.h (riscv_add_offset): Change 
riscv_add_offset as global function.
* config/riscv/riscv-vector.cc (rvv_report_required): New function.
(expand_quotient): New function.
(rvv_expand_poly_move): New function.
* config/riscv/riscv-vector.h (rvv_report_required): New function.
(rvv_expand_poly_move): New function.
* config/riscv/riscv.cc (riscv_const_insns): Fix no return value bug.
(riscv_split_symbol): Add symbol_ref with poly_int support.
(riscv_legitimize_const_move): Add const poly_int move support.
(riscv_legitimize_move): Add const poly_int move support.
(riscv_hard_regno_mode_ok): Add VL_REGNUM and VTYPE_REGNUM register 
allocation.
(riscv_conditional_register_usage): Fix RVV registers.

---
 gcc/config/riscv/riscv-protos.h  |   1 +
 gcc/config/riscv/riscv-vector.cc | 254 +++
 gcc/config/riscv/riscv-vector.h  |   2 +
 gcc/config/riscv/riscv.cc|  46 +-
 4 files changed, 299 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 618eb746eaa..2d63fe76930 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -74,6 +74,7 @@ extern bool riscv_expand_block_move (rtx, rtx, rtx);
 extern bool riscv_store_data_bypass_p (rtx_insn *, rtx_insn *);
 extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *);
 extern bool riscv_gpr_save_operation_p (rtx);
+extern rtx riscv_add_offset (rtx, rtx, HOST_WIDE_INT);
 
 /* Routines implemented in riscv-c.cc.  */
 void riscv_cpu_cpp_builtins (cpp_reader *);
diff --git a/gcc/config/riscv/riscv-vector.cc b/gcc/config/riscv/riscv-vector.cc
index 4b2fe2a8d11..d09fc1b8e49 100644
--- a/gcc/config/riscv/riscv-vector.cc
+++ b/gcc/config/riscv/riscv-vector.cc
@@ -592,6 +592,260 @@ rvv_const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT 
minval,
   IN_RANGE (INTVAL (elt), minval, maxval));
 }
 
+/* Report when we try to do something that requires vector when vector is 
disabled.
+   This is an error of last resort and isn't very high-quality.  It usually
+   involves attempts to measure the vector length in some way.  */
+void
+rvv_report_required (void)
+{
+  static bool reported_p = false;
+
+  /* Avoid reporting a slew of messages for a single oversight.  */
+  if (reported_p)
+return;
+
+  error ("this operation requires the RVV ISA extension");
+  inform (input_location, "you can enable RVV using the command-line"
+  " option %<-march%>, or by using the %<target%>"
+  " attribute or pragma");
+  reported_p = true;
+}
+
+/* Note: clobber register holds the vlenb or 1/2 vlenb or 1/4 vlenb or 1/8 
vlenb value. */
+/* Expand move for quotient.  */
+static void
+expand_quotient (int quotient, machine_mode mode, rtx clobber_vlenb, rtx dest)
+{
+  if (quotient == 0)
+{
+  riscv_emit_move(dest, GEN_INT(0));
+  return;
+}
+
+  bool is_neg = quotient < 0;
+  quotient = abs(quotient);
+  int log2 = exact_log2 (quotient);
+  int vlenb = BYTES_PER_RISCV_VECTOR.coeffs[1];
+
+  if (GET_MODE_SIZE (mode).to_constant () <= GET_MODE_SIZE (Pmode))
+emit_insn (gen_rtx_SET (clobber_vlenb, gen_int_mode (poly_int64 (vlenb, 
vlenb), mode)));
+  else
+{
+  riscv_emit_move (gen_highpart (Pmode, clobber_vlenb), GEN_INT (0));
+  emit_insn (gen_rtx_SET (gen_lowpart (Pmode, clobber_vlenb), gen_int_mode 
(poly_int64 (vlenb, vlenb), Pmode)));
+}
+
+  if (log2 == 0)
+{
+  if (is_neg)
+{
+  if (GET_MODE_SIZE (mode).to_constant () <= GET_MODE_SIZE (Pmode))
+emit_insn (gen_rtx_SET (dest, gen_rtx_NEG (mode, clobber_vlenb)));
+  else
+{
+  /* We should use SImode to simulate DImode negation. */
+  /* prologue and epilogue can not go through this condition. */
+  gcc_assert (can_create_pseudo_p ());
+  rtx reg = gen_reg_rtx (Pmode);
+  riscv_emit_move(dest, clobber_vlenb);
+  emit_insn (gen_rtx_SET (reg,
+  gen_rtx_NE (Pmode, gen_lowpart (Pmode, dest), const0_rtx)));
+  emit_insn (gen_rtx_SET (gen_highpart (Pmode, dest),
+  gen_rtx_NEG (Pmode, gen_highpart (Pmode, dest))));
+  emit_insn (gen_rtx_SET (gen_lowpart (Pmode, dest),
+  gen_rtx_NEG (Pmode, gen_lowpart (Pmode, dest))));
+  emit_insn (gen_rtx_SET (gen_highpart (Pmode, dest),
+  gen_rtx_MINUS (Pmode, gen_highpart (Pmode, dest), reg)));
+}
+}
+  else
+riscv_emit_move(dest, clobber_vlenb);
+}
+  else if (log2 != -1
+&& GET_MODE_SIZE (mode).to_constant () <= GET_MODE_SIZE (Pmode))
+{
+  gcc_assert (IN_RANGE (log2, 0, 31));
+
+  if (is_neg)
+{
+  emit_insn (gen_rtx_SET (dest, gen_rtx_NEG (mode, clobber_vlenb)));
+  e

[PATCH 06/21] Add insert-vsetvl pass

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/ChangeLog:

* config.gcc: Add riscv-insert-vsetvl.o extra_objs for RVV support.
* config/riscv/constraints.md (Ws5): New constraint.
* config/riscv/predicates.md (p_reg_or_const_csr_operand): New 
predicate.
(vector_reg_or_const0_operand): New predicate.
(vector_move_operand): New predicate.
(reg_or_mem_operand): New predicate.
(reg_or_simm5_operand): New predicate.
(reg_or_const_int_operand): New predicate.
* config/riscv/riscv-opts.h (enum vsew_field_enum): New enum.
* config/riscv/riscv-passes.def (INSERT_PASS_AFTER): Run insert vsetvl 
pass after pass_split_all_insns.
(INSERT_PASS_BEFORE): Run insert vsetvl pass before pass_sched2.
* config/riscv/riscv-protos.h (make_pass_insert_vsetvl): New function.
(make_pass_insert_vsetvl2): New function.
(rvv_mask_mode_p): New function.
(rvv_classify_vsew_field): New function.
(rvv_gen_policy): New function.
(rvv_get_mask_mode): New function.
(rvv_translate_attr_mode): New function.
* config/riscv/riscv-vector-builtins-iterators.def (V): New iterator.
(VF): New iterator.
(VB): New iterator.
(VFULL): New iterator.
(VPARTIAL): New iterator.
(V64BITI): New iterator.
(VM): New iterator.
(VSUB): New iterator.
(VDI_TO_VSI): New iterator.
(VDI_TO_VSI_VM): New iterator.
* config/riscv/riscv-vector.cc (enum vsew_field_enum): New enum.
(rvv_classify_vsew_field): New function.
(rvv_gen_policy): New function.
(rvv_translate_attr_mode): New function.
(TRANSLATE_VECTOR_MODE): New macro define.
(classify_vtype_field): New function.
(get_lmulx8): New function.
(force_reg_for_over_uimm): New function.
(gen_vlx2): New function.
(emit_int64_to_vector_32bit): New function.
(imm32_p): New function.
(imm_p): New function.
(gen_3): New function.
(gen_4): New function.
(gen_5): New function.
(gen_6): New function.
(gen_7): New function.
(enum GEN_CLASS): New enum.
(modify_operands): New function.
(emit_op5_vmv_v_x): New function.
(emit_op5): New function.
* config/riscv/riscv-vector.h (riscv_vector_mode_p): New function. 
(rvv_legitimate_poly_int_p): New function.
(rvv_offset_temporaries): New function.
(rvv_classify_vlmul_field): New function.
(rvv_parse_vsew_field): New function.
(rvv_parse_vlmul_field): New function.
(rvv_parse_vta_field): New function.
(rvv_parse_vma_field): New function.
(rvv_regsize): New function.
(rvv_get_mask_mode): New function.
* config/riscv/riscv.md: Add RVV modes.
* config/riscv/t-riscv: New object.
* config/riscv/vector-iterators.md: New iterators and attributes.
* config/riscv/vector.md (@vec_duplicate): New pattern.
(@vle): New pattern.
(@vse): New pattern.
(@vlm): New pattern.
(@vsm): New pattern.
(@v_v_x): New pattern.
(@vmv_v_x_internal): New pattern.
(@vmv_v_x_32bit): New pattern.
(@vfmv_v_f): New pattern.
(@vmerge_vxm_internal): New pattern.
* config/riscv/riscv-insert-vsetvl.cc: New file.

---
 gcc/config.gcc|2 +-
 gcc/config/riscv/constraints.md   |5 +
 gcc/config/riscv/predicates.md|   31 +
 gcc/config/riscv/riscv-insert-vsetvl.cc   | 2312 +
 gcc/config/riscv/riscv-opts.h |   12 +
 gcc/config/riscv/riscv-passes.def |2 +
 gcc/config/riscv/riscv-protos.h   |   19 +
 .../riscv/riscv-vector-builtins-iterators.def |  236 ++
 gcc/config/riscv/riscv-vector.cc  |  368 +++
 gcc/config/riscv/riscv-vector.h   |   10 -
 gcc/config/riscv/riscv.md |   67 +-
 gcc/config/riscv/t-riscv  |4 +
 gcc/config/riscv/vector-iterators.md  |  129 +-
 gcc/config/riscv/vector.md|  235 +-
 14 files changed, 3417 insertions(+), 15 deletions(-)
 create mode 100644 gcc/config/riscv/riscv-insert-vsetvl.cc

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 042a7a17737..1592e344531 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -518,7 +518,7 @@ pru-*-*)
 riscv*)
cpu_type=riscv
extra_headers="riscv_vector.h"
-   extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o 
riscv-shorten-memrefs.o riscv-vector.o riscv-vector-builtins-functions.o 
riscv-vector-builtins.o"
+   extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o 
riscv-shorten-memrefs.o riscv-vector.o riscv-vector-builtins-functions.o 
riscv-vector-builtins.o riscv-insert-vsetvl.o"
d_target_objs="riscv-d.o"
target_gtfiles="$target_gtfiles 
\$(srcdir)/config/ris

[PATCH 05/21] Add RVV configuration intrinsic

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/ChangeLog:

* config/riscv/riscv-protos.h (riscv_register_pragmas): New function.
(riscv_classify_vlmul_field): New enum.
(rvv_classify_vlmul_field): New enum.
(rvv_parse_vsew_field): New enum.
(rvv_parse_vlmul_field): New enum.
(rvv_parse_vta_field): New enum.
(rvv_parse_vma_field): New enum.
* config/riscv/riscv-vector-builtins-functions.cc (get_vtype_for_mode): 
New function.
(mode2data_type_str): New function.
(config::call_properties): New function.
(config::assemble_name): New function.
(config::get_return_type): New function.
(vsetvl::get_argument_types): New function.
(vsetvl::expand): New function.
(vsetvlmax::expand): New function.
* config/riscv/riscv-vector-builtins-functions.def:
(vsetvl): New macro define.
(vsetvlmax): New macro define.
* config/riscv/riscv-vector-builtins-functions.h (class config): New 
class.
(class vsetvl): New class.
(class vsetvlmax): New class.
* config/riscv/riscv-vector-builtins-iterators.def (VI): New iterator.
* config/riscv/riscv-vector.cc (rvv_parse_vsew_field): New function.
(rvv_parse_vlmul_field): New function.
(rvv_parse_vta_field): New function.
(rvv_parse_vma_field): New function.
* config/riscv/riscv-vector.h (rvv_parse_vsew_field): New function.
(rvv_parse_vlmul_field): New function.
(rvv_parse_vta_field): New function.
(rvv_parse_vma_field): New function.
* config/riscv/riscv.md: Add X0_REGNUM constant.
* config/riscv/vector-iterators.md (unspec): New unspec.
* config/riscv/vector.md: New file.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/intrinsic/rvv-intrinsic.exp: New test.
* gcc.target/riscv/rvv/intrinsic/vsetvl.c: New test.

---
 gcc/config/riscv/riscv-protos.h   |   8 +-
 .../riscv/riscv-vector-builtins-functions.cc  | 232 ++
 .../riscv/riscv-vector-builtins-functions.def |   6 +
 .../riscv/riscv-vector-builtins-functions.h   |  36 +
 .../riscv/riscv-vector-builtins-iterators.def |  23 +
 gcc/config/riscv/riscv-vector.cc  |  28 +
 gcc/config/riscv/riscv-vector.h   |   4 +
 gcc/config/riscv/riscv.md |   6 +-
 gcc/config/riscv/vector-iterators.md  |  14 +-
 gcc/config/riscv/vector.md| 140 
 .../riscv/rvv/intrinsic/rvv-intrinsic.exp |  47 ++
 .../gcc.target/riscv/rvv/intrinsic/vsetvl.c   | 733 ++
 12 files changed, 1273 insertions(+), 4 deletions(-)
 create mode 100644 gcc/config/riscv/vector.md
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/intrinsic/rvv-intrinsic.exp
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/intrinsic/vsetvl.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 4a4ac645f55..cae2974b54f 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -77,6 +77,7 @@ extern bool riscv_gpr_save_operation_p (rtx);
 
 /* Routines implemented in riscv-c.cc.  */
 void riscv_cpu_cpp_builtins (cpp_reader *);
+void riscv_register_pragmas (void);
 
 /* Routines implemented in riscv-d.cc  */
 extern void riscv_d_target_versions (void);
@@ -114,9 +115,12 @@ extern const riscv_cpu_info *riscv_find_cpu (const char *);
 extern bool rvv_mode_p (machine_mode);
 extern bool rvv_legitimate_poly_int_p (rtx);
 extern unsigned int rvv_offset_temporaries (bool, poly_int64);
-extern enum vlmul_field_enum riscv_classify_vlmul_field (machine_mode);
+extern enum vlmul_field_enum rvv_classify_vlmul_field (machine_mode);
+extern unsigned int rvv_parse_vsew_field (unsigned int);
+extern unsigned int rvv_parse_vlmul_field (unsigned int);
+extern bool rvv_parse_vta_field (unsigned int);
+extern bool rvv_parse_vma_field (unsigned int);
 extern int rvv_regsize (machine_mode);
-extern void riscv_register_pragmas (void);
 
 /* We classify builtin types into two classes:
1. General builtin class which is using the
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.cc 
b/gcc/config/riscv/riscv-vector-builtins-functions.cc
index 19bcb66a83f..0acda8f671e 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.cc
@@ -49,6 +49,164 @@ static const unsigned int CP_WRITE_CSR = 1U << 6;
when the required extension is disabled.  */
 static bool reported_missing_extension_p;
 
+/* Generate vtype bitmap for a specific machine mode. */
+static unsigned int
+get_vtype_for_mode (machine_mode mode)
+{
+  switch (mode)
+{
+case VNx2QImode:
+case VNx2BImode:
+  return 0x45;
+
+case VNx4QImode:
+case VNx4BImode:
+  return 0x46;
+
+case VNx8QImode:
+case VNx8BImode:
+  return 0x47;
+
+case VNx16QImode:
+case VNx16BImode:
+  return 0x40;
+
+case VNx32QImode:
+

[PATCH 04/21] Add RVV intrinsic enable #pragma riscv intrinsic "vector" and introduce RVV header "riscv_vector.h"

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/ChangeLog:

* config.gcc: New header.
* config/riscv/riscv-c.cc (riscv_pragma_intrinsic): New function.
(riscv_check_builtin_call): New function.
(riscv_register_pragmas): New function.
* config/riscv/riscv-protos.h (riscv_register_pragmas): New function.
* config/riscv/riscv.h (REGISTER_TARGET_PRAGMAS): New targethook.
* config/riscv/riscv_vector.h: New file.

---
 gcc/config.gcc  |  1 +
 gcc/config/riscv/riscv-c.cc | 65 +
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv.h|  2 +
 gcc/config/riscv/riscv_vector.h | 41 +
 5 files changed, 110 insertions(+)
 create mode 100644 gcc/config/riscv/riscv_vector.h

diff --git a/gcc/config.gcc b/gcc/config.gcc
index bdda82ae576..042a7a17737 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -517,6 +517,7 @@ pru-*-*)
;;
 riscv*)
cpu_type=riscv
+   extra_headers="riscv_vector.h"
extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o 
riscv-shorten-memrefs.o riscv-vector.o riscv-vector-builtins-functions.o 
riscv-vector-builtins.o"
d_target_objs="riscv-d.o"
target_gtfiles="$target_gtfiles 
\$(srcdir)/config/riscv/riscv-builtins.cc 
\$(srcdir)/config/riscv/riscv-vector-builtins.cc"
diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index eb7ef09297e..5839e849092 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -25,9 +25,17 @@ along with GCC; see the file COPYING3.  If not see
 #include "system.h"
 #include "coretypes.h"
 #include "tm.h"
+#include "input.h"
+#include "memmodel.h"
+#include "tm_p.h"
+#include "flags.h"
 #include "c-family/c-common.h"
 #include "cpplib.h"
+#include "c-family/c-pragma.h"
+#include "langhooks.h"
+#include "target.h"
 #include "riscv-subset.h"
+#include "riscv-vector-builtins.h"
 
 #define builtin_define(TXT) cpp_define (pfile, TXT)
 
@@ -155,3 +163,60 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
   builtin_define_with_int_value (buf, version_value);
 }
 }
+
+/* Implement "#pragma riscv intrinsic": lex its string operand and dispatch.  */
+static void
+riscv_pragma_intrinsic (cpp_reader *)
+{
+  tree x;
+
+  if (pragma_lex (&x) != CPP_STRING)
+    {
+      error ("%<#pragma riscv intrinsic%> requires a string parameter");
+      return;
+    }
+
+  const char *name = TREE_STRING_POINTER (x);
+
+  if (strcmp (name, "vector") == 0)
+    {
+      if (!TARGET_VECTOR)
+	error ("%<#pragma riscv intrinsic%> option %qs needs 'V' extension enabled", name);
+      /* No early return: the handler still runs after the diagnostic above.  */
+      riscv_vector::handle_pragma_vector ();
+    }
+  else
+    error ("unknown %<#pragma riscv intrinsic%> option %qs", name);
+}
+
+/* Implement TARGET_CHECK_BUILTIN_CALL.  General builtins are always accepted;
+   vector builtins are checked by riscv_vector::check_builtin_call.  */
+static bool
+riscv_check_builtin_call (location_t loc, vec<location_t> arg_loc,
+			  tree fndecl, tree orig_fndecl,
+			  unsigned int nargs, tree *args)
+{
+  unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
+  unsigned int subcode = code >> RISCV_BUILTIN_SHIFT;
+
+  switch (code & RISCV_BUILTIN_CLASS)
+    {
+    case RISCV_BUILTIN_GENERAL:
+      return true;
+
+    case RISCV_BUILTIN_VECTOR:
+      return riscv_vector::check_builtin_call (loc, arg_loc, subcode,
+					       orig_fndecl, nargs, args);
+    }
+  /* Only the two builtin classes handled above are expected here.  */
+  gcc_unreachable ();
+}
+
+/* Implement REGISTER_TARGET_PRAGMAS: hook up "#pragma riscv intrinsic" and
+   install riscv_check_builtin_call as the target's builtin-call checker.  */
+void
+riscv_register_pragmas (void)
+{
+  c_register_pragma ("riscv", "intrinsic", riscv_pragma_intrinsic);
+  targetm.check_builtin_call = riscv_check_builtin_call;
+}
\ No newline at end of file
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 1cb3586d1f1..4a4ac645f55 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -116,6 +116,7 @@ extern bool rvv_legitimate_poly_int_p (rtx);
 extern unsigned int rvv_offset_temporaries (bool, poly_int64);
 extern enum vlmul_field_enum riscv_classify_vlmul_field (machine_mode);
 extern int rvv_regsize (machine_mode);
+extern void riscv_register_pragmas (void);
 
 /* We classify builtin types into two classes:
1. General builtin class which is using the
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 8f56a5a4746..cb4cfc0f73e 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -1067,4 +1067,6 @@ extern void riscv_remove_unneeded_save_restore_calls (void);
 
 #define TARGET_SUPPORTS_WIDE_INT 1
 
+#define REGISTER_TARGET_PRAGMAS() riscv_register_pragmas ()
+
 #endif /* ! GCC_RISCV_H */
diff --git a/gcc/config/riscv/riscv_vector.h b/gcc/config/riscv/riscv_vector.h
new file mode 100644
index 000..ef1820a07cb
--- /dev/null
+++ b/gcc/config/riscv/riscv_vector.h
@@ -0,0 +1,41 @@
+/* Header of intrinsics for RISC-V 'V' Extension for GNU compiler.
+   Copyright (C) 2021-2021 Free Software Foundation, Inc.
+   Contributed by Juzhe Zhong (juzhe.zh...@riv

[PATCH 03/21] Add RVV datatypes

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins.cc (make_type_sizeless): New function.
(sizeless_type_p): New function.
(vector_builtin_mode): New function.
(vector_legal_vlmul): New function.
(add_vector_type_attribute): New function.
(register_general_builtin_types): New function.
(DEFINE_SCALAR_PTR_TYPE_NODE): New function.
(register_builtin_types): New function.
(register_vector_type): New function.
(handle_pragma_vector): New function.
(lookup_rvv_type_attribute): New function.
(builtin_type_p): New function.
(verify_type_context): New function.
(mangle_builtin_type): New function.
* config/riscv/riscv-vector-builtins.h (builtin_type_p): New function.
(verify_type_context): New function.
(mangle_builtin_type): New function.
* config/riscv/riscv.cc (riscv_vector_mode_supported_p): New function.
(riscv_vector_alignment): New function.
(riscv_vectorize_preferred_vector_alignment): New function.
(riscv_simd_vector_alignment_reachable): New function.
(riscv_builtin_support_vector_misalignment): New function.
(riscv_compatible_vector_types_p): New function.
(riscv_verify_type_context): New function.
(riscv_mangle_type): New function.
(TARGET_VECTOR_MODE_SUPPORTED_P): New targethook.
(TARGET_VECTOR_ALIGNMENT): New targethook.
(TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT): New targethook.
(TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE): New targethook.
(TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT): New targethook.
(TARGET_COMPATIBLE_VECTOR_TYPES_P): New targethook.
(TARGET_VERIFY_TYPE_CONTEXT): New targethook.
(TARGET_MANGLE_TYPE): New targethook.

---
 gcc/config/riscv/riscv-vector-builtins.cc | 466 ++
 gcc/config/riscv/riscv-vector-builtins.h  |   3 +
 gcc/config/riscv/riscv.cc | 144 +++
 3 files changed, 613 insertions(+)

diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc
index 7ea07a24b5b..ef734572add 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -109,6 +109,286 @@ static unsigned int NUM_INSN_FUNC;
 
 static void init_def_variables ();
 
+/* Mark TYPE as sizeless by prepending an "RVV sizeless type" attribute
+   to its attribute list.  */
+static void
+make_type_sizeless (tree type)
+{
+  tree attrs = TYPE_ATTRIBUTES (type);
+  TYPE_ATTRIBUTES (type)
+    = tree_cons (get_identifier ("RVV sizeless type"), NULL_TREE, attrs);
+}
+
+/* Return true if TYPE is a sizeless type, i.e. it carries the
+   "RVV sizeless type" attribute.  error_mark_node is never sizeless.  */
+static bool
+sizeless_type_p (const_tree type)
+{
+  if (type == error_mark_node)
+    return false;
+  /* lookup_attribute yields a tree; compare explicitly to produce a bool.  */
+  return lookup_attribute ("RVV sizeless type", TYPE_ATTRIBUTES (type))
+	 != NULL_TREE;
+}
+
+machine_mode
+vector_builtin_mode (scalar_mode inner_mode, enum vlmul_field_enum vlmul)
+{
+  switch (inner_mode)
+{
+case E_BImode:
+  return vlmul == VLMUL_FIELD_000   ? VNx16BImode
+ : vlmul == VLMUL_FIELD_001 ? VNx32BImode
+ : vlmul == VLMUL_FIELD_010 ? VNx64BImode
+ : vlmul == VLMUL_FIELD_011 ? VNx128BImode
+ : vlmul == VLMUL_FIELD_111 ? VNx8BImode
+ : vlmul == VLMUL_FIELD_110 ? VNx4BImode
+   : VNx2BImode;
+
+case E_QImode:
+  return vlmul == VLMUL_FIELD_000   ? VNx16QImode
+ : vlmul == VLMUL_FIELD_001 ? VNx32QImode
+ : vlmul == VLMUL_FIELD_010 ? VNx64QImode
+ : vlmul == VLMUL_FIELD_011 ? VNx128QImode
+ : vlmul == VLMUL_FIELD_111 ? VNx8QImode
+ : vlmul == VLMUL_FIELD_110 ? VNx4QImode
+   : VNx2QImode;
+
+case E_HImode:
+  if (vlmul == VLMUL_FIELD_101)
+gcc_unreachable ();
+
+  return vlmul == VLMUL_FIELD_000   ? VNx8HImode
+ : vlmul == VLMUL_FIELD_001 ? VNx16HImode
+ : vlmul == VLMUL_FIELD_010 ? VNx32HImode
+ : vlmul == VLMUL_FIELD_011 ? VNx64HImode
+ : vlmul == VLMUL_FIELD_111 ? VNx4HImode
+   : VNx2HImode;
+
+case E_SImode:
+  if (vlmul == VLMUL_FIELD_101 || vlmul == VLMUL_FIELD_110)
+gcc_unreachable ();
+
+  return vlmul == VLMUL_FIELD_000   ? VNx4SImode
+ : vlmul == VLMUL_FIELD_001 ? VNx8SImode
+ : vlmul == VLMUL_FIELD_010 ? VNx16SImode
+ : vlmul == VLMUL_FIELD_011 ? VNx32SImode
+   : VNx2SImode;
+
+case E_DImode:
+  if (vlmul == VLMUL_FIELD_101 || vlmul == VLMUL_FIELD_110 ||
+  vlmul == VLMUL_FIELD_111)
+gcc_unreachable ();
+
+  return vlmul == VLMUL_FIELD_000   ? VNx2DImode
+ : vlmul == VLMUL_FIELD_001 ? VNx4DImode
+ : vlmul == VLMUL_FIELD_010 ? VNx8DImode
+   : VN

[PATCH 02/21] Add RVV intrinsic framework

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/ChangeLog:

* config.gcc: Add riscv-vector-builtins-functions.o and riscv-vector-builtins.o extra_objs for RVV support.
* config/riscv/riscv-builtins.cc (riscv_init_builtins): Add RVV support.
(riscv_builtin_decl): Add RVV support.
(riscv_expand_builtin): Add RVV support.
(riscv_gimple_fold_builtin): New function.
* config/riscv/riscv-protos.h (riscv_gimple_fold_builtin): New function.
(enum riscv_builtin_class): New enum.
* config/riscv/riscv-vector.cc (rvv_get_mask_mode): New function.
* config/riscv/riscv-vector.h (rvv_get_mask_mode): New function.
* config/riscv/riscv.cc (riscv_class_max_nregs): Add RVV register.
(riscv_conditional_register_usage): Add RVV register.
(TARGET_GIMPLE_FOLD_BUILTIN): New targethook.
* config/riscv/t-riscv: New object.
* config/riscv/md-parser: New file.
* config/riscv/riscv-vector-builtins-functions.cc: New file.
* config/riscv/riscv-vector-builtins-functions.def: New file.
* config/riscv/riscv-vector-builtins-functions.h: New file.
* config/riscv/riscv-vector-builtins-iterators.def: New file.
* config/riscv/riscv-vector-builtins.cc: New file.
* config/riscv/riscv-vector-builtins.def: New file.
* config/riscv/riscv-vector-builtins.h: New file.
* config/riscv/vector-iterators.md: New file.

---
 gcc/config.gcc|5 +-
 gcc/config/riscv/md-parser|  205 
 gcc/config/riscv/riscv-builtins.cc|   88 +-
 gcc/config/riscv/riscv-protos.h   |   19 +
 .../riscv/riscv-vector-builtins-functions.cc  | 1012 +
 .../riscv/riscv-vector-builtins-functions.def |   34 +
 .../riscv/riscv-vector-builtins-functions.h   |  491 
 .../riscv/riscv-vector-builtins-iterators.def |   12 +
 gcc/config/riscv/riscv-vector-builtins.cc |  266 +
 gcc/config/riscv/riscv-vector-builtins.def|   37 +
 gcc/config/riscv/riscv-vector-builtins.h  |   59 +
 gcc/config/riscv/riscv-vector.cc  |   17 +
 gcc/config/riscv/riscv-vector.h   |1 +
 gcc/config/riscv/riscv.cc |   21 +
 gcc/config/riscv/t-riscv  |   36 +
 gcc/config/riscv/vector-iterators.md  |   19 +
 16 files changed, 2307 insertions(+), 15 deletions(-)
 create mode 100644 gcc/config/riscv/md-parser
 create mode 100644 gcc/config/riscv/riscv-vector-builtins-functions.cc
 create mode 100644 gcc/config/riscv/riscv-vector-builtins-functions.def
 create mode 100644 gcc/config/riscv/riscv-vector-builtins-functions.h
 create mode 100644 gcc/config/riscv/riscv-vector-builtins-iterators.def
 create mode 100644 gcc/config/riscv/riscv-vector-builtins.cc
 create mode 100644 gcc/config/riscv/riscv-vector-builtins.def
 create mode 100644 gcc/config/riscv/riscv-vector-builtins.h
 create mode 100644 gcc/config/riscv/vector-iterators.md

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 50154c2eb3a..bdda82ae576 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -517,8 +517,11 @@ pru-*-*)
;;
 riscv*)
cpu_type=riscv
-   extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-vector.o"
+   extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-vector.o riscv-vector-builtins-functions.o riscv-vector-builtins.o"
d_target_objs="riscv-d.o"
+   target_gtfiles="$target_gtfiles \$(srcdir)/config/riscv/riscv-builtins.cc \$(srcdir)/config/riscv/riscv-vector-builtins.cc"
+   target_gtfiles="$target_gtfiles \$(srcdir)/config/riscv/riscv-vector-builtins-functions.cc"
+   target_gtfiles="$target_gtfiles \$(srcdir)/config/riscv/riscv-vector-builtins-functions.h"
;;
 rs6000*-*-*)
extra_options="${extra_options} g.opt fused-madd.opt 
rs6000/rs6000-tables.opt"
diff --git a/gcc/config/riscv/md-parser b/gcc/config/riscv/md-parser
new file mode 100644
index 000..311b8709c0a
--- /dev/null
+++ b/gcc/config/riscv/md-parser
@@ -0,0 +1,205 @@
+# Mode iterators and attributes parser for RISC-V 'V' Extension for GNU compiler.
+# Copyright (C) 2022-2022 Free Software Foundation, Inc.
+# Contributed by Juzhe Zhong (juzhe.zh...@rivai.ai), RiVAI Technologies Ltd.
+# 
+# This file is part of GCC.
+# 
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not se

[PATCH 01/21] Add RVV modes and support scalable vector

2022-05-31 Thread juzhe . zhong

From: zhongjuzhe 

gcc/ChangeLog:

* config.gcc: Add riscv-vector.o extra_objs for RVV support.
* config/riscv/constraints.md: New constraints.
* config/riscv/predicates.md: New predicates.
* config/riscv/riscv-modes.def: New machine mode.
* config/riscv/riscv-opts.h: New enum.
* config/riscv/riscv-protos.h: Declare new functions.
* config/riscv/riscv-sr.cc (riscv_remove_unneeded_save_restore_calls): Adjust for poly_int.
* config/riscv/riscv.cc (struct riscv_frame_info): Change HOST_WIDE_INT to poly_int64.
(poly_uint16 riscv_vector_chunks): New declaration.
(riscv_legitimate_constant_p): Adjust for poly_int.
(riscv_cannot_force_const_mem): Adjust for poly_int.
(riscv_valid_offset_p): Adjust for poly_int.
(riscv_valid_lo_sum_p): Adjust for poly_int.
(riscv_classify_address): Disallow PLUS, LO_SUM and CONST_INT memory address for RVV.
(riscv_address_insns): Adjust for poly_int.
(riscv_const_insns): Adjust for poly_int.
(riscv_load_store_insns): Adjust for poly_int.
(riscv_legitimize_move): Adjust for poly_int.
(riscv_binary_cost): Adjust for poly_int.
(riscv_rtx_costs): Adjust for poly_int.
(riscv_output_move): Adjust for poly_int.
(riscv_extend_comparands): Adjust for poly_int.
(riscv_flatten_aggregate_field): Adjust for poly_int.
(riscv_get_arg_info): Adjust for poly_int.
(riscv_pass_by_reference): Adjust for poly_int.
(riscv_elf_select_rtx_section): Adjust for poly_int.
(riscv_stack_align): Adjust for poly_int.
(riscv_compute_frame_info): Adjust for poly_int.
(riscv_initial_elimination_offset): Change HOST_WIDE_INT to poly_int64.
(riscv_set_return_address): Adjust for poly_int.
(riscv_for_each_saved_reg): Adjust for poly_int.
(riscv_first_stack_step): Adjust for poly_int.
(riscv_expand_prologue): Adjust for poly_int.
(riscv_expand_epilogue): Adjust for poly_int.
(riscv_can_use_return_insn): Adjust for poly_int.
(riscv_secondary_memory_needed): Disable secondary memory for RVV.
(riscv_hard_regno_nregs): Add RVV register allocation.
(riscv_hard_regno_mode_ok): Add RVV register allocation.
(riscv_convert_riscv_vector_bits): New function.
(riscv_option_override): Add RVV vector bits parser.
(riscv_promote_function_mode): Adjust for RVV modes.
* config/riscv/riscv.h: New macro define.
* config/riscv/riscv.md: Adjust for poly_int.
* config/riscv/riscv.opt: New option.
* config/riscv/t-riscv: New object.
* config/riscv/riscv-vector.cc: New file.
* config/riscv/riscv-vector.h: New file.

---
 gcc/config.gcc   |   2 +-
 gcc/config/riscv/constraints.md  |  17 ++
 gcc/config/riscv/predicates.md   |   5 +-
 gcc/config/riscv/riscv-modes.def | 177 ++
 gcc/config/riscv/riscv-opts.h|  27 +++
 gcc/config/riscv/riscv-protos.h  |   9 +-
 gcc/config/riscv/riscv-sr.cc |   2 +-
 gcc/config/riscv/riscv-vector.cc | 229 +++
 gcc/config/riscv/riscv-vector.h  |  28 +++
 gcc/config/riscv/riscv.cc| 302 +++
 gcc/config/riscv/riscv.h |  84 +++--
 gcc/config/riscv/riscv.md|  36 ++--
 gcc/config/riscv/riscv.opt   |  32 
 gcc/config/riscv/t-riscv |   4 +
 14 files changed, 849 insertions(+), 105 deletions(-)
 create mode 100644 gcc/config/riscv/riscv-vector.cc
 create mode 100644 gcc/config/riscv/riscv-vector.h

diff --git a/gcc/config.gcc b/gcc/config.gcc
index cdbefb5b4f5..50154c2eb3a 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -517,7 +517,7 @@ pru-*-*)
;;
 riscv*)
cpu_type=riscv
-   extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o"
+   extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-vector.o"
d_target_objs="riscv-d.o"
;;
 rs6000*-*-*)
diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index bafa4188ccb..7fd61a04216 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -80,3 +80,20 @@
A constant @code{move_operand}."
   (and (match_operand 0 "move_operand")
(match_test "CONSTANT_P (op)")))
+
+;; Vector constraints.
+
+(define_register_constraint "vr" "TARGET_VECTOR ? V_REGS : NO_REGS"
+  "A vector register (if available).")
+
+(define_register_constraint "vd" "TARGET_VECTOR ? VD_REGS : NO_REGS"
+  "A vector register except mask register (if available).")
+
+(define_register_constraint "vm" "TARGET_VECTOR ? VM_REGS : NO_REGS"
+  "A vector mask register (if available).")
+
+(define_constraint "vp"
+  "POLY_INT"
+  (and (match_code "const_poly_int")
+   (match_test "CONST_POLY_INT_COEFFS (op)[0] == UNITS_PER_V_REG.coeffs[0]
+&& CONST_POLY_INT_COEF

56 matches

Mail list logo